MFC r282212:

Microsoft vmbus, storage and other related driver enhancements for HyperV.

- Vmbus multi channel support.
- Vector interrupt support.
- Signal optimization.
- Storvsc driver performance improvement.
- Scatter and gather support for storvsc driver.
- Minor bug fix for KVP driver.

Thanks royger, jhb and delphij from the FreeBSD community for the reviews and comments. Also thanks Hovy Xu from NetApp for the contributions to the storvsc driver.

PR: 195238
Submitted by: whu
Reviewed by: royger
Approved by: royger
Relnotes: yes
Sponsored by: Microsoft OSTC
Differential Revision: https://reviews.freebsd.org/D2575
author: whu <whu@FreeBSD.org> 2015-05-22 09:03:55 +0000
committer: whu <whu@FreeBSD.org> 2015-05-22 09:03:55 +0000
commit: 30cd3b9808be2ed4002993166f0790b8f07d95d8 (patch)
tree: f2a61a02982282e7de44dcf271ef00cd723d44c5 /sys/dev/hyperv/vmbus/hv_channel_mgmt.c
parent: b453b295750133b89170fcb27025f932be66ad18 (diff)
download: FreeBSD-src-30cd3b9808be2ed4002993166f0790b8f07d95d8.zip
FreeBSD-src-30cd3b9808be2ed4002993166f0790b8f07d95d8.tar.gz
1 files changed, 227 insertions, 41 deletions
diff --git a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
index 011e305..d13ece5 100644
--- a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
+++ b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
@@ -26,6 +26,9 @@
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #include <sys/param.h>
 #include <sys/mbuf.h>
 
@@ -50,6 +53,8 @@ static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr);
 static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr);
 static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr);
 static void vmbus_channel_process_offer(void *context);
+struct hv_vmbus_channel*
+    vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary);
 
 /**
  * Channel message dispatch table
@@ -233,6 +238,9 @@ hv_vmbus_allocate_channel(void)
 	    return (NULL);
 
 	mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF);
+	mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF);
+
+	TAILQ_INIT(&channel->sc_list_anchor);
 
 	channel->control_work_queue = hv_work_queue_create("control");
 
@@ -262,6 +270,7 @@ ReleaseVmbusChannel(void *context)
 void
 hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
 {
+	mtx_destroy(&channel->sc_lock);
 	mtx_destroy(&channel->inbound_lock);
 	/*
 	 * We have to release the channel's workqueue/thread in
@@ -279,10 +288,10 @@ hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
 static void
 vmbus_channel_process_offer(void *context)
 {
-	int			ret;
 	hv_vmbus_channel*	new_channel;
 	boolean_t		f_new;
 	hv_vmbus_channel*	channel;
+	int			ret;
 
 	new_channel = (hv_vmbus_channel*) context;
 	f_new = TRUE;
@@ -291,38 +300,76 @@ vmbus_channel_process_offer(void *context)
 	/*
 	 * Make sure this is a new offer
 	 */
-	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+	mtx_lock(&hv_vmbus_g_connection.channel_lock);
 
 	TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor,
 	    list_entry)
 	{
-	    if (!memcmp(
-		&channel->offer_msg.offer.interface_type,
-		&new_channel->offer_msg.offer.interface_type,
-		sizeof(hv_guid))
-		&& !memcmp(
-		    &channel->offer_msg.offer.interface_instance,
+		if (memcmp(&channel->offer_msg.offer.interface_type,
+		    &new_channel->offer_msg.offer.interface_type,
+		    sizeof(hv_guid)) == 0 &&
+		    memcmp(&channel->offer_msg.offer.interface_instance,
 		    &new_channel->offer_msg.offer.interface_instance,
-		    sizeof(hv_guid))) {
-		f_new = FALSE;
-		break;
-	    }
+		    sizeof(hv_guid)) == 0) {
+			f_new = FALSE;
+			break;
+		}
 	}
 
 	if (f_new) {
-	    /* Insert at tail */
-	    TAILQ_INSERT_TAIL(
-		&hv_vmbus_g_connection.channel_anchor,
-		new_channel,
-		list_entry);
+		/* Insert at tail */
+		TAILQ_INSERT_TAIL(
+		    &hv_vmbus_g_connection.channel_anchor,
+		    new_channel,
+		    list_entry);
 	}
-	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+	mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+
+	/*XXX add new channel to percpu_list */
 
 	if (!f_new) {
+		/*
+		 * Check if this is a sub channel.
+		 */
+		if (new_channel->offer_msg.offer.sub_channel_index != 0) {
+			/*
+			 * It is a sub channel offer, process it.
+			 */
+			new_channel->primary_channel = channel;
+			mtx_lock(&channel->sc_lock);
+			TAILQ_INSERT_TAIL(
+			    &channel->sc_list_anchor,
+			    new_channel,
+			    sc_list_entry);
+			mtx_unlock(&channel->sc_lock);
+
+			/* Insert new channel into channel_anchor. */
+			printf("Storvsc get multi-channel offer, rel=%u.\n",
+			    new_channel->offer_msg.child_rel_id);	
+			mtx_lock(&hv_vmbus_g_connection.channel_lock);
+			TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor,
+			    new_channel, list_entry);				
+			mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+
+			if(bootverbose)
+				printf("VMBUS: new multi-channel offer <%p>.\n",
+				    new_channel);
+
+			/*XXX add it to percpu_list */
+
+			new_channel->state = HV_CHANNEL_OPEN_STATE;
+			if (channel->sc_creation_callback != NULL) {
+				channel->sc_creation_callback(new_channel);
+			}
+			return;
+		}
+
 	    hv_vmbus_free_vmbus_channel(new_channel);
 	    return;
 	}
 
+	new_channel->state = HV_CHANNEL_OPEN_STATE;
+
 	/*
 	 * Start the process of binding this offer to the driver
 	 * (We need to set the device field before calling
@@ -333,35 +380,86 @@ vmbus_channel_process_offer(void *context)
 	    new_channel->offer_msg.offer.interface_instance, new_channel);
 
 	/*
-	 *  TODO - the HV_CHANNEL_OPEN_STATE flag should not be set below
-	 *  but in the "open" channel request. The ret != 0 logic below
-	 *  doesn't take into account that a channel
-	 *  may have been opened successfully
-	 */
-
-	/*
 	 * Add the new device to the bus. This will kick off device-driver
 	 * binding which eventually invokes the device driver's AddDevice()
 	 * method.
 	 */
 	ret = hv_vmbus_child_device_register(new_channel->device);
 	if (ret != 0) {
-	    mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
-	    TAILQ_REMOVE(
-		&hv_vmbus_g_connection.channel_anchor,
-		new_channel,
-		list_entry);
-	    mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
-	    hv_vmbus_free_vmbus_channel(new_channel);
-	} else {
-	    /*
-	     * This state is used to indicate a successful open
-	     * so that when we do close the channel normally,
-	     * we can clean up properly
-	     */
-	    new_channel->state = HV_CHANNEL_OPEN_STATE;
+		mtx_lock(&hv_vmbus_g_connection.channel_lock);
+		TAILQ_REMOVE(
+		    &hv_vmbus_g_connection.channel_anchor,
+		    new_channel,
+		    list_entry);
+		mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+		hv_vmbus_free_vmbus_channel(new_channel);
+	}
+}
+
+/**
+ * GUIDs of the performance-critical device classes (NIC, IDE, SCSI). Channels
+ * whose interface type matches an entry are spread across all online cpus.
+ */
+static const hv_guid high_perf_devices[] = {
+	{HV_NIC_GUID, },	/* slot PERF_CHN_NIC */
+	{HV_IDE_GUID, },	/* slot PERF_CHN_IDE */
+	{HV_SCSI_GUID, },	/* slot PERF_CHN_SCSI */
+};
+
+enum {
+	PERF_CHN_NIC = 0,	/* first index scanned by vmbus_channel_select_cpu() */
+	PERF_CHN_IDE,
+	PERF_CHN_SCSI,
+	MAX_PERF_CHN,		/* scan bound; must equal the table length above */
+};
+
+/*
+ * Round-robin cursor; pre-incremented (mod mp_ncpus) per perf-critical channel.
+ */
+static uint32_t next_vcpu;
+
+/**
+ * Bind a channel to a cpu for its incoming interrupts (simple round robin).
+ * channel: receives target_cpu (guest view) and target_vcpu (host view).
+ * guid: interface type, compared against high_perf_devices[] with memcmp().
+ * Channels that are not performance critical, and every channel when the
+ * vmbus protocol version is WS2008 or WIN7, are bound to cpu 0. Otherwise the
+ * cpu is ++next_vcpu % mp_ncpus, so the first such channel gets 1 % mp_ncpus.
+ * NOTE(review): next_vcpu is updated without a lock here; confirm callers.
+ */
+static void
+vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid)
+{
+	uint32_t current_cpu;
+	int i;
+	boolean_t is_perf_channel = FALSE;
+
+	for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) {
+		if (memcmp(guid->data, high_perf_devices[i].data,
+		    sizeof(hv_guid)) == 0) {
+			is_perf_channel = TRUE;
+			break;	/* i keeps the matched index for the log below */
+		}
+	}
 
+	if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
+	    (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) ||
+	    (!is_perf_channel)) {
+		/* Host's view of the guest cpu */
+		channel->target_vcpu = 0;
+		/* Guest's own view of the cpu */
+		channel->target_cpu = 0;
+		return;
 	}
+	/* mp_ncpus should hold the number of cpus currently online */
+	current_cpu = (++next_vcpu % mp_ncpus);
+	channel->target_cpu = current_cpu;
+	channel->target_vcpu =
+	    hv_vmbus_g_context.hv_vcpu_index[current_cpu];
+	if (bootverbose)
+		printf("VMBUS: Total online cpus %d, assign perf channel %d "
+		    "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu,
+		    current_cpu);
 }
 
 /**
@@ -391,6 +489,38 @@ vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
 	if (new_channel == NULL)
 	    return;
 
+	/*
+	 * By default we setup state to enable batched
+	 * reading. A specific service can choose to
+	 * disable this prior to opening the channel.
+	 */
+	new_channel->batched_reading = TRUE;
+
+	new_channel->signal_event_param =
+	    (hv_vmbus_input_signal_event *)
+	    (HV_ALIGN_UP((unsigned long)
+		&new_channel->signal_event_buffer,
+		HV_HYPERCALL_PARAM_ALIGN));
+
+ 	new_channel->signal_event_param->connection_id.as_uint32_t = 0;	
+	new_channel->signal_event_param->connection_id.u.id =
+	    HV_VMBUS_EVENT_CONNECTION_ID;
+	new_channel->signal_event_param->flag_number = 0;
+	new_channel->signal_event_param->rsvd_z = 0;
+
+	if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) {
+		new_channel->is_dedicated_interrupt =
+		    (offer->is_dedicated_interrupt != 0);
+		new_channel->signal_event_param->connection_id.u.id =
+		    offer->connection_id;
+	}
+
+	/*
+	 * Bind the channel to a chosen cpu.
+	 */
+	vmbus_channel_select_cpu(new_channel,
+	    &offer->offer.interface_type);
+
 	memcpy(&new_channel->offer_msg, offer,
 	    sizeof(hv_vmbus_channel_offer_channel));
 	new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
@@ -666,7 +796,7 @@ hv_vmbus_release_unattached_channels(void)
 {
 	hv_vmbus_channel *channel;
 
-	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+	mtx_lock(&hv_vmbus_g_connection.channel_lock);
 
 	while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) {
 	    channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor);
@@ -676,5 +806,61 @@ hv_vmbus_release_unattached_channels(void)
 	    hv_vmbus_child_device_unregister(channel->device);
 	    hv_vmbus_free_vmbus_channel(channel);
 	}
-	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+	mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+}
+
+/**
+ * @brief Select the best outgoing channel
+ *
+ * Returns an opened sub-channel whose target_vcpu equals the current vcpu if
+ * one exists, else the candidate (primary or opened sub-channel) with the
+ * nearest target_vcpu; on equal distance the later list entry wins. Returns
+ * primary when it has no sub-channels or the current cpuid is >= MAXCPU.
+ * @param primary - primary channel; also the initial candidate
+ */
+struct hv_vmbus_channel *
+vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
+{
+	hv_vmbus_channel *new_channel = NULL;
+	hv_vmbus_channel *outgoing_channel = primary;
+	int old_cpu_distance = 0;
+	int new_cpu_distance = 0;
+	int cur_vcpu = 0;
+	int smp_pro_id = PCPU_GET(cpuid);
+
+	if (TAILQ_EMPTY(&primary->sc_list_anchor)) {
+		return outgoing_channel;
+	}
+
+	if (smp_pro_id >= MAXCPU) {
+		return outgoing_channel;
+	}
+
+	cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id];
+	/* NOTE(review): the list below is walked without taking sc_lock; confirm. */
+	TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) {
+		if (new_channel->state != HV_CHANNEL_OPENED_STATE){
+			continue;
+		}
+
+		if (new_channel->target_vcpu == cur_vcpu){
+			return new_channel;
+		}
+
+		old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ?
+		    (outgoing_channel->target_vcpu - cur_vcpu) :
+		    (cur_vcpu - outgoing_channel->target_vcpu));
+
+		new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ?
+		    (new_channel->target_vcpu - cur_vcpu) :
+		    (cur_vcpu - new_channel->target_vcpu));
+
+		if (old_cpu_distance < new_cpu_distance) {
+			continue;	/* current candidate is strictly closer */
+		}
+
+		outgoing_channel = new_channel;
+	}
+
+	return(outgoing_channel);
 }
author	whu <whu@FreeBSD.org>	2015-05-22 09:03:55 +0000
committer	whu <whu@FreeBSD.org>	2015-05-22 09:03:55 +0000
commit	30cd3b9808be2ed4002993166f0790b8f07d95d8 (patch)
tree	f2a61a02982282e7de44dcf271ef00cd723d44c5 /sys/dev/hyperv/vmbus/hv_channel_mgmt.c
parent	b453b295750133b89170fcb27025f932be66ad18 (diff)
download	FreeBSD-src-30cd3b9808be2ed4002993166f0790b8f07d95d8.zip FreeBSD-src-30cd3b9808be2ed4002993166f0790b8f07d95d8.tar.gz