diff options
Diffstat (limited to 'sys/dev/hyperv/vmbus/hv_channel_mgmt.c')
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_channel_mgmt.c | 268 |
1 file changed, 227 insertions, 41 deletions
diff --git a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c index 011e305..d13ece5 100644 --- a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c +++ b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c @@ -26,6 +26,9 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #include <sys/param.h> #include <sys/mbuf.h> @@ -50,6 +53,8 @@ static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_process_offer(void *context); +struct hv_vmbus_channel* + vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary); /** * Channel message dispatch table @@ -233,6 +238,9 @@ hv_vmbus_allocate_channel(void) return (NULL); mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF); + mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF); + + TAILQ_INIT(&channel->sc_list_anchor); channel->control_work_queue = hv_work_queue_create("control"); @@ -262,6 +270,7 @@ ReleaseVmbusChannel(void *context) void hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel) { + mtx_destroy(&channel->sc_lock); mtx_destroy(&channel->inbound_lock); /* * We have to release the channel's workqueue/thread in @@ -279,10 +288,10 @@ hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel) static void vmbus_channel_process_offer(void *context) { - int ret; hv_vmbus_channel* new_channel; boolean_t f_new; hv_vmbus_channel* channel; + int ret; new_channel = (hv_vmbus_channel*) context; f_new = TRUE; @@ -291,38 +300,76 @@ vmbus_channel_process_offer(void *context) /* * Make sure this is a new offer */ - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor, list_entry) { - if (!memcmp( - 
&channel->offer_msg.offer.interface_type, - &new_channel->offer_msg.offer.interface_type, - sizeof(hv_guid)) - && !memcmp( - &channel->offer_msg.offer.interface_instance, + if (memcmp(&channel->offer_msg.offer.interface_type, + &new_channel->offer_msg.offer.interface_type, + sizeof(hv_guid)) == 0 && + memcmp(&channel->offer_msg.offer.interface_instance, &new_channel->offer_msg.offer.interface_instance, - sizeof(hv_guid))) { - f_new = FALSE; - break; - } + sizeof(hv_guid)) == 0) { + f_new = FALSE; + break; + } } if (f_new) { - /* Insert at tail */ - TAILQ_INSERT_TAIL( - &hv_vmbus_g_connection.channel_anchor, - new_channel, - list_entry); + /* Insert at tail */ + TAILQ_INSERT_TAIL( + &hv_vmbus_g_connection.channel_anchor, + new_channel, + list_entry); } - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); + + /*XXX add new channel to percpu_list */ if (!f_new) { + /* + * Check if this is a sub channel. + */ + if (new_channel->offer_msg.offer.sub_channel_index != 0) { + /* + * It is a sub channel offer, process it. + */ + new_channel->primary_channel = channel; + mtx_lock(&channel->sc_lock); + TAILQ_INSERT_TAIL( + &channel->sc_list_anchor, + new_channel, + sc_list_entry); + mtx_unlock(&channel->sc_lock); + + /* Insert new channel into channel_anchor. 
*/ + printf("Storvsc get multi-channel offer, rel=%u.\n", + new_channel->offer_msg.child_rel_id); + mtx_lock(&hv_vmbus_g_connection.channel_lock); + TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor, + new_channel, list_entry); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); + + if(bootverbose) + printf("VMBUS: new multi-channel offer <%p>.\n", + new_channel); + + /*XXX add it to percpu_list */ + + new_channel->state = HV_CHANNEL_OPEN_STATE; + if (channel->sc_creation_callback != NULL) { + channel->sc_creation_callback(new_channel); + } + return; + } + hv_vmbus_free_vmbus_channel(new_channel); return; } + new_channel->state = HV_CHANNEL_OPEN_STATE; + /* * Start the process of binding this offer to the driver * (We need to set the device field before calling @@ -333,35 +380,86 @@ vmbus_channel_process_offer(void *context) new_channel->offer_msg.offer.interface_instance, new_channel); /* - * TODO - the HV_CHANNEL_OPEN_STATE flag should not be set below - * but in the "open" channel request. The ret != 0 logic below - * doesn't take into account that a channel - * may have been opened successfully - */ - - /* * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. 
*/ ret = hv_vmbus_child_device_register(new_channel->device); if (ret != 0) { - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); - TAILQ_REMOVE( - &hv_vmbus_g_connection.channel_anchor, - new_channel, - list_entry); - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); - hv_vmbus_free_vmbus_channel(new_channel); - } else { - /* - * This state is used to indicate a successful open - * so that when we do close the channel normally, - * we can clean up properly - */ - new_channel->state = HV_CHANNEL_OPEN_STATE; + mtx_lock(&hv_vmbus_g_connection.channel_lock); + TAILQ_REMOVE( + &hv_vmbus_g_connection.channel_anchor, + new_channel, + list_entry); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); + hv_vmbus_free_vmbus_channel(new_channel); + } +} + +/** + * Array of device guids that are performance critical. We try to distribute + * the interrupt load for these devices across all online cpus. + */ +static const hv_guid high_perf_devices[] = { + {HV_NIC_GUID, }, + {HV_IDE_GUID, }, + {HV_SCSI_GUID, }, +}; + +enum { + PERF_CHN_NIC = 0, + PERF_CHN_IDE, + PERF_CHN_SCSI, + MAX_PERF_CHN, +}; + +/* + * We use this static number to distribute the channel interrupt load. + */ +static uint32_t next_vcpu; + +/** + * Starting with Win8, we can statically distribute the incoming + * channel interrupt load by binding a channel to VCPU. We + * implement here a simple round robin scheme for distributing + * the interrupt load. + * We will bind channels that are not performance critical to cpu 0 and + * performance critical channels (IDE, SCSI and Network) will be uniformly + * distributed across all available CPUs. 
+ */ +static void +vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid) +{ + uint32_t current_cpu; + int i; + boolean_t is_perf_channel = FALSE; + + for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) { + if (memcmp(guid->data, high_perf_devices[i].data, + sizeof(hv_guid)) == 0) { + is_perf_channel = TRUE; + break; + } + } + if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || + (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) || + (!is_perf_channel)) { + /* Host's view of guest cpu */ + channel->target_vcpu = 0; + /* Guest's own view of cpu */ + channel->target_cpu = 0; + return; } + /* mp_ncpus should have the number cpus currently online */ + current_cpu = (++next_vcpu % mp_ncpus); + channel->target_cpu = current_cpu; + channel->target_vcpu = + hv_vmbus_g_context.hv_vcpu_index[current_cpu]; + if (bootverbose) + printf("VMBUS: Total online cpus %d, assign perf channel %d " + "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu, + current_cpu); } /** @@ -391,6 +489,38 @@ vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr) if (new_channel == NULL) return; + /* + * By default we setup state to enable batched + * reading. A specific service can choose to + * disable this prior to opening the channel. + */ + new_channel->batched_reading = TRUE; + + new_channel->signal_event_param = + (hv_vmbus_input_signal_event *) + (HV_ALIGN_UP((unsigned long) + &new_channel->signal_event_buffer, + HV_HYPERCALL_PARAM_ALIGN)); + + new_channel->signal_event_param->connection_id.as_uint32_t = 0; + new_channel->signal_event_param->connection_id.u.id = + HV_VMBUS_EVENT_CONNECTION_ID; + new_channel->signal_event_param->flag_number = 0; + new_channel->signal_event_param->rsvd_z = 0; + + if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) { + new_channel->is_dedicated_interrupt = + (offer->is_dedicated_interrupt != 0); + new_channel->signal_event_param->connection_id.u.id = + offer->connection_id; + } + + /* + * Bind the channel to a chosen cpu. 
+ */ + vmbus_channel_select_cpu(new_channel, + &offer->offer.interface_type); + memcpy(&new_channel->offer_msg, offer, sizeof(hv_vmbus_channel_offer_channel)); new_channel->monitor_group = (uint8_t) offer->monitor_id / 32; @@ -666,7 +796,7 @@ hv_vmbus_release_unattached_channels(void) { hv_vmbus_channel *channel; - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_lock(&hv_vmbus_g_connection.channel_lock); while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) { channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor); @@ -676,5 +806,61 @@ hv_vmbus_release_unattached_channels(void) hv_vmbus_child_device_unregister(channel->device); hv_vmbus_free_vmbus_channel(channel); } - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); +} + +/** + * @brief Select the best outgoing channel + * + * The channel whose vcpu binding is closest to the currect vcpu will + * be selected. + * If no multi-channel, always select primary channel + * + * @param primary - primary channel + */ +struct hv_vmbus_channel * +vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary) +{ + hv_vmbus_channel *new_channel = NULL; + hv_vmbus_channel *outgoing_channel = primary; + int old_cpu_distance = 0; + int new_cpu_distance = 0; + int cur_vcpu = 0; + int smp_pro_id = PCPU_GET(cpuid); + + if (TAILQ_EMPTY(&primary->sc_list_anchor)) { + return outgoing_channel; + } + + if (smp_pro_id >= MAXCPU) { + return outgoing_channel; + } + + cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id]; + + TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) { + if (new_channel->state != HV_CHANNEL_OPENED_STATE){ + continue; + } + + if (new_channel->target_vcpu == cur_vcpu){ + return new_channel; + } + + old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ? + (outgoing_channel->target_vcpu - cur_vcpu) : + (cur_vcpu - outgoing_channel->target_vcpu)); + + new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ? 
+ (new_channel->target_vcpu - cur_vcpu) : + (cur_vcpu - new_channel->target_vcpu)); + + if (old_cpu_distance < new_cpu_distance) { + continue; + } + + outgoing_channel = new_channel; + } + + return(outgoing_channel); } |