summaryrefslogtreecommitdiffstats
path: root/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
diff options
context:
space:
mode:
authorwhu <whu@FreeBSD.org>2015-05-22 09:03:55 +0000
committerwhu <whu@FreeBSD.org>2015-05-22 09:03:55 +0000
commit30cd3b9808be2ed4002993166f0790b8f07d95d8 (patch)
treef2a61a02982282e7de44dcf271ef00cd723d44c5 /sys/dev/hyperv/vmbus/hv_channel_mgmt.c
parentb453b295750133b89170fcb27025f932be66ad18 (diff)
downloadFreeBSD-src-30cd3b9808be2ed4002993166f0790b8f07d95d8.zip
FreeBSD-src-30cd3b9808be2ed4002993166f0790b8f07d95d8.tar.gz
MFC r282212:
Microsoft vmbus, storage and other related driver enhancements for HyperV. - Vmbus multi channel support. - Vector interrupt support. - Signal optimization. - Storvsc driver performance improvement. - Scatter and gather support for storvsc driver. - Minor bug fix for KVP driver. Thanks royger, jhb and delphij from FreeBSD community for the reviews and comments. Also thanks Hovy Xu from NetApp for the contributions to the storvsc driver. PR: 195238 Submitted by: whu Reviewed by: royger Approved by: royger Relnotes: yes Sponsored by: Microsoft OSTC Differential Revision: https://reviews.freebsd.org/D2575
Diffstat (limited to 'sys/dev/hyperv/vmbus/hv_channel_mgmt.c')
-rw-r--r--sys/dev/hyperv/vmbus/hv_channel_mgmt.c268
1 files changed, 227 insertions, 41 deletions
diff --git a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
index 011e305..d13ece5 100644
--- a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
+++ b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
@@ -26,6 +26,9 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
#include <sys/param.h>
#include <sys/mbuf.h>
@@ -50,6 +53,8 @@ static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_process_offer(void *context);
+struct hv_vmbus_channel*
+ vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary);
/**
* Channel message dispatch table
@@ -233,6 +238,9 @@ hv_vmbus_allocate_channel(void)
return (NULL);
mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF);
+ mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF);
+
+ TAILQ_INIT(&channel->sc_list_anchor);
channel->control_work_queue = hv_work_queue_create("control");
@@ -262,6 +270,7 @@ ReleaseVmbusChannel(void *context)
void
hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
{
+ mtx_destroy(&channel->sc_lock);
mtx_destroy(&channel->inbound_lock);
/*
* We have to release the channel's workqueue/thread in
@@ -279,10 +288,10 @@ hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
static void
vmbus_channel_process_offer(void *context)
{
- int ret;
hv_vmbus_channel* new_channel;
boolean_t f_new;
hv_vmbus_channel* channel;
+ int ret;
new_channel = (hv_vmbus_channel*) context;
f_new = TRUE;
@@ -291,38 +300,76 @@ vmbus_channel_process_offer(void *context)
/*
* Make sure this is a new offer
*/
- mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_lock);
TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor,
list_entry)
{
- if (!memcmp(
- &channel->offer_msg.offer.interface_type,
- &new_channel->offer_msg.offer.interface_type,
- sizeof(hv_guid))
- && !memcmp(
- &channel->offer_msg.offer.interface_instance,
+ if (memcmp(&channel->offer_msg.offer.interface_type,
+ &new_channel->offer_msg.offer.interface_type,
+ sizeof(hv_guid)) == 0 &&
+ memcmp(&channel->offer_msg.offer.interface_instance,
&new_channel->offer_msg.offer.interface_instance,
- sizeof(hv_guid))) {
- f_new = FALSE;
- break;
- }
+ sizeof(hv_guid)) == 0) {
+ f_new = FALSE;
+ break;
+ }
}
if (f_new) {
- /* Insert at tail */
- TAILQ_INSERT_TAIL(
- &hv_vmbus_g_connection.channel_anchor,
- new_channel,
- list_entry);
+ /* Insert at tail */
+ TAILQ_INSERT_TAIL(
+ &hv_vmbus_g_connection.channel_anchor,
+ new_channel,
+ list_entry);
}
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+
+ /*XXX add new channel to percpu_list */
if (!f_new) {
+ /*
+ * Check if this is a sub channel.
+ */
+ if (new_channel->offer_msg.offer.sub_channel_index != 0) {
+ /*
+ * It is a sub channel offer, process it.
+ */
+ new_channel->primary_channel = channel;
+ mtx_lock(&channel->sc_lock);
+ TAILQ_INSERT_TAIL(
+ &channel->sc_list_anchor,
+ new_channel,
+ sc_list_entry);
+ mtx_unlock(&channel->sc_lock);
+
+ /* Insert new channel into channel_anchor. */
+ printf("Storvsc get multi-channel offer, rel=%u.\n",
+ new_channel->offer_msg.child_rel_id);
+ mtx_lock(&hv_vmbus_g_connection.channel_lock);
+ TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor,
+ new_channel, list_entry);
+ mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+
+ if(bootverbose)
+ printf("VMBUS: new multi-channel offer <%p>.\n",
+ new_channel);
+
+ /*XXX add it to percpu_list */
+
+ new_channel->state = HV_CHANNEL_OPEN_STATE;
+ if (channel->sc_creation_callback != NULL) {
+ channel->sc_creation_callback(new_channel);
+ }
+ return;
+ }
+
hv_vmbus_free_vmbus_channel(new_channel);
return;
}
+ new_channel->state = HV_CHANNEL_OPEN_STATE;
+
/*
* Start the process of binding this offer to the driver
* (We need to set the device field before calling
@@ -333,35 +380,86 @@ vmbus_channel_process_offer(void *context)
new_channel->offer_msg.offer.interface_instance, new_channel);
/*
- * TODO - the HV_CHANNEL_OPEN_STATE flag should not be set below
- * but in the "open" channel request. The ret != 0 logic below
- * doesn't take into account that a channel
- * may have been opened successfully
- */
-
- /*
* Add the new device to the bus. This will kick off device-driver
* binding which eventually invokes the device driver's AddDevice()
* method.
*/
ret = hv_vmbus_child_device_register(new_channel->device);
if (ret != 0) {
- mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
- TAILQ_REMOVE(
- &hv_vmbus_g_connection.channel_anchor,
- new_channel,
- list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
- hv_vmbus_free_vmbus_channel(new_channel);
- } else {
- /*
- * This state is used to indicate a successful open
- * so that when we do close the channel normally,
- * we can clean up properly
- */
- new_channel->state = HV_CHANNEL_OPEN_STATE;
+ mtx_lock(&hv_vmbus_g_connection.channel_lock);
+ TAILQ_REMOVE(
+ &hv_vmbus_g_connection.channel_anchor,
+ new_channel,
+ list_entry);
+ mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+ hv_vmbus_free_vmbus_channel(new_channel);
+ }
+}
+
+/**
+ * Array of device guids that are performance critical. We try to distribute
+ * the interrupt load for these devices across all online cpus.
+ */
+static const hv_guid high_perf_devices[] = {
+ {HV_NIC_GUID, },
+ {HV_IDE_GUID, },
+ {HV_SCSI_GUID, },
+};
+
+enum {
+ PERF_CHN_NIC = 0,
+ PERF_CHN_IDE,
+ PERF_CHN_SCSI,
+ MAX_PERF_CHN,
+};
+
+/*
+ * We use this static number to distribute the channel interrupt load.
+ */
+static uint32_t next_vcpu;
+
+/**
+ * Starting with Win8, we can statically distribute the incoming
+ * channel interrupt load by binding a channel to VCPU. We
+ * implement here a simple round robin scheme for distributing
+ * the interrupt load.
+ * We will bind channels that are not performance critical to cpu 0 and
+ * performance critical channels (IDE, SCSI and Network) will be uniformly
+ * distributed across all available CPUs.
+ */
+static void
+vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid)
+{
+ uint32_t current_cpu;
+ int i;
+ boolean_t is_perf_channel = FALSE;
+
+ for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) {
+ if (memcmp(guid->data, high_perf_devices[i].data,
+ sizeof(hv_guid)) == 0) {
+ is_perf_channel = TRUE;
+ break;
+ }
+ }
+ if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
+ (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) ||
+ (!is_perf_channel)) {
+ /* Host's view of guest cpu */
+ channel->target_vcpu = 0;
+ /* Guest's own view of cpu */
+ channel->target_cpu = 0;
+ return;
}
+ /* mp_ncpus should have the number cpus currently online */
+ current_cpu = (++next_vcpu % mp_ncpus);
+ channel->target_cpu = current_cpu;
+ channel->target_vcpu =
+ hv_vmbus_g_context.hv_vcpu_index[current_cpu];
+ if (bootverbose)
+ printf("VMBUS: Total online cpus %d, assign perf channel %d "
+ "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu,
+ current_cpu);
}
/**
@@ -391,6 +489,38 @@ vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
if (new_channel == NULL)
return;
+ /*
+ * By default we setup state to enable batched
+ * reading. A specific service can choose to
+ * disable this prior to opening the channel.
+ */
+ new_channel->batched_reading = TRUE;
+
+ new_channel->signal_event_param =
+ (hv_vmbus_input_signal_event *)
+ (HV_ALIGN_UP((unsigned long)
+ &new_channel->signal_event_buffer,
+ HV_HYPERCALL_PARAM_ALIGN));
+
+ new_channel->signal_event_param->connection_id.as_uint32_t = 0;
+ new_channel->signal_event_param->connection_id.u.id =
+ HV_VMBUS_EVENT_CONNECTION_ID;
+ new_channel->signal_event_param->flag_number = 0;
+ new_channel->signal_event_param->rsvd_z = 0;
+
+ if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) {
+ new_channel->is_dedicated_interrupt =
+ (offer->is_dedicated_interrupt != 0);
+ new_channel->signal_event_param->connection_id.u.id =
+ offer->connection_id;
+ }
+
+ /*
+ * Bind the channel to a chosen cpu.
+ */
+ vmbus_channel_select_cpu(new_channel,
+ &offer->offer.interface_type);
+
memcpy(&new_channel->offer_msg, offer,
sizeof(hv_vmbus_channel_offer_channel));
new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
@@ -666,7 +796,7 @@ hv_vmbus_release_unattached_channels(void)
{
hv_vmbus_channel *channel;
- mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_lock);
while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) {
channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor);
@@ -676,5 +806,61 @@ hv_vmbus_release_unattached_channels(void)
hv_vmbus_child_device_unregister(channel->device);
hv_vmbus_free_vmbus_channel(channel);
}
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+}
+
+/**
+ * @brief Select the best outgoing channel
+ *
+ * The channel whose vcpu binding is closest to the currect vcpu will
+ * be selected.
+ * If no multi-channel, always select primary channel
+ *
+ * @param primary - primary channel
+ */
+struct hv_vmbus_channel *
+vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
+{
+ hv_vmbus_channel *new_channel = NULL;
+ hv_vmbus_channel *outgoing_channel = primary;
+ int old_cpu_distance = 0;
+ int new_cpu_distance = 0;
+ int cur_vcpu = 0;
+ int smp_pro_id = PCPU_GET(cpuid);
+
+ if (TAILQ_EMPTY(&primary->sc_list_anchor)) {
+ return outgoing_channel;
+ }
+
+ if (smp_pro_id >= MAXCPU) {
+ return outgoing_channel;
+ }
+
+ cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id];
+
+ TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) {
+ if (new_channel->state != HV_CHANNEL_OPENED_STATE){
+ continue;
+ }
+
+ if (new_channel->target_vcpu == cur_vcpu){
+ return new_channel;
+ }
+
+ old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ?
+ (outgoing_channel->target_vcpu - cur_vcpu) :
+ (cur_vcpu - outgoing_channel->target_vcpu));
+
+ new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ?
+ (new_channel->target_vcpu - cur_vcpu) :
+ (cur_vcpu - new_channel->target_vcpu));
+
+ if (old_cpu_distance < new_cpu_distance) {
+ continue;
+ }
+
+ outgoing_channel = new_channel;
+ }
+
+ return(outgoing_channel);
}
OpenPOWER on IntegriCloud