diff options
author | whu <whu@FreeBSD.org> | 2015-05-22 09:03:55 +0000 |
---|---|---|
committer | whu <whu@FreeBSD.org> | 2015-05-22 09:03:55 +0000 |
commit | 30cd3b9808be2ed4002993166f0790b8f07d95d8 (patch) | |
tree | f2a61a02982282e7de44dcf271ef00cd723d44c5 /sys/dev/hyperv/vmbus/hv_channel_mgmt.c | |
parent | b453b295750133b89170fcb27025f932be66ad18 (diff) | |
download | FreeBSD-src-30cd3b9808be2ed4002993166f0790b8f07d95d8.zip FreeBSD-src-30cd3b9808be2ed4002993166f0790b8f07d95d8.tar.gz |
MFC r282212:
Microsoft VMBus, storage, and other related driver enhancements for Hyper-V.
- Vmbus multi channel support.
- Vector interrupt support.
- Signal optimization.
- Storvsc driver performance improvement.
- Scatter and gather support for storvsc driver.
- Minor bug fix for KVP driver.
Thanks to royger, jhb and delphij from the FreeBSD community for the reviews
and comments. Thanks also to Hovy Xu from NetApp for the contributions to
the storvsc driver.
PR: 195238
Submitted by: whu
Reviewed by: royger
Approved by: royger
Relnotes: yes
Sponsored by: Microsoft OSTC
Differential Revision: https://reviews.freebsd.org/D2575
Diffstat (limited to 'sys/dev/hyperv/vmbus/hv_channel_mgmt.c')
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_channel_mgmt.c | 268 |
1 files changed, 227 insertions, 41 deletions
diff --git a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c index 011e305..d13ece5 100644 --- a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c +++ b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c @@ -26,6 +26,9 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #include <sys/param.h> #include <sys/mbuf.h> @@ -50,6 +53,8 @@ static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_process_offer(void *context); +struct hv_vmbus_channel* + vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary); /** * Channel message dispatch table @@ -233,6 +238,9 @@ hv_vmbus_allocate_channel(void) return (NULL); mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF); + mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF); + + TAILQ_INIT(&channel->sc_list_anchor); channel->control_work_queue = hv_work_queue_create("control"); @@ -262,6 +270,7 @@ ReleaseVmbusChannel(void *context) void hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel) { + mtx_destroy(&channel->sc_lock); mtx_destroy(&channel->inbound_lock); /* * We have to release the channel's workqueue/thread in @@ -279,10 +288,10 @@ hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel) static void vmbus_channel_process_offer(void *context) { - int ret; hv_vmbus_channel* new_channel; boolean_t f_new; hv_vmbus_channel* channel; + int ret; new_channel = (hv_vmbus_channel*) context; f_new = TRUE; @@ -291,38 +300,76 @@ vmbus_channel_process_offer(void *context) /* * Make sure this is a new offer */ - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor, list_entry) { - if (!memcmp( - 
&channel->offer_msg.offer.interface_type, - &new_channel->offer_msg.offer.interface_type, - sizeof(hv_guid)) - && !memcmp( - &channel->offer_msg.offer.interface_instance, + if (memcmp(&channel->offer_msg.offer.interface_type, + &new_channel->offer_msg.offer.interface_type, + sizeof(hv_guid)) == 0 && + memcmp(&channel->offer_msg.offer.interface_instance, &new_channel->offer_msg.offer.interface_instance, - sizeof(hv_guid))) { - f_new = FALSE; - break; - } + sizeof(hv_guid)) == 0) { + f_new = FALSE; + break; + } } if (f_new) { - /* Insert at tail */ - TAILQ_INSERT_TAIL( - &hv_vmbus_g_connection.channel_anchor, - new_channel, - list_entry); + /* Insert at tail */ + TAILQ_INSERT_TAIL( + &hv_vmbus_g_connection.channel_anchor, + new_channel, + list_entry); } - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); + + /*XXX add new channel to percpu_list */ if (!f_new) { + /* + * Check if this is a sub channel. + */ + if (new_channel->offer_msg.offer.sub_channel_index != 0) { + /* + * It is a sub channel offer, process it. + */ + new_channel->primary_channel = channel; + mtx_lock(&channel->sc_lock); + TAILQ_INSERT_TAIL( + &channel->sc_list_anchor, + new_channel, + sc_list_entry); + mtx_unlock(&channel->sc_lock); + + /* Insert new channel into channel_anchor. 
*/ + printf("Storvsc get multi-channel offer, rel=%u.\n", + new_channel->offer_msg.child_rel_id); + mtx_lock(&hv_vmbus_g_connection.channel_lock); + TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor, + new_channel, list_entry); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); + + if(bootverbose) + printf("VMBUS: new multi-channel offer <%p>.\n", + new_channel); + + /*XXX add it to percpu_list */ + + new_channel->state = HV_CHANNEL_OPEN_STATE; + if (channel->sc_creation_callback != NULL) { + channel->sc_creation_callback(new_channel); + } + return; + } + hv_vmbus_free_vmbus_channel(new_channel); return; } + new_channel->state = HV_CHANNEL_OPEN_STATE; + /* * Start the process of binding this offer to the driver * (We need to set the device field before calling @@ -333,35 +380,86 @@ vmbus_channel_process_offer(void *context) new_channel->offer_msg.offer.interface_instance, new_channel); /* - * TODO - the HV_CHANNEL_OPEN_STATE flag should not be set below - * but in the "open" channel request. The ret != 0 logic below - * doesn't take into account that a channel - * may have been opened successfully - */ - - /* * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. 
*/ ret = hv_vmbus_child_device_register(new_channel->device); if (ret != 0) { - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); - TAILQ_REMOVE( - &hv_vmbus_g_connection.channel_anchor, - new_channel, - list_entry); - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); - hv_vmbus_free_vmbus_channel(new_channel); - } else { - /* - * This state is used to indicate a successful open - * so that when we do close the channel normally, - * we can clean up properly - */ - new_channel->state = HV_CHANNEL_OPEN_STATE; + mtx_lock(&hv_vmbus_g_connection.channel_lock); + TAILQ_REMOVE( + &hv_vmbus_g_connection.channel_anchor, + new_channel, + list_entry); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); + hv_vmbus_free_vmbus_channel(new_channel); + } +} + +/** + * Array of device guids that are performance critical. We try to distribute + * the interrupt load for these devices across all online cpus. + */ +static const hv_guid high_perf_devices[] = { + {HV_NIC_GUID, }, + {HV_IDE_GUID, }, + {HV_SCSI_GUID, }, +}; + +enum { + PERF_CHN_NIC = 0, + PERF_CHN_IDE, + PERF_CHN_SCSI, + MAX_PERF_CHN, +}; + +/* + * We use this static number to distribute the channel interrupt load. + */ +static uint32_t next_vcpu; + +/** + * Starting with Win8, we can statically distribute the incoming + * channel interrupt load by binding a channel to VCPU. We + * implement here a simple round robin scheme for distributing + * the interrupt load. + * We will bind channels that are not performance critical to cpu 0 and + * performance critical channels (IDE, SCSI and Network) will be uniformly + * distributed across all available CPUs. 
+ */ +static void +vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid) +{ + uint32_t current_cpu; + int i; + boolean_t is_perf_channel = FALSE; + + for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) { + if (memcmp(guid->data, high_perf_devices[i].data, + sizeof(hv_guid)) == 0) { + is_perf_channel = TRUE; + break; + } + } + if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || + (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) || + (!is_perf_channel)) { + /* Host's view of guest cpu */ + channel->target_vcpu = 0; + /* Guest's own view of cpu */ + channel->target_cpu = 0; + return; } + /* mp_ncpus should have the number cpus currently online */ + current_cpu = (++next_vcpu % mp_ncpus); + channel->target_cpu = current_cpu; + channel->target_vcpu = + hv_vmbus_g_context.hv_vcpu_index[current_cpu]; + if (bootverbose) + printf("VMBUS: Total online cpus %d, assign perf channel %d " + "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu, + current_cpu); } /** @@ -391,6 +489,38 @@ vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr) if (new_channel == NULL) return; + /* + * By default we setup state to enable batched + * reading. A specific service can choose to + * disable this prior to opening the channel. + */ + new_channel->batched_reading = TRUE; + + new_channel->signal_event_param = + (hv_vmbus_input_signal_event *) + (HV_ALIGN_UP((unsigned long) + &new_channel->signal_event_buffer, + HV_HYPERCALL_PARAM_ALIGN)); + + new_channel->signal_event_param->connection_id.as_uint32_t = 0; + new_channel->signal_event_param->connection_id.u.id = + HV_VMBUS_EVENT_CONNECTION_ID; + new_channel->signal_event_param->flag_number = 0; + new_channel->signal_event_param->rsvd_z = 0; + + if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) { + new_channel->is_dedicated_interrupt = + (offer->is_dedicated_interrupt != 0); + new_channel->signal_event_param->connection_id.u.id = + offer->connection_id; + } + + /* + * Bind the channel to a chosen cpu. 
+ */ + vmbus_channel_select_cpu(new_channel, + &offer->offer.interface_type); + memcpy(&new_channel->offer_msg, offer, sizeof(hv_vmbus_channel_offer_channel)); new_channel->monitor_group = (uint8_t) offer->monitor_id / 32; @@ -666,7 +796,7 @@ hv_vmbus_release_unattached_channels(void) { hv_vmbus_channel *channel; - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_lock(&hv_vmbus_g_connection.channel_lock); while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) { channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor); @@ -676,5 +806,61 @@ hv_vmbus_release_unattached_channels(void) hv_vmbus_child_device_unregister(channel->device); hv_vmbus_free_vmbus_channel(channel); } - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); +} + +/** + * @brief Select the best outgoing channel + * + * The channel whose vcpu binding is closest to the currect vcpu will + * be selected. + * If no multi-channel, always select primary channel + * + * @param primary - primary channel + */ +struct hv_vmbus_channel * +vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary) +{ + hv_vmbus_channel *new_channel = NULL; + hv_vmbus_channel *outgoing_channel = primary; + int old_cpu_distance = 0; + int new_cpu_distance = 0; + int cur_vcpu = 0; + int smp_pro_id = PCPU_GET(cpuid); + + if (TAILQ_EMPTY(&primary->sc_list_anchor)) { + return outgoing_channel; + } + + if (smp_pro_id >= MAXCPU) { + return outgoing_channel; + } + + cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id]; + + TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) { + if (new_channel->state != HV_CHANNEL_OPENED_STATE){ + continue; + } + + if (new_channel->target_vcpu == cur_vcpu){ + return new_channel; + } + + old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ? + (outgoing_channel->target_vcpu - cur_vcpu) : + (cur_vcpu - outgoing_channel->target_vcpu)); + + new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ? 
+ (new_channel->target_vcpu - cur_vcpu) : + (cur_vcpu - new_channel->target_vcpu)); + + if (old_cpu_distance < new_cpu_distance) { + continue; + } + + outgoing_channel = new_channel; + } + + return(outgoing_channel); } |