diff options
author | grehan <grehan@FreeBSD.org> | 2013-07-17 06:30:23 +0000 |
---|---|---|
committer | grehan <grehan@FreeBSD.org> | 2013-07-17 06:30:23 +0000 |
commit | c8195f5331ccad33ad4e265362523f51b96abd5c (patch) | |
tree | 5b7b0a05acafc6450fb27259d1c1d5008e1f88f4 /sys/dev/hyperv/vmbus | |
parent | 6a7baaf83640e0eaa135d2f7a3c1d4401f1683bf (diff) | |
download | FreeBSD-src-c8195f5331ccad33ad4e265362523f51b96abd5c.zip FreeBSD-src-c8195f5331ccad33ad4e265362523f51b96abd5c.tar.gz |
Microsoft have changed their policy on how the hyper-v code will
be pulled into FreeBSD. From now on, FreeBSD will be considered
the upstream repo.
First step: move the drivers away from the contrib area and into
the base system.
A follow-on commit will include the drivers in the amd64 GENERIC kernel.
Diffstat (limited to 'sys/dev/hyperv/vmbus')
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_channel.c | 842 | ||||
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_channel_mgmt.c | 680 | ||||
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_connection.c | 431 | ||||
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_hv.c | 515 | ||||
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_ring_buffer.c | 440 | ||||
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c | 583 | ||||
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_vmbus_priv.h | 770 |
7 files changed, 4261 insertions, 0 deletions
diff --git a/sys/dev/hyperv/vmbus/hv_channel.c b/sys/dev/hyperv/vmbus/hv_channel.c new file mode 100644 index 0000000..17dfd76 --- /dev/null +++ b/sys/dev/hyperv/vmbus/hv_channel.c @@ -0,0 +1,842 @@ +/*- + * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <machine/bus.h> +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> + +#include "hv_vmbus_priv.h" + +static int vmbus_channel_create_gpadl_header( + /* must be phys and virt contiguous*/ + void* contig_buffer, + /* page-size multiple */ + uint32_t size, + hv_vmbus_channel_msg_info** msg_info, + uint32_t* message_count); + +static void vmbus_channel_set_event(hv_vmbus_channel* channel); + +/** + * @brief Trigger an event notification on the specified channel + */ +static void +vmbus_channel_set_event(hv_vmbus_channel *channel) +{ + hv_vmbus_monitor_page *monitor_page; + + if (channel->offer_msg.monitor_allocated) { + /* Each uint32_t represents 32 channels */ + synch_set_bit((channel->offer_msg.child_rel_id & 31), + ((uint32_t *)hv_vmbus_g_connection.send_interrupt_page + + ((channel->offer_msg.child_rel_id >> 5)))); + + monitor_page = (hv_vmbus_monitor_page *) + hv_vmbus_g_connection.monitor_pages; + + monitor_page++; /* Get the child to parent monitor page */ + + synch_set_bit(channel->monitor_bit, + (uint32_t *)&monitor_page-> + trigger_group[channel->monitor_group].pending); + } else { + hv_vmbus_set_event(channel->offer_msg.child_rel_id); + } + +} + +/** + * @brief Open the specified channel + */ +int +hv_vmbus_channel_open( + hv_vmbus_channel* new_channel, + uint32_t send_ring_buffer_size, + uint32_t recv_ring_buffer_size, + void* user_data, + uint32_t user_data_len, + hv_vmbus_pfn_channel_callback pfn_on_channel_callback, + void* context) +{ + + int ret = 0; + void *in, *out; + hv_vmbus_channel_open_channel* open_msg; + hv_vmbus_channel_msg_info* open_info; + + new_channel->on_channel_callback = pfn_on_channel_callback; + new_channel->channel_callback_context = context; + + /* Allocate the ring buffer */ + out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size), + M_DEVBUF, 
M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); + KASSERT(out != NULL, + ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!")); + if (out == NULL) + return (ENOMEM); + + in = ((uint8_t *) out + send_ring_buffer_size); + + new_channel->ring_buffer_pages = out; + new_channel->ring_buffer_page_count = (send_ring_buffer_size + + recv_ring_buffer_size) >> PAGE_SHIFT; + + hv_vmbus_ring_buffer_init( + &new_channel->outbound, + out, + send_ring_buffer_size); + + hv_vmbus_ring_buffer_init( + &new_channel->inbound, + in, + recv_ring_buffer_size); + + /** + * Establish the gpadl for the ring buffer + */ + new_channel->ring_buffer_gpadl_handle = 0; + + ret = hv_vmbus_channel_establish_gpadl(new_channel, + new_channel->outbound.ring_buffer, + send_ring_buffer_size + recv_ring_buffer_size, + &new_channel->ring_buffer_gpadl_handle); + + /** + * Create and init the channel open message + */ + open_info = (hv_vmbus_channel_msg_info*) malloc( + sizeof(hv_vmbus_channel_msg_info) + + sizeof(hv_vmbus_channel_open_channel), + M_DEVBUF, + M_NOWAIT); + KASSERT(open_info != NULL, + ("Error VMBUS: malloc failed to allocate Open Channel message!")); + + if (open_info == NULL) + return (ENOMEM); + + sema_init(&open_info->wait_sema, 0, "Open Info Sema"); + + open_msg = (hv_vmbus_channel_open_channel*) open_info->msg; + open_msg->header.message_type = HV_CHANNEL_MESSAGE_OPEN_CHANNEL; + open_msg->open_id = new_channel->offer_msg.child_rel_id; + open_msg->child_rel_id = new_channel->offer_msg.child_rel_id; + open_msg->ring_buffer_gpadl_handle = + new_channel->ring_buffer_gpadl_handle; + open_msg->downstream_ring_buffer_page_offset = send_ring_buffer_size + >> PAGE_SHIFT; + open_msg->server_context_area_gpadl_handle = 0; + + if (user_data_len) + memcpy(open_msg->user_data, user_data, user_data_len); + + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_INSERT_TAIL( + &hv_vmbus_g_connection.channel_msg_anchor, + open_info, + msg_list_entry); + 
mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + ret = hv_vmbus_post_message( + open_msg, sizeof(hv_vmbus_channel_open_channel)); + + if (ret != 0) + goto cleanup; + + ret = sema_timedwait(&open_info->wait_sema, 500); /* KYS 5 seconds */ + + if (ret) + goto cleanup; + + if (open_info->response.open_result.status == 0) { + if(bootverbose) + printf("VMBUS: channel <%p> open success.\n", new_channel); + } else { + if(bootverbose) + printf("Error VMBUS: channel <%p> open failed - %d!\n", + new_channel, open_info->response.open_result.status); + } + + cleanup: + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_REMOVE( + &hv_vmbus_g_connection.channel_msg_anchor, + open_info, + msg_list_entry); + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + sema_destroy(&open_info->wait_sema); + free(open_info, M_DEVBUF); + + return (ret); +} + +/** + * @brief Create a gpadl for the specified buffer + */ +static int +vmbus_channel_create_gpadl_header( + void* contig_buffer, + uint32_t size, /* page-size multiple */ + hv_vmbus_channel_msg_info** msg_info, + uint32_t* message_count) +{ + int i; + int page_count; + unsigned long long pfn; + uint32_t msg_size; + hv_vmbus_channel_gpadl_header* gpa_header; + hv_vmbus_channel_gpadl_body* gpadl_body; + hv_vmbus_channel_msg_info* msg_header; + hv_vmbus_channel_msg_info* msg_body; + + int pfnSum, pfnCount, pfnLeft, pfnCurr, pfnSize; + + page_count = size >> PAGE_SHIFT; + pfn = hv_get_phys_addr(contig_buffer) >> PAGE_SHIFT; + + /*do we need a gpadl body msg */ + pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE + - sizeof(hv_vmbus_channel_gpadl_header) + - sizeof(hv_gpa_range); + pfnCount = pfnSize / sizeof(uint64_t); + + if (page_count > pfnCount) { /* if(we need a gpadl body) */ + /* fill in the header */ + msg_size = sizeof(hv_vmbus_channel_msg_info) + + sizeof(hv_vmbus_channel_gpadl_header) + + sizeof(hv_gpa_range) + + pfnCount * sizeof(uint64_t); + msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); + 
KASSERT( + msg_header != NULL, + ("Error VMBUS: malloc failed to allocate Gpadl Message!")); + if (msg_header == NULL) + return (ENOMEM); + + TAILQ_INIT(&msg_header->sub_msg_list_anchor); + msg_header->message_size = msg_size; + + gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg; + gpa_header->range_count = 1; + gpa_header->range_buf_len = sizeof(hv_gpa_range) + + page_count * sizeof(uint64_t); + gpa_header->range[0].byte_offset = 0; + gpa_header->range[0].byte_count = size; + for (i = 0; i < pfnCount; i++) { + gpa_header->range[0].pfn_array[i] = pfn + i; + } + *msg_info = msg_header; + *message_count = 1; + + pfnSum = pfnCount; + pfnLeft = page_count - pfnCount; + + /* + * figure out how many pfns we can fit + */ + pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE + - sizeof(hv_vmbus_channel_gpadl_body); + pfnCount = pfnSize / sizeof(uint64_t); + + /* + * fill in the body + */ + while (pfnLeft) { + if (pfnLeft > pfnCount) { + pfnCurr = pfnCount; + } else { + pfnCurr = pfnLeft; + } + + msg_size = sizeof(hv_vmbus_channel_msg_info) + + sizeof(hv_vmbus_channel_gpadl_body) + + pfnCurr * sizeof(uint64_t); + msg_body = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); + KASSERT( + msg_body != NULL, + ("Error VMBUS: malloc failed to allocate Gpadl msg_body!")); + if (msg_body == NULL) + return (ENOMEM); + + msg_body->message_size = msg_size; + (*message_count)++; + gpadl_body = + (hv_vmbus_channel_gpadl_body*) msg_body->msg; + /* + * gpadl_body->gpadl = kbuffer; + */ + for (i = 0; i < pfnCurr; i++) { + gpadl_body->pfn[i] = pfn + pfnSum + i; + } + + TAILQ_INSERT_TAIL( + &msg_header->sub_msg_list_anchor, + msg_body, + msg_list_entry); + pfnSum += pfnCurr; + pfnLeft -= pfnCurr; + } + } else { /* else everything fits in a header */ + + msg_size = sizeof(hv_vmbus_channel_msg_info) + + sizeof(hv_vmbus_channel_gpadl_header) + + sizeof(hv_gpa_range) + + page_count * sizeof(uint64_t); + msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); + KASSERT( + msg_header != NULL, + 
("Error VMBUS: malloc failed to allocate Gpadl Message!")); + if (msg_header == NULL) + return (ENOMEM); + + msg_header->message_size = msg_size; + + gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg; + gpa_header->range_count = 1; + gpa_header->range_buf_len = sizeof(hv_gpa_range) + + page_count * sizeof(uint64_t); + gpa_header->range[0].byte_offset = 0; + gpa_header->range[0].byte_count = size; + for (i = 0; i < page_count; i++) { + gpa_header->range[0].pfn_array[i] = pfn + i; + } + + *msg_info = msg_header; + *message_count = 1; + } + + return (0); +} + +/** + * @brief Establish a GPADL for the specified buffer + */ +int +hv_vmbus_channel_establish_gpadl( + hv_vmbus_channel* channel, + void* contig_buffer, + uint32_t size, /* page-size multiple */ + uint32_t* gpadl_handle) + +{ + int ret = 0; + hv_vmbus_channel_gpadl_header* gpadl_msg; + hv_vmbus_channel_gpadl_body* gpadl_body; + hv_vmbus_channel_msg_info* msg_info; + hv_vmbus_channel_msg_info* sub_msg_info; + uint32_t msg_count; + hv_vmbus_channel_msg_info* curr; + uint32_t next_gpadl_handle; + + next_gpadl_handle = hv_vmbus_g_connection.next_gpadl_handle; + atomic_add_int((int*) &hv_vmbus_g_connection.next_gpadl_handle, 1); + + ret = vmbus_channel_create_gpadl_header( + contig_buffer, size, &msg_info, &msg_count); + + if(ret != 0) { /* if(allocation failed) return immediately */ + /* reverse atomic_add_int above */ + atomic_subtract_int((int*) + &hv_vmbus_g_connection.next_gpadl_handle, 1); + return ret; + } + + sema_init(&msg_info->wait_sema, 0, "Open Info Sema"); + gpadl_msg = (hv_vmbus_channel_gpadl_header*) msg_info->msg; + gpadl_msg->header.message_type = HV_CHANNEL_MESSAGEL_GPADL_HEADER; + gpadl_msg->child_rel_id = channel->offer_msg.child_rel_id; + gpadl_msg->gpadl = next_gpadl_handle; + + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_INSERT_TAIL( + &hv_vmbus_g_connection.channel_msg_anchor, + msg_info, + msg_list_entry); + + 
mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + ret = hv_vmbus_post_message( + gpadl_msg, + msg_info->message_size - + (uint32_t) sizeof(hv_vmbus_channel_msg_info)); + + if (ret != 0) + goto cleanup; + + if (msg_count > 1) { + TAILQ_FOREACH(curr, + &msg_info->sub_msg_list_anchor, msg_list_entry) { + sub_msg_info = curr; + gpadl_body = + (hv_vmbus_channel_gpadl_body*) sub_msg_info->msg; + + gpadl_body->header.message_type = + HV_CHANNEL_MESSAGE_GPADL_BODY; + gpadl_body->gpadl = next_gpadl_handle; + + ret = hv_vmbus_post_message( + gpadl_body, + sub_msg_info->message_size + - (uint32_t) sizeof(hv_vmbus_channel_msg_info)); + /* if (the post message failed) give up and clean up */ + if(ret != 0) + goto cleanup; + } + } + + ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds*/ + if (ret != 0) + goto cleanup; + + *gpadl_handle = gpadl_msg->gpadl; + +cleanup: + + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor, + msg_info, msg_list_entry); + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + sema_destroy(&msg_info->wait_sema); + free(msg_info, M_DEVBUF); + + return (ret); +} + +/** + * @brief Teardown the specified GPADL handle + */ +int +hv_vmbus_channel_teardown_gpdal( + hv_vmbus_channel* channel, + uint32_t gpadl_handle) +{ + int ret = 0; + hv_vmbus_channel_gpadl_teardown* msg; + hv_vmbus_channel_msg_info* info; + + info = (hv_vmbus_channel_msg_info *) + malloc( sizeof(hv_vmbus_channel_msg_info) + + sizeof(hv_vmbus_channel_gpadl_teardown), + M_DEVBUF, M_NOWAIT); + KASSERT(info != NULL, + ("Error VMBUS: malloc failed to allocate Gpadl Teardown Msg!")); + if (info == NULL) { + ret = ENOMEM; + goto cleanup; + } + + sema_init(&info->wait_sema, 0, "Open Info Sema"); + + msg = (hv_vmbus_channel_gpadl_teardown*) info->msg; + + msg->header.message_type = HV_CHANNEL_MESSAGE_GPADL_TEARDOWN; + msg->child_rel_id = channel->offer_msg.child_rel_id; + msg->gpadl = gpadl_handle; + + 
mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_msg_anchor, + info, msg_list_entry); + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + ret = hv_vmbus_post_message(msg, + sizeof(hv_vmbus_channel_gpadl_teardown)); + if (ret != 0) + goto cleanup; + + ret = sema_timedwait(&info->wait_sema, 500); /* KYS 5 seconds */ + +cleanup: + /* + * Received a torndown response + */ + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor, + info, msg_list_entry); + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + sema_destroy(&info->wait_sema); + free(info, M_DEVBUF); + + return (ret); +} + +/** + * @brief Close the specified channel + */ +void +hv_vmbus_channel_close(hv_vmbus_channel *channel) +{ + int ret = 0; + hv_vmbus_channel_close_channel* msg; + hv_vmbus_channel_msg_info* info; + + mtx_lock(&channel->inbound_lock); + channel->on_channel_callback = NULL; + mtx_unlock(&channel->inbound_lock); + + /** + * Send a closing message + */ + info = (hv_vmbus_channel_msg_info *) + malloc( sizeof(hv_vmbus_channel_msg_info) + + sizeof(hv_vmbus_channel_close_channel), + M_DEVBUF, M_NOWAIT); + KASSERT(info != NULL, ("VMBUS: malloc failed hv_vmbus_channel_close!")); + if(info == NULL) + return; + + msg = (hv_vmbus_channel_close_channel*) info->msg; + msg->header.message_type = HV_CHANNEL_MESSAGE_CLOSE_CHANNEL; + msg->child_rel_id = channel->offer_msg.child_rel_id; + + ret = hv_vmbus_post_message( + msg, sizeof(hv_vmbus_channel_close_channel)); + + /* Tear down the gpadl for the channel's ring buffer */ + if (channel->ring_buffer_gpadl_handle) { + hv_vmbus_channel_teardown_gpdal(channel, + channel->ring_buffer_gpadl_handle); + } + + /* TODO: Send a msg to release the childRelId */ + + /* cleanup the ring buffers for this channel */ + hv_ring_buffer_cleanup(&channel->outbound); + hv_ring_buffer_cleanup(&channel->inbound); + + contigfree( + 
channel->ring_buffer_pages, + channel->ring_buffer_page_count, + M_DEVBUF); + + free(info, M_DEVBUF); + + /* + * If we are closing the channel during an error path in + * opening the channel, don't free the channel + * since the caller will free the channel + */ + if (channel->state == HV_CHANNEL_OPEN_STATE) { + mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + TAILQ_REMOVE( + &hv_vmbus_g_connection.channel_anchor, + channel, + list_entry); + mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + + hv_vmbus_free_vmbus_channel(channel); + } + +} + +/** + * @brief Send the specified buffer on the given channel + */ +int +hv_vmbus_channel_send_packet( + hv_vmbus_channel* channel, + void* buffer, + uint32_t buffer_len, + uint64_t request_id, + hv_vmbus_packet_type type, + uint32_t flags) +{ + int ret = 0; + hv_vm_packet_descriptor desc; + uint32_t packet_len; + uint64_t aligned_data; + uint32_t packet_len_aligned; + hv_vmbus_sg_buffer_list buffer_list[3]; + + packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len; + packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); + aligned_data = 0; + + /* Setup the descriptor */ + desc.type = type; /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND; */ + desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */ + /* in 8-bytes granularity */ + desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3; + desc.length8 = (uint16_t) (packet_len_aligned >> 3); + desc.transaction_id = request_id; + + buffer_list[0].data = &desc; + buffer_list[0].length = sizeof(hv_vm_packet_descriptor); + + buffer_list[1].data = buffer; + buffer_list[1].length = buffer_len; + + buffer_list[2].data = &aligned_data; + buffer_list[2].length = packet_len_aligned - packet_len; + + ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3); + + /* TODO: We should determine if this is optional */ + if (ret == 0 + && !hv_vmbus_get_ring_buffer_interrupt_mask( + &channel->outbound)) { + vmbus_channel_set_event(channel); + } + + return 
(ret); +} + +/** + * @brief Send a range of single-page buffer packets using + * a GPADL Direct packet type + */ +int +hv_vmbus_channel_send_packet_pagebuffer( + hv_vmbus_channel* channel, + hv_vmbus_page_buffer page_buffers[], + uint32_t page_count, + void* buffer, + uint32_t buffer_len, + uint64_t request_id) +{ + + int ret = 0; + int i = 0; + uint32_t packet_len; + uint32_t packetLen_aligned; + hv_vmbus_sg_buffer_list buffer_list[3]; + hv_vmbus_channel_packet_page_buffer desc; + uint32_t descSize; + uint64_t alignedData = 0; + + if (page_count > HV_MAX_PAGE_BUFFER_COUNT) + return (EINVAL); + + /* + * Adjust the size down since hv_vmbus_channel_packet_page_buffer + * is the largest size we support + */ + descSize = sizeof(hv_vmbus_channel_packet_page_buffer) - + ((HV_MAX_PAGE_BUFFER_COUNT - page_count) * + sizeof(hv_vmbus_page_buffer)); + packet_len = descSize + buffer_len; + packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); + + /* Setup the descriptor */ + desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT; + desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; + desc.data_offset8 = descSize >> 3; /* in 8-bytes granularity */ + desc.length8 = (uint16_t) (packetLen_aligned >> 3); + desc.transaction_id = request_id; + desc.range_count = page_count; + + for (i = 0; i < page_count; i++) { + desc.range[i].length = page_buffers[i].length; + desc.range[i].offset = page_buffers[i].offset; + desc.range[i].pfn = page_buffers[i].pfn; + } + + buffer_list[0].data = &desc; + buffer_list[0].length = descSize; + + buffer_list[1].data = buffer; + buffer_list[1].length = buffer_len; + + buffer_list[2].data = &alignedData; + buffer_list[2].length = packetLen_aligned - packet_len; + + ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3); + + /* TODO: We should determine if this is optional */ + if (ret == 0 && + !hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound)) { + vmbus_channel_set_event(channel); + } + + return (ret); +} + +/** + * 
@brief Send a multi-page buffer packet using a GPADL Direct packet type + */ +int +hv_vmbus_channel_send_packet_multipagebuffer( + hv_vmbus_channel* channel, + hv_vmbus_multipage_buffer* multi_page_buffer, + void* buffer, + uint32_t buffer_len, + uint64_t request_id) +{ + + int ret = 0; + uint32_t desc_size; + uint32_t packet_len; + uint32_t packet_len_aligned; + uint32_t pfn_count; + uint64_t aligned_data = 0; + hv_vmbus_sg_buffer_list buffer_list[3]; + hv_vmbus_channel_packet_multipage_buffer desc; + + pfn_count = + HV_NUM_PAGES_SPANNED( + multi_page_buffer->offset, + multi_page_buffer->length); + + if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT)) + return (EINVAL); + /* + * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer + * is the largest size we support + */ + desc_size = + sizeof(hv_vmbus_channel_packet_multipage_buffer) - + ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) * + sizeof(uint64_t)); + packet_len = desc_size + buffer_len; + packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); + + /* + * Setup the descriptor + */ + desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT; + desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; + desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */ + desc.length8 = (uint16_t) (packet_len_aligned >> 3); + desc.transaction_id = request_id; + desc.range_count = 1; + + desc.range.length = multi_page_buffer->length; + desc.range.offset = multi_page_buffer->offset; + + memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array, + pfn_count * sizeof(uint64_t)); + + buffer_list[0].data = &desc; + buffer_list[0].length = desc_size; + + buffer_list[1].data = buffer; + buffer_list[1].length = buffer_len; + + buffer_list[2].data = &aligned_data; + buffer_list[2].length = packet_len_aligned - packet_len; + + ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3); + + /* TODO: We should determine if this is optional */ + if (ret == 0 && + 
!hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound)) { + vmbus_channel_set_event(channel); + } + + return (ret); +} + +/** + * @brief Retrieve the user packet on the specified channel + */ +int +hv_vmbus_channel_recv_packet( + hv_vmbus_channel* channel, + void* Buffer, + uint32_t buffer_len, + uint32_t* buffer_actual_len, + uint64_t* request_id) +{ + int ret; + uint32_t user_len; + uint32_t packet_len; + hv_vm_packet_descriptor desc; + + *buffer_actual_len = 0; + *request_id = 0; + + ret = hv_ring_buffer_peek(&channel->inbound, &desc, + sizeof(hv_vm_packet_descriptor)); + if (ret != 0) + return (0); + + packet_len = desc.length8 << 3; + user_len = packet_len - (desc.data_offset8 << 3); + + *buffer_actual_len = user_len; + + if (user_len > buffer_len) + return (EINVAL); + + *request_id = desc.transaction_id; + + /* Copy over the packet to the user buffer */ + ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len, + (desc.data_offset8 << 3)); + + return (0); +} + +/** + * @brief Retrieve the raw packet on the specified channel + */ +int +hv_vmbus_channel_recv_packet_raw( + hv_vmbus_channel* channel, + void* buffer, + uint32_t buffer_len, + uint32_t* buffer_actual_len, + uint64_t* request_id) +{ + int ret; + uint32_t packetLen; + uint32_t userLen; + hv_vm_packet_descriptor desc; + + *buffer_actual_len = 0; + *request_id = 0; + + ret = hv_ring_buffer_peek( + &channel->inbound, &desc, + sizeof(hv_vm_packet_descriptor)); + + if (ret != 0) + return (0); + + packetLen = desc.length8 << 3; + userLen = packetLen - (desc.data_offset8 << 3); + + *buffer_actual_len = packetLen; + + if (packetLen > buffer_len) + return (ENOBUFS); + + *request_id = desc.transaction_id; + + /* Copy over the entire packet to the user buffer */ + ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0); + + return (0); +} diff --git a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c new file mode 100644 index 0000000..011e305 --- /dev/null 
+++ b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c @@ -0,0 +1,680 @@ +/*- + * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/mbuf.h> + +#include "hv_vmbus_priv.h" + +typedef void (*hv_pfn_channel_msg_handler)(hv_vmbus_channel_msg_header* msg); + +typedef struct hv_vmbus_channel_msg_table_entry { + hv_vmbus_channel_msg_type messageType; + hv_pfn_channel_msg_handler messageHandler; +} hv_vmbus_channel_msg_table_entry; + +/* + * Internal functions + */ + +static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr); +static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr); +static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr); +static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr); +static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr); +static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr); +static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr); +static void vmbus_channel_process_offer(void *context); + +/** + * Channel message dispatch table + */ +hv_vmbus_channel_msg_table_entry + g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = { + { HV_CHANNEL_MESSAGE_INVALID, NULL }, + { HV_CHANNEL_MESSAGE_OFFER_CHANNEL, vmbus_channel_on_offer }, + { HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER, + vmbus_channel_on_offer_rescind }, + { HV_CHANNEL_MESSAGE_REQUEST_OFFERS, NULL }, + { HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED, + vmbus_channel_on_offers_delivered }, + { HV_CHANNEL_MESSAGE_OPEN_CHANNEL, NULL }, + { HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT, + vmbus_channel_on_open_result }, + { HV_CHANNEL_MESSAGE_CLOSE_CHANNEL, NULL }, + { HV_CHANNEL_MESSAGEL_GPADL_HEADER, NULL }, + { HV_CHANNEL_MESSAGE_GPADL_BODY, NULL }, + { HV_CHANNEL_MESSAGE_GPADL_CREATED, + vmbus_channel_on_gpadl_created }, + { HV_CHANNEL_MESSAGE_GPADL_TEARDOWN, NULL }, + { HV_CHANNEL_MESSAGE_GPADL_TORNDOWN, + vmbus_channel_on_gpadl_torndown }, + { HV_CHANNEL_MESSAGE_REL_ID_RELEASED, NULL }, + { HV_CHANNEL_MESSAGE_INITIATED_CONTACT, NULL }, + { 
HV_CHANNEL_MESSAGE_VERSION_RESPONSE, + vmbus_channel_on_version_response }, + { HV_CHANNEL_MESSAGE_UNLOAD, NULL } +}; + + +/** + * Implementation of the work abstraction. + */ +static void +work_item_callback(void *work, int pending) +{ + struct hv_work_item *w = (struct hv_work_item *)work; + + /* + * Serialize work execution. + */ + if (w->wq->work_sema != NULL) { + sema_wait(w->wq->work_sema); + } + + w->callback(w->context); + + if (w->wq->work_sema != NULL) { + sema_post(w->wq->work_sema); + } + + free(w, M_DEVBUF); +} + +struct hv_work_queue* +hv_work_queue_create(char* name) +{ + static unsigned int qid = 0; + char qname[64]; + int pri; + struct hv_work_queue* wq; + + wq = malloc(sizeof(struct hv_work_queue), M_DEVBUF, M_NOWAIT | M_ZERO); + KASSERT(wq != NULL, ("Error VMBUS: Failed to allocate work_queue\n")); + if (wq == NULL) + return (NULL); + + /* + * We use work abstraction to handle messages + * coming from the host and these are typically offers. + * Some FreeBsd drivers appear to have a concurrency issue + * where probe/attach needs to be serialized. We ensure that + * by having only one thread process work elements in a + * specific queue by serializing work execution. + * + */ + if (strcmp(name, "vmbusQ") == 0) { + pri = PI_DISK; + } else { /* control */ + pri = PI_NET; + /* + * Initialize semaphore for this queue by pointing + * to the globale semaphore used for synchronizing all + * control messages. + */ + wq->work_sema = &hv_vmbus_g_connection.control_sema; + } + + sprintf(qname, "hv_%s_%u", name, qid); + + /* + * Fixme: FreeBSD 8.2 has a different prototype for + * taskqueue_create(), and for certain other taskqueue functions. + * We need to research the implications of these changes. + * Fixme: Not sure when the changes were introduced. 
+ */ + wq->queue = taskqueue_create(qname, M_NOWAIT, taskqueue_thread_enqueue, + &wq->queue + #if __FreeBSD_version < 800000 + , &wq->proc + #endif + ); + + if (wq->queue == NULL) { + free(wq, M_DEVBUF); + return (NULL); + } + + if (taskqueue_start_threads(&wq->queue, 1, pri, "%s taskq", qname)) { + taskqueue_free(wq->queue); + free(wq, M_DEVBUF); + return (NULL); + } + + qid++; + + return (wq); +} + +void +hv_work_queue_close(struct hv_work_queue *wq) +{ + /* + * KYS: Need to drain the taskqueue + * before we close the hv_work_queue. + */ + /*KYS: taskqueue_drain(wq->tq, ); */ + taskqueue_free(wq->queue); + free(wq, M_DEVBUF); +} + +/** + * @brief Create work item + */ +int +hv_queue_work_item( + struct hv_work_queue *wq, + void (*callback)(void *), void *context) +{ + struct hv_work_item *w = malloc(sizeof(struct hv_work_item), + M_DEVBUF, M_NOWAIT | M_ZERO); + KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n")); + if (w == NULL) + return (ENOMEM); + + w->callback = callback; + w->context = context; + w->wq = wq; + + TASK_INIT(&w->work, 0, work_item_callback, w); + + return (taskqueue_enqueue(wq->queue, &w->work)); +} + +/** + * @brief Rescind the offer by initiating a device removal + */ +static void +vmbus_channel_process_rescind_offer(void *context) +{ + hv_vmbus_channel* channel = (hv_vmbus_channel*) context; + hv_vmbus_child_device_unregister(channel->device); +} + +/** + * @brief Allocate and initialize a vmbus channel object + */ +hv_vmbus_channel* +hv_vmbus_allocate_channel(void) +{ + hv_vmbus_channel* channel; + + channel = (hv_vmbus_channel*) malloc( + sizeof(hv_vmbus_channel), + M_DEVBUF, + M_NOWAIT | M_ZERO); + KASSERT(channel != NULL, ("Error VMBUS: Failed to allocate channel!")); + if (channel == NULL) + return (NULL); + + mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF); + + channel->control_work_queue = hv_work_queue_create("control"); + + if (channel->control_work_queue == NULL) { + 
mtx_destroy(&channel->inbound_lock); + free(channel, M_DEVBUF); + return (NULL); + } + + return (channel); +} + +/** + * @brief Release the vmbus channel object itself + */ +static inline void +ReleaseVmbusChannel(void *context) +{ + hv_vmbus_channel* channel = (hv_vmbus_channel*) context; + hv_work_queue_close(channel->control_work_queue); + free(channel, M_DEVBUF); +} + +/** + * @brief Release the resources used by the vmbus channel object + */ +void +hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel) +{ + mtx_destroy(&channel->inbound_lock); + /* + * We have to release the channel's workqueue/thread in + * the vmbus's workqueue/thread context + * ie we can't destroy ourselves + */ + hv_queue_work_item(hv_vmbus_g_connection.work_queue, + ReleaseVmbusChannel, (void *) channel); +} + +/** + * @brief Process the offer by creating a channel/device + * associated with this offer + */ +static void +vmbus_channel_process_offer(void *context) +{ + int ret; + hv_vmbus_channel* new_channel; + boolean_t f_new; + hv_vmbus_channel* channel; + + new_channel = (hv_vmbus_channel*) context; + f_new = TRUE; + channel = NULL; + + /* + * Make sure this is a new offer + */ + mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + + TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor, + list_entry) + { + if (!memcmp( + &channel->offer_msg.offer.interface_type, + &new_channel->offer_msg.offer.interface_type, + sizeof(hv_guid)) + && !memcmp( + &channel->offer_msg.offer.interface_instance, + &new_channel->offer_msg.offer.interface_instance, + sizeof(hv_guid))) { + f_new = FALSE; + break; + } + } + + if (f_new) { + /* Insert at tail */ + TAILQ_INSERT_TAIL( + &hv_vmbus_g_connection.channel_anchor, + new_channel, + list_entry); + } + mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + + if (!f_new) { + hv_vmbus_free_vmbus_channel(new_channel); + return; + } + + /* + * Start the process of binding this offer to the driver + * (We need to set the device field before calling + * 
hv_vmbus_child_device_add()) + */ + new_channel->device = hv_vmbus_child_device_create( + new_channel->offer_msg.offer.interface_type, + new_channel->offer_msg.offer.interface_instance, new_channel); + + /* + * TODO - the HV_CHANNEL_OPEN_STATE flag should not be set below + * but in the "open" channel request. The ret != 0 logic below + * doesn't take into account that a channel + * may have been opened successfully + */ + + /* + * Add the new device to the bus. This will kick off device-driver + * binding which eventually invokes the device driver's AddDevice() + * method. + */ + ret = hv_vmbus_child_device_register(new_channel->device); + if (ret != 0) { + mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + TAILQ_REMOVE( + &hv_vmbus_g_connection.channel_anchor, + new_channel, + list_entry); + mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + hv_vmbus_free_vmbus_channel(new_channel); + } else { + /* + * This state is used to indicate a successful open + * so that when we do close the channel normally, + * we can clean up properly + */ + new_channel->state = HV_CHANNEL_OPEN_STATE; + + } +} + +/** + * @brief Handler for channel offers from Hyper-V/Azure + * + * Handler for channel offers from vmbus in parent partition. We ignore + * all offers except network and storage offers. 
For each network and storage + * offers, we create a channel object and queue a work item to the channel + * object to process the offer synchronously + */ +static void +vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr) +{ + hv_vmbus_channel_offer_channel* offer; + hv_vmbus_channel* new_channel; + + offer = (hv_vmbus_channel_offer_channel*) hdr; + + hv_guid *guidType; + hv_guid *guidInstance; + + guidType = &offer->offer.interface_type; + guidInstance = &offer->offer.interface_instance; + + /* Allocate the channel object and save this offer */ + new_channel = hv_vmbus_allocate_channel(); + if (new_channel == NULL) + return; + + memcpy(&new_channel->offer_msg, offer, + sizeof(hv_vmbus_channel_offer_channel)); + new_channel->monitor_group = (uint8_t) offer->monitor_id / 32; + new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32; + + /* TODO: Make sure the offer comes from our parent partition */ + hv_queue_work_item( + new_channel->control_work_queue, + vmbus_channel_process_offer, + new_channel); +} + +/** + * @brief Rescind offer handler. + * + * We queue a work item to process this offer + * synchronously + */ +static void +vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr) +{ + hv_vmbus_channel_rescind_offer* rescind; + hv_vmbus_channel* channel; + + rescind = (hv_vmbus_channel_rescind_offer*) hdr; + + channel = hv_vmbus_get_channel_from_rel_id(rescind->child_rel_id); + if (channel == NULL) + return; + + hv_queue_work_item(channel->control_work_queue, + vmbus_channel_process_rescind_offer, channel); +} + +/** + * + * @brief Invoked when all offers have been delivered. + */ +static void +vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr) +{ +} + +/** + * @brief Open result handler. + * + * This is invoked when we received a response + * to our channel open request. Find the matching request, copy the + * response and signal the requesting thread. 
+ */ +static void +vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr) +{ + hv_vmbus_channel_open_result* result; + hv_vmbus_channel_msg_info* msg_info; + hv_vmbus_channel_msg_header* requestHeader; + hv_vmbus_channel_open_channel* openMsg; + + result = (hv_vmbus_channel_open_result*) hdr; + + /* + * Find the open msg, copy the result and signal/unblock the wait event + */ + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, + msg_list_entry) { + requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; + + if (requestHeader->message_type == + HV_CHANNEL_MESSAGE_OPEN_CHANNEL) { + openMsg = (hv_vmbus_channel_open_channel*) msg_info->msg; + if (openMsg->child_rel_id == result->child_rel_id + && openMsg->open_id == result->open_id) { + memcpy(&msg_info->response.open_result, result, + sizeof(hv_vmbus_channel_open_result)); + sema_post(&msg_info->wait_sema); + break; + } + } + } + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + +} + +/** + * @brief GPADL created handler. + * + * This is invoked when we received a response + * to our gpadl create request. Find the matching request, copy the + * response and signal the requesting thread. 
+ */ +static void +vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr) +{ + hv_vmbus_channel_gpadl_created* gpadl_created; + hv_vmbus_channel_msg_info* msg_info; + hv_vmbus_channel_msg_header* request_header; + hv_vmbus_channel_gpadl_header* gpadl_header; + + gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr; + + /* Find the establish msg, copy the result and signal/unblock + * the wait event + */ + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, + msg_list_entry) { + request_header = (hv_vmbus_channel_msg_header*) msg_info->msg; + if (request_header->message_type == + HV_CHANNEL_MESSAGEL_GPADL_HEADER) { + gpadl_header = + (hv_vmbus_channel_gpadl_header*) request_header; + + if ((gpadl_created->child_rel_id == gpadl_header->child_rel_id) + && (gpadl_created->gpadl == gpadl_header->gpadl)) { + memcpy(&msg_info->response.gpadl_created, + gpadl_created, + sizeof(hv_vmbus_channel_gpadl_created)); + sema_post(&msg_info->wait_sema); + break; + } + } + } + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); +} + +/** + * @brief GPADL torndown handler. + * + * This is invoked when we received a respons + * to our gpadl teardown request. Find the matching request, copy the + * response and signal the requesting thread + */ +static void +vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr) +{ + hv_vmbus_channel_gpadl_torndown* gpadl_torndown; + hv_vmbus_channel_msg_info* msg_info; + hv_vmbus_channel_msg_header* requestHeader; + hv_vmbus_channel_gpadl_teardown* gpadlTeardown; + + gpadl_torndown = (hv_vmbus_channel_gpadl_torndown*)hdr; + + /* + * Find the open msg, copy the result and signal/unblock the + * wait event. 
+ */ + + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, + msg_list_entry) { + requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; + + if (requestHeader->message_type + == HV_CHANNEL_MESSAGE_GPADL_TEARDOWN) { + gpadlTeardown = + (hv_vmbus_channel_gpadl_teardown*) requestHeader; + + if (gpadl_torndown->gpadl == gpadlTeardown->gpadl) { + memcpy(&msg_info->response.gpadl_torndown, + gpadl_torndown, + sizeof(hv_vmbus_channel_gpadl_torndown)); + sema_post(&msg_info->wait_sema); + break; + } + } + } + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); +} + +/** + * @brief Version response handler. + * + * This is invoked when we received a response + * to our initiate contact request. Find the matching request, copy th + * response and signal the requesting thread. + */ +static void +vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr) +{ + hv_vmbus_channel_msg_info* msg_info; + hv_vmbus_channel_msg_header* requestHeader; + hv_vmbus_channel_initiate_contact* initiate; + hv_vmbus_channel_version_response* versionResponse; + + versionResponse = (hv_vmbus_channel_version_response*)hdr; + + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, + msg_list_entry) { + requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; + if (requestHeader->message_type + == HV_CHANNEL_MESSAGE_INITIATED_CONTACT) { + initiate = + (hv_vmbus_channel_initiate_contact*) requestHeader; + memcpy(&msg_info->response.version_response, + versionResponse, + sizeof(hv_vmbus_channel_version_response)); + sema_post(&msg_info->wait_sema); + } + } + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + +} + +/** + * @brief Handler for channel protocol messages. + * + * This is invoked in the vmbus worker thread context. 
+ */ +void +hv_vmbus_on_channel_message(void *context) +{ + hv_vmbus_message* msg; + hv_vmbus_channel_msg_header* hdr; + int size; + + msg = (hv_vmbus_message*) context; + hdr = (hv_vmbus_channel_msg_header*) msg->u.payload; + size = msg->header.payload_size; + + if (hdr->message_type >= HV_CHANNEL_MESSAGE_COUNT) { + free(msg, M_DEVBUF); + return; + } + + if (g_channel_message_table[hdr->message_type].messageHandler) { + g_channel_message_table[hdr->message_type].messageHandler(hdr); + } + + /* Free the msg that was allocated in VmbusOnMsgDPC() */ + free(msg, M_DEVBUF); +} + +/** + * @brief Send a request to get all our pending offers. + */ +int +hv_vmbus_request_channel_offers(void) +{ + int ret; + hv_vmbus_channel_msg_header* msg; + hv_vmbus_channel_msg_info* msg_info; + + msg_info = (hv_vmbus_channel_msg_info *) + malloc(sizeof(hv_vmbus_channel_msg_info) + + sizeof(hv_vmbus_channel_msg_header), M_DEVBUF, M_NOWAIT); + + if (msg_info == NULL) { + if(bootverbose) + printf("Error VMBUS: malloc failed for Request Offers\n"); + return (ENOMEM); + } + + msg = (hv_vmbus_channel_msg_header*) msg_info->msg; + msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS; + + ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header)); + + if (msg_info) + free(msg_info, M_DEVBUF); + + return (ret); +} + +/** + * @brief Release channels that are unattached/unconnected (i.e., no drivers associated) + */ +void +hv_vmbus_release_unattached_channels(void) +{ + hv_vmbus_channel *channel; + + mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + + while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) { + channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor); + TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor, + channel, list_entry); + + hv_vmbus_child_device_unregister(channel->device); + hv_vmbus_free_vmbus_channel(channel); + } + mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); +} diff --git a/sys/dev/hyperv/vmbus/hv_connection.c 
b/sys/dev/hyperv/vmbus/hv_connection.c new file mode 100644 index 0000000..c8e0b48 --- /dev/null +++ b/sys/dev/hyperv/vmbus/hv_connection.c @@ -0,0 +1,431 @@ +/*- + * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/systm.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <machine/bus.h> +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> + +#include "hv_vmbus_priv.h" + +/* + * Globals + */ +hv_vmbus_connection hv_vmbus_g_connection = + { .connect_state = HV_DISCONNECTED, + .next_gpadl_handle = 0xE1E10, }; + +/** + * Send a connect request on the partition service connection + */ +int +hv_vmbus_connect(void) { + int ret = 0; + hv_vmbus_channel_msg_info* msg_info = NULL; + hv_vmbus_channel_initiate_contact* msg; + + /** + * Make sure we are not connecting or connected + */ + if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) { + return (-1); + } + + /** + * Initialize the vmbus connection + */ + hv_vmbus_g_connection.connect_state = HV_CONNECTING; + hv_vmbus_g_connection.work_queue = hv_work_queue_create("vmbusQ"); + sema_init(&hv_vmbus_g_connection.control_sema, 1, "control_sema"); + + TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor); + mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg", + NULL, MTX_SPIN); + + TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor); + mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel", + NULL, MTX_SPIN); + + /** + * Setup the vmbus event connection for channel interrupt abstraction + * stuff + */ + hv_vmbus_g_connection.interrupt_page = contigmalloc( + PAGE_SIZE, M_DEVBUF, + M_NOWAIT | M_ZERO, 0UL, + BUS_SPACE_MAXADDR, + PAGE_SIZE, 0); + KASSERT(hv_vmbus_g_connection.interrupt_page != NULL, + ("Error VMBUS: malloc failed to allocate Channel" + " Request Event message!")); + if (hv_vmbus_g_connection.interrupt_page == NULL) { + ret = ENOMEM; + goto cleanup; + } + + hv_vmbus_g_connection.recv_interrupt_page = + hv_vmbus_g_connection.interrupt_page; + + hv_vmbus_g_connection.send_interrupt_page = + ((uint8_t *) hv_vmbus_g_connection.interrupt_page + + (PAGE_SIZE >> 1)); + + /** + * Set up the monitor notification 
facility. The 1st page for + * parent->child and the 2nd page for child->parent + */ + hv_vmbus_g_connection.monitor_pages = contigmalloc( + 2 * PAGE_SIZE, + M_DEVBUF, + M_NOWAIT | M_ZERO, + 0UL, + BUS_SPACE_MAXADDR, + PAGE_SIZE, + 0); + KASSERT(hv_vmbus_g_connection.monitor_pages != NULL, + ("Error VMBUS: malloc failed to allocate Monitor Pages!")); + if (hv_vmbus_g_connection.monitor_pages == NULL) { + ret = ENOMEM; + goto cleanup; + } + + msg_info = (hv_vmbus_channel_msg_info*) + malloc(sizeof(hv_vmbus_channel_msg_info) + + sizeof(hv_vmbus_channel_initiate_contact), + M_DEVBUF, M_NOWAIT | M_ZERO); + KASSERT(msg_info != NULL, + ("Error VMBUS: malloc failed for Initiate Contact message!")); + if (msg_info == NULL) { + ret = ENOMEM; + goto cleanup; + } + + sema_init(&msg_info->wait_sema, 0, "Msg Info Sema"); + msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg; + + msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT; + msg->vmbus_version_requested = HV_VMBUS_REVISION_NUMBER; + + msg->interrupt_page = hv_get_phys_addr( + hv_vmbus_g_connection.interrupt_page); + + msg->monitor_page_1 = hv_get_phys_addr( + hv_vmbus_g_connection.monitor_pages); + + msg->monitor_page_2 = + hv_get_phys_addr( + ((uint8_t *) hv_vmbus_g_connection.monitor_pages + + PAGE_SIZE)); + + /** + * Add to list before we send the request since we may receive the + * response before returning from this routine + */ + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + TAILQ_INSERT_TAIL( + &hv_vmbus_g_connection.channel_msg_anchor, + msg_info, + msg_list_entry); + + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + ret = hv_vmbus_post_message( + msg, + sizeof(hv_vmbus_channel_initiate_contact)); + + if (ret != 0) { + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_REMOVE( + &hv_vmbus_g_connection.channel_msg_anchor, + msg_info, + msg_list_entry); + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + goto cleanup; + } + + /** + * Wait for the 
connection response + */ + ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds */ + + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_REMOVE( + &hv_vmbus_g_connection.channel_msg_anchor, + msg_info, + msg_list_entry); + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + /** + * Check if successful + */ + if (msg_info->response.version_response.version_supported) { + hv_vmbus_g_connection.connect_state = HV_CONNECTED; + } else { + ret = ECONNREFUSED; + goto cleanup; + } + + sema_destroy(&msg_info->wait_sema); + free(msg_info, M_DEVBUF); + + return (0); + + /* + * Cleanup after failure! + */ + cleanup: + + hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; + + hv_work_queue_close(hv_vmbus_g_connection.work_queue); + sema_destroy(&hv_vmbus_g_connection.control_sema); + mtx_destroy(&hv_vmbus_g_connection.channel_lock); + mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); + + if (hv_vmbus_g_connection.interrupt_page != NULL) { + contigfree( + hv_vmbus_g_connection.interrupt_page, + PAGE_SIZE, + M_DEVBUF); + hv_vmbus_g_connection.interrupt_page = NULL; + } + + if (hv_vmbus_g_connection.monitor_pages != NULL) { + contigfree( + hv_vmbus_g_connection.monitor_pages, + 2 * PAGE_SIZE, + M_DEVBUF); + hv_vmbus_g_connection.monitor_pages = NULL; + } + + if (msg_info) { + sema_destroy(&msg_info->wait_sema); + free(msg_info, M_DEVBUF); + } + + return (ret); +} + +/** + * Send a disconnect request on the partition service connection + */ +int +hv_vmbus_disconnect(void) { + int ret = 0; + hv_vmbus_channel_unload* msg; + + msg = malloc(sizeof(hv_vmbus_channel_unload), + M_DEVBUF, M_NOWAIT | M_ZERO); + KASSERT(msg != NULL, + ("Error VMBUS: malloc failed to allocate Channel Unload Msg!")); + if (msg == NULL) + return (ENOMEM); + + msg->message_type = HV_CHANNEL_MESSAGE_UNLOAD; + + ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_unload)); + + + contigfree(hv_vmbus_g_connection.interrupt_page, PAGE_SIZE, M_DEVBUF); + + 
mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); + + hv_work_queue_close(hv_vmbus_g_connection.work_queue); + sema_destroy(&hv_vmbus_g_connection.control_sema); + + hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; + + free(msg, M_DEVBUF); + + return (ret); +} + +/** + * Get the channel object given its child relative id (ie channel id) + */ +hv_vmbus_channel* +hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) { + + hv_vmbus_channel* channel; + hv_vmbus_channel* foundChannel = NULL; + + /* + * TODO: + * Consider optimization where relids are stored in a fixed size array + * and channels are accessed without the need to take this lock or search + * the list. + */ + mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + TAILQ_FOREACH(channel, + &hv_vmbus_g_connection.channel_anchor, list_entry) { + + if (channel->offer_msg.child_rel_id == rel_id) { + foundChannel = channel; + break; + } + } + mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + + return (foundChannel); +} + +/** + * Process a channel event notification + */ +static void +VmbusProcessChannelEvent(uint32_t relid) +{ + hv_vmbus_channel* channel; + + /** + * Find the channel based on this relid and invokes + * the channel callback to process the event + */ + + channel = hv_vmbus_get_channel_from_rel_id(relid); + + if (channel == NULL) { + return; + } + /** + * To deal with the race condition where we might + * receive a packet while the relevant driver is + * being unloaded, dispatch the callback while + * holding the channel lock. The unloading driver + * will acquire the same channel lock to set the + * callback to NULL. This closes the window. 
+ */ + + mtx_lock(&channel->inbound_lock); + if (channel->on_channel_callback != NULL) { + channel->on_channel_callback(channel->channel_callback_context); + } + mtx_unlock(&channel->inbound_lock); +} + +/** + * Handler for events + */ +void +hv_vmbus_on_events(void *arg) +{ + int dword; + int bit; + int rel_id; + int maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5; + /* int maxdword = PAGE_SIZE >> 3; */ + + /* + * receive size is 1/2 page and divide that by 4 bytes + */ + + uint32_t* recv_interrupt_page = + hv_vmbus_g_connection.recv_interrupt_page; + + /* + * Check events + */ + if (recv_interrupt_page != NULL) { + for (dword = 0; dword < maxdword; dword++) { + if (recv_interrupt_page[dword]) { + for (bit = 0; bit < 32; bit++) { + if (synch_test_and_clear_bit(bit, + (uint32_t *) &recv_interrupt_page[dword])) { + rel_id = (dword << 5) + bit; + if (rel_id == 0) { + /* + * Special case - + * vmbus channel protocol msg. + */ + continue; + } else { + VmbusProcessChannelEvent(rel_id); + + } + } + } + } + } + } + + return; +} + +/** + * Send a msg on the vmbus's message connection + */ +int hv_vmbus_post_message(void *buffer, size_t bufferLen) { + int ret = 0; + hv_vmbus_connection_id connId; + unsigned retries = 0; + + /* NetScaler delays from previous code were consolidated here */ + static int delayAmount[] = {100, 100, 100, 500, 500, 5000, 5000, 5000}; + + /* for(each entry in delayAmount) try to post message, + * delay a little bit before retrying + */ + for (retries = 0; + retries < sizeof(delayAmount)/sizeof(delayAmount[0]); retries++) { + connId.as_uint32_t = 0; + connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID; + ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer, bufferLen); + if (ret != HV_STATUS_INSUFFICIENT_BUFFERS) + break; + /* TODO: KYS We should use a blocking wait call */ + DELAY(delayAmount[retries]); + } + + KASSERT(ret == 0, ("Error VMBUS: Message Post Failed\n")); + + return (ret); +} + +/** + * Send an event notification to the parent + */ +int 
+hv_vmbus_set_event(uint32_t child_rel_id) { + int ret = 0; + + /* Each uint32_t represents 32 channels */ + + synch_set_bit(child_rel_id & 31, + (((uint32_t *)hv_vmbus_g_connection.send_interrupt_page + + (child_rel_id >> 5)))); + ret = hv_vmbus_signal_event(); + + return (ret); +} + diff --git a/sys/dev/hyperv/vmbus/hv_hv.c b/sys/dev/hyperv/vmbus/hv_hv.c new file mode 100644 index 0000000..0e73bdc --- /dev/null +++ b/sys/dev/hyperv/vmbus/hv_hv.c @@ -0,0 +1,515 @@ +/*- + * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * Implements low-level interactions with Hypver-V/Azure + */ + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/pcpu.h> +#include <sys/timetc.h> +#include <machine/bus.h> +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> + + +#include "hv_vmbus_priv.h" + +#define HV_X64_MSR_GUEST_OS_ID 0x40000000 +#define HV_X64_CPUID_MIN 0x40000005 +#define HV_X64_CPUID_MAX 0x4000ffff +#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 + +#define HV_NANOSECONDS_PER_SEC 1000000000L + + +static u_int hv_get_timecount(struct timecounter *tc); +static u_int hv_get_timecount(struct timecounter *tc); + +static inline void do_cpuid_inline(unsigned int op, unsigned int *eax, + unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { + __asm__ __volatile__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), + "=d" (*edx) : "0" (op), "c" (ecx)); +} + +/** + * Globals + */ +hv_vmbus_context hv_vmbus_g_context = { + .syn_ic_initialized = FALSE, + .hypercall_page = NULL, + .signal_event_param = NULL, + .signal_event_buffer = NULL, +}; + +static struct timecounter hv_timecounter = { + hv_get_timecount, 0, ~0u, HV_NANOSECONDS_PER_SEC/100, "Hyper-V", HV_NANOSECONDS_PER_SEC/100 +}; + +static u_int +hv_get_timecount(struct timecounter *tc) +{ + u_int now = hv_vmbus_read_msr(HV_X64_MSR_TIME_REF_COUNT); + return (now); +} + +/** + * @brief Query the cpuid for presence of windows hypervisor + */ +int +hv_vmbus_query_hypervisor_presence(void) +{ + u_int regs[4]; + int hyper_v_detected = 0; + do_cpuid(1, regs); + if (regs[2] & 0x80000000) { /* if(a hypervisor is detected) */ + /* make sure this really is Hyper-V */ + /* we look at the CPUID info */ + do_cpuid(HV_X64_MSR_GUEST_OS_ID, regs); + hyper_v_detected = + regs[0] >= HV_X64_CPUID_MIN && + regs[0] <= HV_X64_CPUID_MAX && + !memcmp("Microsoft Hv", ®s[1], 12); + } + return (hyper_v_detected); +} + +/** + * @brief Get version of the windows hypervisor + */ +static int +hv_vmbus_get_hypervisor_version(void) +{ + 
unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + unsigned int maxLeaf; + unsigned int op; + + /* + * Its assumed that this is called after confirming that + * Viridian is present + * Query id and revision. + */ + eax = 0; + ebx = 0; + ecx = 0; + edx = 0; + op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION; + do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); + + maxLeaf = eax; + eax = 0; + ebx = 0; + ecx = 0; + edx = 0; + op = HV_CPU_ID_FUNCTION_HV_INTERFACE; + do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); + + if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_VERSION) { + eax = 0; + ebx = 0; + ecx = 0; + edx = 0; + op = HV_CPU_ID_FUNCTION_MS_HV_VERSION; + do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); + } + return (maxLeaf); +} + +/** + * @brief Invoke the specified hypercall + */ +static uint64_t +hv_vmbus_do_hypercall(uint64_t control, void* input, void* output) +{ +#ifdef __x86_64__ + uint64_t hv_status = 0; + uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0; + uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0; + volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page; + + __asm__ __volatile__ ("mov %0, %%r8" : : "r" (output_address): "r8"); + __asm__ __volatile__ ("call *%3" : "=a"(hv_status): + "c" (control), "d" (input_address), + "m" (hypercall_page)); + return (hv_status); +#else + uint32_t control_high = control >> 32; + uint32_t control_low = control & 0xFFFFFFFF; + uint32_t hv_status_high = 1; + uint32_t hv_status_low = 1; + uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0; + uint32_t input_address_high = input_address >> 32; + uint32_t input_address_low = input_address & 0xFFFFFFFF; + uint64_t output_address = (output) ? 
hv_get_phys_addr(output) : 0; + uint32_t output_address_high = output_address >> 32; + uint32_t output_address_low = output_address & 0xFFFFFFFF; + volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page; + + __asm__ __volatile__ ("call *%8" : "=d"(hv_status_high), + "=a"(hv_status_low) : "d" (control_high), + "a" (control_low), "b" (input_address_high), + "c" (input_address_low), + "D"(output_address_high), + "S"(output_address_low), "m" (hypercall_page)); + return (hv_status_low | ((uint64_t)hv_status_high << 32)); +#endif /* __x86_64__ */ +} + +/** + * @brief Main initialization routine. + * + * This routine must be called + * before any other routines in here are called + */ +int +hv_vmbus_init(void) +{ + int max_leaf; + hv_vmbus_x64_msr_hypercall_contents hypercall_msr; + void* virt_addr = 0; + + memset( + hv_vmbus_g_context.syn_ic_event_page, + 0, + sizeof(hv_vmbus_handle) * MAXCPU); + + memset( + hv_vmbus_g_context.syn_ic_msg_page, + 0, + sizeof(hv_vmbus_handle) * MAXCPU); + + if (!hv_vmbus_query_hypervisor_presence()) + goto cleanup; + + max_leaf = hv_vmbus_get_hypervisor_version(); + + /* + * Write our OS info + */ + uint64_t os_guest_info = HV_FREEBSD_GUEST_ID; + hv_vmbus_write_msr(HV_X64_MSR_GUEST_OS_ID, os_guest_info); + hv_vmbus_g_context.guest_id = os_guest_info; + + /* + * See if the hypercall page is already set + */ + hypercall_msr.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_HYPERCALL); + virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); + KASSERT(virt_addr != NULL, + ("Error VMBUS: malloc failed to allocate page during init!")); + if (virt_addr == NULL) + goto cleanup; + + hypercall_msr.enable = 1; + hypercall_msr.guest_physical_address = + (hv_get_phys_addr(virt_addr) >> PAGE_SHIFT); + hv_vmbus_write_msr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); + + /* + * Confirm that hypercall page did get set up + */ + hypercall_msr.as_uint64_t = 0; + hypercall_msr.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_HYPERCALL); + + if 
(!hypercall_msr.enable) + goto cleanup; + + hv_vmbus_g_context.hypercall_page = virt_addr; + + /* + * Setup the global signal event param for the signal event hypercall + */ + hv_vmbus_g_context.signal_event_buffer = + malloc(sizeof(hv_vmbus_input_signal_event_buffer), M_DEVBUF, + M_ZERO | M_NOWAIT); + KASSERT(hv_vmbus_g_context.signal_event_buffer != NULL, + ("Error VMBUS: Failed to allocate signal_event_buffer\n")); + if (hv_vmbus_g_context.signal_event_buffer == NULL) + goto cleanup; + + hv_vmbus_g_context.signal_event_param = + (hv_vmbus_input_signal_event*) + (HV_ALIGN_UP((unsigned long) + hv_vmbus_g_context.signal_event_buffer, + HV_HYPERCALL_PARAM_ALIGN)); + hv_vmbus_g_context.signal_event_param->connection_id.as_uint32_t = 0; + hv_vmbus_g_context.signal_event_param->connection_id.u.id = + HV_VMBUS_EVENT_CONNECTION_ID; + hv_vmbus_g_context.signal_event_param->flag_number = 0; + hv_vmbus_g_context.signal_event_param->rsvd_z = 0; + + tc_init(&hv_timecounter); /* register virtual timecount */ + + return (0); + + cleanup: + if (virt_addr != NULL) { + if (hypercall_msr.enable) { + hypercall_msr.as_uint64_t = 0; + hv_vmbus_write_msr(HV_X64_MSR_HYPERCALL, + hypercall_msr.as_uint64_t); + } + + free(virt_addr, M_DEVBUF); + } + return (ENOTSUP); +} + +/** + * @brief Cleanup routine, called normally during driver unloading or exiting + */ +void +hv_vmbus_cleanup(void) +{ + hv_vmbus_x64_msr_hypercall_contents hypercall_msr; + + if (hv_vmbus_g_context.signal_event_buffer != NULL) { + free(hv_vmbus_g_context.signal_event_buffer, M_DEVBUF); + hv_vmbus_g_context.signal_event_buffer = NULL; + hv_vmbus_g_context.signal_event_param = NULL; + } + + if (hv_vmbus_g_context.guest_id == HV_FREEBSD_GUEST_ID) { + if (hv_vmbus_g_context.hypercall_page != NULL) { + hypercall_msr.as_uint64_t = 0; + hv_vmbus_write_msr(HV_X64_MSR_HYPERCALL, + hypercall_msr.as_uint64_t); + free(hv_vmbus_g_context.hypercall_page, M_DEVBUF); + hv_vmbus_g_context.hypercall_page = NULL; + } + } +} + +/** + * 
@brief Post a message using the hypervisor message IPC. + * (This involves a hypercall.) + */ +hv_vmbus_status +hv_vmbus_post_msg_via_msg_ipc( + hv_vmbus_connection_id connection_id, + hv_vmbus_msg_type message_type, + void* payload, + size_t payload_size) +{ + struct alignedinput { + uint64_t alignment8; + hv_vmbus_input_post_message msg; + }; + + hv_vmbus_input_post_message* aligned_msg; + hv_vmbus_status status; + size_t addr; + + if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) + return (EMSGSIZE); + + addr = (size_t) malloc(sizeof(struct alignedinput), M_DEVBUF, + M_ZERO | M_NOWAIT); + KASSERT(addr != 0, + ("Error VMBUS: malloc failed to allocate message buffer!")); + if (addr == 0) + return (ENOMEM); + + aligned_msg = (hv_vmbus_input_post_message*) + (HV_ALIGN_UP(addr, HV_HYPERCALL_PARAM_ALIGN)); + + aligned_msg->connection_id = connection_id; + aligned_msg->message_type = message_type; + aligned_msg->payload_size = payload_size; + memcpy((void*) aligned_msg->payload, payload, payload_size); + + status = hv_vmbus_do_hypercall( + HV_CALL_POST_MESSAGE, aligned_msg, 0) & 0xFFFF; + + free((void *) addr, M_DEVBUF); + return (status); +} + +/** + * @brief Signal an event on the specified connection using the hypervisor + * event IPC. (This involves a hypercall.) + */ +hv_vmbus_status +hv_vmbus_signal_event() +{ + hv_vmbus_status status; + + status = hv_vmbus_do_hypercall( + HV_CALL_SIGNAL_EVENT, + hv_vmbus_g_context.signal_event_param, + 0) & 0xFFFF; + + return (status); +} + +/** + * @brief hv_vmbus_synic_init + */ +void +hv_vmbus_synic_init(void *irq_arg) + +{ + int cpu; + uint32_t irq_vector; + hv_vmbus_synic_simp simp; + hv_vmbus_synic_siefp siefp; + hv_vmbus_synic_scontrol sctrl; + hv_vmbus_synic_sint shared_sint; + uint64_t version; + + irq_vector = *((uint32_t *) (irq_arg)); + cpu = PCPU_GET(cpuid); + + if (hv_vmbus_g_context.hypercall_page == NULL) + return; + + /* + * KYS: Looks like we can only initialize on cpu0; don't we support + * SMP guests? 
+ * + * TODO: Need to add SMP support for FreeBSD V9 + */ + + if (cpu != 0) + return; + + /* + * TODO: Check the version + */ + version = hv_vmbus_read_msr(HV_X64_MSR_SVERSION); + + hv_vmbus_g_context.syn_ic_msg_page[cpu] = + malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); + KASSERT(hv_vmbus_g_context.syn_ic_msg_page[cpu] != NULL, + ("Error VMBUS: malloc failed for allocating page!")); + if (hv_vmbus_g_context.syn_ic_msg_page[cpu] == NULL) + goto cleanup; + + hv_vmbus_g_context.syn_ic_event_page[cpu] = + malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); + KASSERT(hv_vmbus_g_context.syn_ic_event_page[cpu] != NULL, + ("Error VMBUS: malloc failed to allocate page!")); + if (hv_vmbus_g_context.syn_ic_event_page[cpu] == NULL) + goto cleanup; + + /* + * Setup the Synic's message page + */ + + simp.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SIMP); + simp.simp_enabled = 1; + simp.base_simp_gpa = ((hv_get_phys_addr( + hv_vmbus_g_context.syn_ic_msg_page[cpu])) >> PAGE_SHIFT); + + hv_vmbus_write_msr(HV_X64_MSR_SIMP, simp.as_uint64_t); + + /* + * Setup the Synic's event page + */ + siefp.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SIEFP); + siefp.siefp_enabled = 1; + siefp.base_siefp_gpa = ((hv_get_phys_addr( + hv_vmbus_g_context.syn_ic_event_page[cpu])) >> PAGE_SHIFT); + + hv_vmbus_write_msr(HV_X64_MSR_SIEFP, siefp.as_uint64_t); + + shared_sint.vector = irq_vector; /*HV_SHARED_SINT_IDT_VECTOR + 0x20; */ + shared_sint.masked = FALSE; + shared_sint.auto_eoi = FALSE; + + hv_vmbus_write_msr( + HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT, + shared_sint.as_uint64_t); + + /* Enable the global synic bit */ + sctrl.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SCONTROL); + sctrl.enable = 1; + + hv_vmbus_write_msr(HV_X64_MSR_SCONTROL, sctrl.as_uint64_t); + + hv_vmbus_g_context.syn_ic_initialized = TRUE; + + return; + + cleanup: + + free(hv_vmbus_g_context.syn_ic_msg_page[cpu], M_DEVBUF); + free(hv_vmbus_g_context.syn_ic_msg_page[cpu], M_DEVBUF); +} + +/** + * @brief Cleanup routine for 
hv_vmbus_synic_init() + */ +void hv_vmbus_synic_cleanup(void *arg) +{ + hv_vmbus_synic_sint shared_sint; + hv_vmbus_synic_simp simp; + hv_vmbus_synic_siefp siefp; + int cpu = PCPU_GET(cpuid); + + if (!hv_vmbus_g_context.syn_ic_initialized) + return; + + if (cpu != 0) + return; /* TODO: XXXKYS: SMP? */ + + shared_sint.as_uint64_t = hv_vmbus_read_msr( + HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT); + + shared_sint.masked = 1; + + /* + * Disable the interrupt + */ + hv_vmbus_write_msr( + HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT, + shared_sint.as_uint64_t); + + simp.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SIMP); + simp.simp_enabled = 0; + simp.base_simp_gpa = 0; + + hv_vmbus_write_msr(HV_X64_MSR_SIMP, simp.as_uint64_t); + + siefp.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SIEFP); + siefp.siefp_enabled = 0; + siefp.base_siefp_gpa = 0; + + hv_vmbus_write_msr(HV_X64_MSR_SIEFP, siefp.as_uint64_t); + + contigfree(hv_vmbus_g_context.syn_ic_msg_page[cpu], + PAGE_SIZE, M_DEVBUF); + contigfree(hv_vmbus_g_context.syn_ic_event_page[cpu], + PAGE_SIZE, M_DEVBUF); +} + diff --git a/sys/dev/hyperv/vmbus/hv_ring_buffer.c b/sys/dev/hyperv/vmbus/hv_ring_buffer.c new file mode 100644 index 0000000..f7c1965 --- /dev/null +++ b/sys/dev/hyperv/vmbus/hv_ring_buffer.c @@ -0,0 +1,440 @@ +/*- + * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mutex.h> + +#include "hv_vmbus_priv.h" + +/* Amount of space to write to */ +#define HV_BYTES_AVAIL_TO_WRITE(r, w, z) ((w) >= (r))? \ + ((z) - ((w) - (r))):((r) - (w)) + +/** + * @brief Get number of bytes available to read and to write to + * for the specified ring buffer + */ +static inline void +get_ring_buffer_avail_bytes( + hv_vmbus_ring_buffer_info* rbi, + uint32_t* read, + uint32_t* write) +{ + uint32_t read_loc, write_loc; + + /* + * Capture the read/write indices before they changed + */ + read_loc = rbi->ring_buffer->read_index; + write_loc = rbi->ring_buffer->write_index; + + *write = HV_BYTES_AVAIL_TO_WRITE( + read_loc, write_loc, rbi->ring_data_size); + *read = rbi->ring_data_size - *write; +} + +/** + * @brief Get the next write location for the specified ring buffer + */ +static inline uint32_t +get_next_write_location(hv_vmbus_ring_buffer_info* ring_info) +{ + uint32_t next = ring_info->ring_buffer->write_index; + return (next); +} + +/** + * @brief Set the next write location for the specified ring buffer + */ +static inline void +set_next_write_location( + hv_vmbus_ring_buffer_info* ring_info, + uint32_t next_write_location) +{ + 
ring_info->ring_buffer->write_index = next_write_location; +} + +/** + * @brief Get the next read location for the specified ring buffer + */ +static inline uint32_t +get_next_read_location(hv_vmbus_ring_buffer_info* ring_info) +{ + uint32_t next = ring_info->ring_buffer->read_index; + return (next); +} + +/** + * @brief Get the next read location + offset for the specified ring buffer. + * This allows the caller to skip. + */ +static inline uint32_t +get_next_read_location_with_offset( + hv_vmbus_ring_buffer_info* ring_info, + uint32_t offset) +{ + uint32_t next = ring_info->ring_buffer->read_index; + next += offset; + next %= ring_info->ring_data_size; + return (next); +} + +/** + * @brief Set the next read location for the specified ring buffer + */ +static inline void +set_next_read_location( + hv_vmbus_ring_buffer_info* ring_info, + uint32_t next_read_location) +{ + ring_info->ring_buffer->read_index = next_read_location; +} + +/** + * @brief Get the start of the ring buffer + */ +static inline void * +get_ring_buffer(hv_vmbus_ring_buffer_info* ring_info) +{ + return (void *) ring_info->ring_buffer->buffer; +} + +/** + * @brief Get the size of the ring buffer. + */ +static inline uint32_t +get_ring_buffer_size(hv_vmbus_ring_buffer_info* ring_info) +{ + return ring_info->ring_data_size; +} + +/** + * Get the read and write indices as uint64_t of the specified ring buffer. + */ +static inline uint64_t +get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info) +{ + return (uint64_t) ring_info->ring_buffer->write_index << 32; +} + +static uint32_t copy_to_ring_buffer( + hv_vmbus_ring_buffer_info* ring_info, + uint32_t start_write_offset, + char* src, + uint32_t src_len); + +static uint32_t copy_from_ring_buffer( + hv_vmbus_ring_buffer_info* ring_info, + char* dest, + uint32_t dest_len, + uint32_t start_read_offset); + + +/** + * @brief Get the interrupt mask for the specified ring buffer. 
+ */ +uint32_t +hv_vmbus_get_ring_buffer_interrupt_mask(hv_vmbus_ring_buffer_info *rbi) +{ + return rbi->ring_buffer->interrupt_mask; +} + +/** + * @brief Initialize the ring buffer. + */ +int +hv_vmbus_ring_buffer_init( + hv_vmbus_ring_buffer_info* ring_info, + void* buffer, + uint32_t buffer_len) +{ + memset(ring_info, 0, sizeof(hv_vmbus_ring_buffer_info)); + + ring_info->ring_buffer = (hv_vmbus_ring_buffer*) buffer; + ring_info->ring_buffer->read_index = + ring_info->ring_buffer->write_index = 0; + + ring_info->ring_size = buffer_len; + ring_info->ring_data_size = buffer_len - sizeof(hv_vmbus_ring_buffer); + + mtx_init(&ring_info->ring_lock, "vmbus ring buffer", NULL, MTX_SPIN); + + return (0); +} + +/** + * @brief Cleanup the ring buffer. + */ +void hv_ring_buffer_cleanup(hv_vmbus_ring_buffer_info* ring_info) +{ + mtx_destroy(&ring_info->ring_lock); +} + +/** + * @brief Write to the ring buffer. + */ +int +hv_ring_buffer_write( + hv_vmbus_ring_buffer_info* out_ring_info, + hv_vmbus_sg_buffer_list sg_buffers[], + uint32_t sg_buffer_count) +{ + int i = 0; + uint32_t byte_avail_to_write; + uint32_t byte_avail_to_read; + uint32_t total_bytes_to_write = 0; + + volatile uint32_t next_write_location; + uint64_t prev_indices = 0; + + for (i = 0; i < sg_buffer_count; i++) { + total_bytes_to_write += sg_buffers[i].length; + } + + total_bytes_to_write += sizeof(uint64_t); + + mtx_lock_spin(&out_ring_info->ring_lock); + + get_ring_buffer_avail_bytes(out_ring_info, &byte_avail_to_read, + &byte_avail_to_write); + + /* + * If there is only room for the packet, assume it is full. 
+ * Otherwise, the next time around, we think the ring buffer + * is empty since the read index == write index + */ + + if (byte_avail_to_write <= total_bytes_to_write) { + + mtx_unlock_spin(&out_ring_info->ring_lock); + return (EAGAIN); + } + + /* + * Write to the ring buffer + */ + next_write_location = get_next_write_location(out_ring_info); + + for (i = 0; i < sg_buffer_count; i++) { + next_write_location = copy_to_ring_buffer(out_ring_info, + next_write_location, (char *) sg_buffers[i].data, + sg_buffers[i].length); + } + + /* + * Set previous packet start + */ + prev_indices = get_ring_buffer_indices(out_ring_info); + + next_write_location = copy_to_ring_buffer( + out_ring_info, next_write_location, + (char *) &prev_indices, sizeof(uint64_t)); + + /* + * Make sure we flush all writes before updating the writeIndex + */ + wmb(); + + /* + * Now, update the write location + */ + set_next_write_location(out_ring_info, next_write_location); + + mtx_unlock_spin(&out_ring_info->ring_lock); + + return (0); +} + +/** + * @brief Read without advancing the read index. + */ +int +hv_ring_buffer_peek( + hv_vmbus_ring_buffer_info* in_ring_info, + void* buffer, + uint32_t buffer_len) +{ + uint32_t bytesAvailToWrite; + uint32_t bytesAvailToRead; + uint32_t nextReadLocation = 0; + + mtx_lock_spin(&in_ring_info->ring_lock); + + get_ring_buffer_avail_bytes(in_ring_info, &bytesAvailToRead, + &bytesAvailToWrite); + + /* + * Make sure there is something to read + */ + if (bytesAvailToRead < buffer_len) { + mtx_unlock_spin(&in_ring_info->ring_lock); + return (EAGAIN); + } + + /* + * Convert to byte offset + */ + nextReadLocation = get_next_read_location(in_ring_info); + + nextReadLocation = copy_from_ring_buffer( + in_ring_info, (char *)buffer, buffer_len, nextReadLocation); + + mtx_unlock_spin(&in_ring_info->ring_lock); + + return (0); +} + +/** + * @brief Read and advance the read index. 
+ */ +int +hv_ring_buffer_read( + hv_vmbus_ring_buffer_info* in_ring_info, + void* buffer, + uint32_t buffer_len, + uint32_t offset) +{ + uint32_t bytes_avail_to_write; + uint32_t bytes_avail_to_read; + uint32_t next_read_location = 0; + uint64_t prev_indices = 0; + + if (buffer_len <= 0) + return (EINVAL); + + mtx_lock_spin(&in_ring_info->ring_lock); + + get_ring_buffer_avail_bytes( + in_ring_info, &bytes_avail_to_read, + &bytes_avail_to_write); + + /* + * Make sure there is something to read + */ + if (bytes_avail_to_read < buffer_len) { + mtx_unlock_spin(&in_ring_info->ring_lock); + return (EAGAIN); + } + + next_read_location = get_next_read_location_with_offset( + in_ring_info, + offset); + + next_read_location = copy_from_ring_buffer( + in_ring_info, + (char *) buffer, + buffer_len, + next_read_location); + + next_read_location = copy_from_ring_buffer( + in_ring_info, + (char *) &prev_indices, + sizeof(uint64_t), + next_read_location); + + /* + * Make sure all reads are done before we update the read index since + * the writer may start writing to the read area once the read index + * is updated. + */ + wmb(); + + /* + * Update the read index + */ + set_next_read_location(in_ring_info, next_read_location); + + mtx_unlock_spin(&in_ring_info->ring_lock); + + return (0); +} + +/** + * @brief Helper routine to copy from source to ring buffer. + * + * Assume there is enough room. Handles wrap-around in dest case only! + */ +uint32_t +copy_to_ring_buffer( + hv_vmbus_ring_buffer_info* ring_info, + uint32_t start_write_offset, + char* src, + uint32_t src_len) +{ + char *ring_buffer = get_ring_buffer(ring_info); + uint32_t ring_buffer_size = get_ring_buffer_size(ring_info); + uint32_t fragLen; + + if (src_len > ring_buffer_size - start_write_offset) { + /* wrap-around detected! 
*/ + fragLen = ring_buffer_size - start_write_offset; + memcpy(ring_buffer + start_write_offset, src, fragLen); + memcpy(ring_buffer, src + fragLen, src_len - fragLen); + } else { + memcpy(ring_buffer + start_write_offset, src, src_len); + } + + start_write_offset += src_len; + start_write_offset %= ring_buffer_size; + + return (start_write_offset); +} + +/** + * @brief Helper routine to copy to source from ring buffer. + * + * Assume there is enough room. Handles wrap-around in src case only! + */ +uint32_t +copy_from_ring_buffer( + hv_vmbus_ring_buffer_info* ring_info, + char* dest, + uint32_t dest_len, + uint32_t start_read_offset) +{ + uint32_t fragLen; + char *ring_buffer = get_ring_buffer(ring_info); + uint32_t ring_buffer_size = get_ring_buffer_size(ring_info); + + if (dest_len > ring_buffer_size - start_read_offset) { + /* wrap-around detected at the src */ + fragLen = ring_buffer_size - start_read_offset; + memcpy(dest, ring_buffer + start_read_offset, fragLen); + memcpy(dest + fragLen, ring_buffer, dest_len - fragLen); + } else { + memcpy(dest, ring_buffer + start_read_offset, dest_len); + } + + start_read_offset += dest_len; + start_read_offset %= ring_buffer_size; + + return (start_read_offset); +} + diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c new file mode 100644 index 0000000..e010308 --- /dev/null +++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c @@ -0,0 +1,583 @@ +/*- + * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * VM Bus Driver Implementation + */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/systm.h> +#include <sys/rtprio.h> +#include <sys/interrupt.h> +#include <sys/sx.h> +#include <sys/taskqueue.h> +#include <sys/mutex.h> +#include <sys/smp.h> + +#include <machine/resource.h> +#include <sys/rman.h> + +#include <machine/stdarg.h> +#include <machine/intr_machdep.h> +#include <sys/pcpu.h> + +#include "hv_vmbus_priv.h" + + +#define VMBUS_IRQ 0x5 + +static struct intr_event *hv_msg_intr_event; +static struct intr_event *hv_event_intr_event; +static void *msg_swintr; +static void *event_swintr; +static device_t vmbus_devp; +static void *vmbus_cookiep; +static int vmbus_rid; +struct resource *intr_res; +static int vmbus_irq = VMBUS_IRQ; +static int vmbus_inited; + +/** + * @brief Software interrupt thread routine to handle channel messages from + * the 
hypervisor. + */ +static void +vmbus_msg_swintr(void *dummy) +{ + int cpu; + void* page_addr; + hv_vmbus_message* msg; + hv_vmbus_message* copied; + + cpu = PCPU_GET(cpuid); + page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; + msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; + + for (;;) { + if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) { + break; /* no message */ + } else { + copied = malloc(sizeof(hv_vmbus_message), + M_DEVBUF, M_NOWAIT); + KASSERT(copied != NULL, + ("Error VMBUS: malloc failed to allocate" + " hv_vmbus_message!")); + if (copied == NULL) + continue; + memcpy(copied, msg, sizeof(hv_vmbus_message)); + hv_queue_work_item(hv_vmbus_g_connection.work_queue, + hv_vmbus_on_channel_message, copied); + } + + msg->header.message_type = HV_MESSAGE_TYPE_NONE; + + /* + * Make sure the write to message_type (ie set to + * HV_MESSAGE_TYPE_NONE) happens before we read the + * message_pending and EOMing. Otherwise, the EOMing will + * not deliver any more messages + * since there is no empty slot + */ + wmb(); + + if (msg->header.message_flags.message_pending) { + /* + * This will cause message queue rescan to possibly + * deliver another msg from the hypervisor + */ + hv_vmbus_write_msr(HV_X64_MSR_EOM, 0); + } + } +} + +/** + * @brief Interrupt filter routine for VMBUS. + * + * The purpose of this routine is to determine the type of VMBUS protocol + * message to process - an event or a channel message. + * As this is an interrupt filter routine, the function runs in a very + * restricted envinronment. From the manpage for bus_setup_intr(9) + * + * In this restricted environment, care must be taken to account for all + * races. A careful analysis of races should be done as well. It is gener- + * ally cheaper to take an extra interrupt, for example, than to protect + * variables with spinlocks. Read, modify, write cycles of hardware regis- + * ters need to be carefully analyzed if other threads are accessing the + * same registers. 
+ */ +static int +hv_vmbus_isr(void *unused) +{ + int cpu; + hv_vmbus_message* msg; + hv_vmbus_synic_event_flags* event; + void* page_addr; + + cpu = PCPU_GET(cpuid); + /* (Temporary limit) */ + KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero")); + + /* + * The Windows team has advised that we check for events + * before checking for messages. This is the way they do it + * in Windows when running as a guest in Hyper-V + */ + + page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; + event = (hv_vmbus_synic_event_flags*) + page_addr + HV_VMBUS_MESSAGE_SINT; + + /* Since we are a child, we only need to check bit 0 */ + if (synch_test_and_clear_bit(0, &event->flags32[0])) { + swi_sched(event_swintr, 0); + } + + /* Check if there are actual msgs to be process */ + page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; + msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; + + if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { + swi_sched(msg_swintr, 0); + } + + return FILTER_HANDLED; +} + +static int +vmbus_read_ivar( + device_t dev, + device_t child, + int index, + uintptr_t* result) +{ + struct hv_device *child_dev_ctx = device_get_ivars(child); + + switch (index) { + + case HV_VMBUS_IVAR_TYPE: + *result = (uintptr_t) &child_dev_ctx->class_id; + return (0); + case HV_VMBUS_IVAR_INSTANCE: + *result = (uintptr_t) &child_dev_ctx->device_id; + return (0); + case HV_VMBUS_IVAR_DEVCTX: + *result = (uintptr_t) child_dev_ctx; + return (0); + case HV_VMBUS_IVAR_NODE: + *result = (uintptr_t) child_dev_ctx->device; + return (0); + } + return (ENOENT); +} + +static int +vmbus_write_ivar( + device_t dev, + device_t child, + int index, + uintptr_t value) +{ + switch (index) { + + case HV_VMBUS_IVAR_TYPE: + case HV_VMBUS_IVAR_INSTANCE: + case HV_VMBUS_IVAR_DEVCTX: + case HV_VMBUS_IVAR_NODE: + /* read-only */ + return (EINVAL); + } + return (ENOENT); +} + +struct hv_device* +hv_vmbus_child_device_create( + hv_guid type, + hv_guid instance, + 
hv_vmbus_channel* channel) +{ + hv_device* child_dev; + + /* + * Allocate the new child device + */ + child_dev = malloc(sizeof(hv_device), M_DEVBUF, + M_NOWAIT | M_ZERO); + KASSERT(child_dev != NULL, + ("Error VMBUS: malloc failed to allocate hv_device!")); + + if (child_dev == NULL) + return (NULL); + + child_dev->channel = channel; + memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); + memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); + + return (child_dev); +} + +static void +print_dev_guid(struct hv_device *dev) +{ + int i; + unsigned char guid_name[100]; + for (i = 0; i < 32; i += 2) + sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]); + if(bootverbose) + printf("VMBUS: Class ID: %s\n", guid_name); +} + +int +hv_vmbus_child_device_register(struct hv_device *child_dev) +{ + device_t child; + int ret = 0; + + print_dev_guid(child_dev); + + + child = device_add_child(vmbus_devp, NULL, -1); + child_dev->device = child; + device_set_ivars(child, child_dev); + + mtx_lock(&Giant); + ret = device_probe_and_attach(child); + mtx_unlock(&Giant); + + return (0); +} + +int +hv_vmbus_child_device_unregister(struct hv_device *child_dev) +{ + int ret = 0; + /* + * XXXKYS: Ensure that this is the opposite of + * device_add_child() + */ + mtx_lock(&Giant); + ret = device_delete_child(vmbus_devp, child_dev->device); + mtx_unlock(&Giant); + return(ret); +} + +static void vmbus_identify(driver_t *driver, device_t parent) { + BUS_ADD_CHILD(parent, 0, "vmbus", 0); + if (device_find_child(parent, "vmbus", 0) == NULL) { + BUS_ADD_CHILD(parent, 0, "vmbus", 0); + } +} + +static int +vmbus_probe(device_t dev) { + if(bootverbose) + device_printf(dev, "VMBUS: probe\n"); + + if (!hv_vmbus_query_hypervisor_presence()) + return (ENXIO); + + device_set_desc(dev, "Vmbus Devices"); + + return (0); +} + +/** + * @brief Main vmbus driver initialization routine. 
+ * + * Here, we + * - initialize the vmbus driver context + * - setup various driver entry points + * - invoke the vmbus hv main init routine + * - get the irq resource + * - invoke the vmbus to add the vmbus root device + * - setup the vmbus root device + * - retrieve the channel offers + */ +static int +vmbus_bus_init(void) +{ + struct ioapic_intsrc { + struct intsrc io_intsrc; + u_int io_irq; + u_int io_intpin:8; + u_int io_vector:8; + u_int io_cpu:8; + u_int io_activehi:1; + u_int io_edgetrigger:1; + u_int io_masked:1; + int io_bus:4; + uint32_t io_lowreg; + }; + + int ret; + unsigned int vector = 0; + struct intsrc *isrc; + struct ioapic_intsrc *intpin; + + if (vmbus_inited) + return (0); + + vmbus_inited = 1; + + ret = hv_vmbus_init(); + + if (ret) { + if(bootverbose) + printf("Error VMBUS: Hypervisor Initialization Failed!\n"); + return (ret); + } + + ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr, + NULL, SWI_CLOCK, 0, &msg_swintr); + + if (ret) + goto cleanup; + + /* + * Message SW interrupt handler checks a per-CPU page and + * thus the thread needs to be bound to CPU-0 - which is where + * all interrupts are processed. 
+ */ + ret = intr_event_bind(hv_msg_intr_event, 0); + + if (ret) + goto cleanup1; + + ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events, + NULL, SWI_CLOCK, 0, &event_swintr); + + if (ret) + goto cleanup1; + + intr_res = bus_alloc_resource(vmbus_devp, + SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE); + + if (intr_res == NULL) { + ret = ENOMEM; /* XXXKYS: Need a better errno */ + goto cleanup2; + } + + /* + * Setup interrupt filter handler + */ + ret = bus_setup_intr(vmbus_devp, intr_res, + INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL, + NULL, &vmbus_cookiep); + + if (ret != 0) + goto cleanup3; + + ret = bus_bind_intr(vmbus_devp, intr_res, 0); + if (ret != 0) + goto cleanup4; + + isrc = intr_lookup_source(vmbus_irq); + if ((isrc == NULL) || (isrc->is_event == NULL)) { + ret = EINVAL; + goto cleanup4; + } + + /* vector = isrc->is_event->ie_vector; */ + intpin = (struct ioapic_intsrc *)isrc; + vector = intpin->io_vector; + + if(bootverbose) + printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector); + + /** + * Notify the hypervisor of our irq. + */ + + smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &vector); + + /** + * Connect to VMBus in the root partition + */ + ret = hv_vmbus_connect(); + + if (ret) + goto cleanup4; + + hv_vmbus_request_channel_offers(); + return (ret); + + cleanup4: + + /* + * remove swi, bus and intr resource + */ + bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); + + cleanup3: + + bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); + + cleanup2: + swi_remove(event_swintr); + + cleanup1: + swi_remove(msg_swintr); + + cleanup: + hv_vmbus_cleanup(); + + return (ret); +} + +static int +vmbus_attach(device_t dev) +{ + if(bootverbose) + device_printf(dev, "VMBUS: attach dev: %p\n", dev); + vmbus_devp = dev; + + /* + * If the system has already booted and thread + * scheduling is possible indicated by the global + * cold set to zero, we just call the driver + * initialization directly. 
+ */ + if (!cold) + vmbus_bus_init(); + + return (0); +} + +static void +vmbus_init(void) +{ + /* + * If the system has already booted and thread + * scheduling is possible indicated by the global + * cold set to zero, we just call the driver + * initialization directly. + */ + if (!cold) + vmbus_bus_init(); +} + +static void +vmbus_bus_exit(void) +{ + hv_vmbus_release_unattached_channels(); + hv_vmbus_disconnect(); + + smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); + + hv_vmbus_cleanup(); + + /* remove swi, bus and intr resource */ + bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); + + bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); + + swi_remove(msg_swintr); + swi_remove(event_swintr); + + return; +} + +static void +vmbus_exit(void) +{ + vmbus_bus_exit(); +} + +static int +vmbus_detach(device_t dev) +{ + vmbus_exit(); + return (0); +} + +static void +vmbus_mod_load(void) +{ + if(bootverbose) + printf("VMBUS: load\n"); +} + +static void +vmbus_mod_unload(void) +{ + if(bootverbose) + printf("VMBUS: unload\n"); +} + +static int +vmbus_modevent(module_t mod, int what, void *arg) +{ + switch (what) { + + case MOD_LOAD: + vmbus_mod_load(); + break; + case MOD_UNLOAD: + vmbus_mod_unload(); + break; + } + + return (0); +} + +static device_method_t vmbus_methods[] = { + /** Device interface */ + DEVMETHOD(device_identify, vmbus_identify), + DEVMETHOD(device_probe, vmbus_probe), + DEVMETHOD(device_attach, vmbus_attach), + DEVMETHOD(device_detach, vmbus_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + + /** Bus interface */ + DEVMETHOD(bus_add_child, bus_generic_add_child), + DEVMETHOD(bus_print_child, bus_generic_print_child), + DEVMETHOD(bus_read_ivar, vmbus_read_ivar), + DEVMETHOD(bus_write_ivar, vmbus_write_ivar), + + { 0, 0 } }; + +static char driver_name[] = "vmbus"; +static driver_t vmbus_driver = { driver_name, 
vmbus_methods,0, }; + + +devclass_t vmbus_devclass; + +DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); +MODULE_VERSION(vmbus,1); + +/* TODO: We want to be earlier than SI_SUB_VFS */ +SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL); + diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h new file mode 100644 index 0000000..739acb1 --- /dev/null +++ b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h @@ -0,0 +1,770 @@ +/*- + * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __HYPERV_PRIV_H__ +#define __HYPERV_PRIV_H__ + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sema.h> + +#include <dev/hyperv/include/hyperv.h> + + +/* + * Status codes for hypervisor operations. + */ + +typedef uint16_t hv_vmbus_status; + +#define HV_MESSAGE_SIZE (256) +#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) +#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) +#define HV_ANY_VP (0xFFFFFFFF) + +/* + * Synthetic interrupt controller flag constants. + */ + +#define HV_EVENT_FLAGS_COUNT (256 * 8) +#define HV_EVENT_FLAGS_BYTE_COUNT (256) +#define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(uint32_t)) + +/* + * MessageId: HV_STATUS_INSUFFICIENT_BUFFERS + * MessageText: + * You did not supply enough message buffers to send a message. + */ + +#define HV_STATUS_INSUFFICIENT_BUFFERS ((uint16_t)0x0013) + +typedef void (*hv_vmbus_channel_callback)(void *context); + +typedef struct { + void* data; + uint32_t length; +} hv_vmbus_sg_buffer_list; + +typedef struct { + uint32_t current_interrupt_mask; + uint32_t current_read_index; + uint32_t current_write_index; + uint32_t bytes_avail_to_read; + uint32_t bytes_avail_to_write; +} hv_vmbus_ring_buffer_debug_info; + +typedef struct { + uint32_t rel_id; + hv_vmbus_channel_state state; + hv_guid interface_type; + hv_guid interface_instance; + uint32_t monitor_id; + uint32_t server_monitor_pending; + uint32_t server_monitor_latency; + uint32_t server_monitor_connection_id; + uint32_t client_monitor_pending; + uint32_t client_monitor_latency; + uint32_t client_monitor_connection_id; + hv_vmbus_ring_buffer_debug_info inbound; + hv_vmbus_ring_buffer_debug_info outbound; +} hv_vmbus_channel_debug_info; + +typedef union { + hv_vmbus_channel_version_supported version_supported; + hv_vmbus_channel_open_result open_result; + hv_vmbus_channel_gpadl_torndown gpadl_torndown; + hv_vmbus_channel_gpadl_created gpadl_created; + hv_vmbus_channel_version_response version_response; +} 
hv_vmbus_channel_msg_response; + +/* + * Represents each channel msg on the vmbus connection + * This is a variable-size data structure depending on + * the msg type itself + */ +typedef struct hv_vmbus_channel_msg_info { + /* + * Bookkeeping stuff + */ + TAILQ_ENTRY(hv_vmbus_channel_msg_info) msg_list_entry; + /* + * So far, this is only used to handle + * gpadl body message + */ + TAILQ_HEAD(, hv_vmbus_channel_msg_info) sub_msg_list_anchor; + /* + * Synchronize the request/response if + * needed. + * KYS: Use a semaphore for now. + * Not perf critical. + */ + struct sema wait_sema; + hv_vmbus_channel_msg_response response; + uint32_t message_size; + /** + * The channel message that goes out on + * the "wire". It will contain at + * minimum the + * hv_vmbus_channel_msg_header + * header. + */ + unsigned char msg[0]; +} hv_vmbus_channel_msg_info; + +/* + * The format must be the same as hv_vm_data_gpa_direct + */ +typedef struct hv_vmbus_channel_packet_page_buffer { + uint16_t type; + uint16_t data_offset8; + uint16_t length8; + uint16_t flags; + uint64_t transaction_id; + uint32_t reserved; + uint32_t range_count; + hv_vmbus_page_buffer range[HV_MAX_PAGE_BUFFER_COUNT]; +} __packed hv_vmbus_channel_packet_page_buffer; + +/* + * The format must be the same as hv_vm_data_gpa_direct + */ +typedef struct hv_vmbus_channel_packet_multipage_buffer { + uint16_t type; + uint16_t data_offset8; + uint16_t length8; + uint16_t flags; + uint64_t transaction_id; + uint32_t reserved; + uint32_t range_count; /* Always 1 in this case */ + hv_vmbus_multipage_buffer range; +} __packed hv_vmbus_channel_packet_multipage_buffer; + +enum { + HV_VMBUS_MESSAGE_CONNECTION_ID = 1, + HV_VMBUS_MESSAGE_PORT_ID = 1, + HV_VMBUS_EVENT_CONNECTION_ID = 2, + HV_VMBUS_EVENT_PORT_ID = 2, + HV_VMBUS_MONITOR_CONNECTION_ID = 3, + HV_VMBUS_MONITOR_PORT_ID = 3, + HV_VMBUS_MESSAGE_SINT = 2 +}; + +#define HV_PRESENT_BIT 0x80000000 + +#define HV_HYPERCALL_PARAM_ALIGN sizeof(uint64_t) + +/* + * Connection 
identifier type + */ +typedef union { + uint32_t as_uint32_t; + struct { + uint32_t id:24; + uint32_t reserved:8; + } u; + +} __packed hv_vmbus_connection_id; + +/* + * Definition of the hv_vmbus_signal_event hypercall input structure + */ +typedef struct { + hv_vmbus_connection_id connection_id; + uint16_t flag_number; + uint16_t rsvd_z; +} __packed hv_vmbus_input_signal_event; + +typedef struct { + uint64_t align8; + hv_vmbus_input_signal_event event; +} __packed hv_vmbus_input_signal_event_buffer; + +typedef struct { + uint64_t guest_id; + void* hypercall_page; + hv_bool_uint8_t syn_ic_initialized; + /* + * This is used as an input param to HV_CALL_SIGNAL_EVENT hypercall. + * The input param is immutable in our usage and + * must be dynamic mem (vs stack or global). + */ + hv_vmbus_input_signal_event_buffer *signal_event_buffer; + /* + * 8-bytes aligned of the buffer above + */ + hv_vmbus_input_signal_event *signal_event_param; + + hv_vmbus_handle syn_ic_msg_page[MAXCPU]; + hv_vmbus_handle syn_ic_event_page[MAXCPU]; +} hv_vmbus_context; + +/* + * Define hypervisor message types + */ +typedef enum { + + HV_MESSAGE_TYPE_NONE = 0x00000000, + + /* + * Memory access messages + */ + HV_MESSAGE_TYPE_UNMAPPED_GPA = 0x80000000, + HV_MESSAGE_TYPE_GPA_INTERCEPT = 0x80000001, + + /* + * Timer notification messages + */ + HV_MESSAGE_TIMER_EXPIRED = 0x80000010, + + /* + * Error messages + */ + HV_MESSAGE_TYPE_INVALID_VP_REGISTER_VALUE = 0x80000020, + HV_MESSAGE_TYPE_UNRECOVERABLE_EXCEPTION = 0x80000021, + HV_MESSAGE_TYPE_UNSUPPORTED_FEATURE = 0x80000022, + + /* + * Trace buffer complete messages + */ + HV_MESSAGE_TYPE_EVENT_LOG_BUFFER_COMPLETE = 0x80000040, + + /* + * Platform-specific processor intercept messages + */ + HV_MESSAGE_TYPE_X64_IO_PORT_INTERCEPT = 0x80010000, + HV_MESSAGE_TYPE_X64_MSR_INTERCEPT = 0x80010001, + HV_MESSAGE_TYPE_X64_CPU_INTERCEPT = 0x80010002, + HV_MESSAGE_TYPE_X64_EXCEPTION_INTERCEPT = 0x80010003, + HV_MESSAGE_TYPE_X64_APIC_EOI = 0x80010004, + 
HV_MESSAGE_TYPE_X64_LEGACY_FP_ERROR = 0x80010005 + +} hv_vmbus_msg_type; + +/* + * Define port identifier type + */ +typedef union _hv_vmbus_port_id { + uint32_t as_uint32_t; + struct { + uint32_t id:24; + uint32_t reserved:8; + } u ; +} hv_vmbus_port_id; + +/* + * Define synthetic interrupt controller message flag + */ +typedef union { + uint8_t as_uint8_t; + struct { + uint8_t message_pending:1; + uint8_t reserved:7; + }; +} hv_vmbus_msg_flags; + +typedef uint64_t hv_vmbus_partition_id; + +/* + * Define synthetic interrupt controller message header + */ +typedef struct { + hv_vmbus_msg_type message_type; + uint8_t payload_size; + hv_vmbus_msg_flags message_flags; + uint8_t reserved[2]; + union { + hv_vmbus_partition_id sender; + hv_vmbus_port_id port; + } u; +} hv_vmbus_msg_header; + +/* + * Define synthetic interrupt controller message format + */ +typedef struct { + hv_vmbus_msg_header header; + union { + uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; + } u ; +} hv_vmbus_message; + +/* + * Maximum channels is determined by the size of the interrupt + * page which is PAGE_SIZE. 1/2 of PAGE_SIZE is for + * send endpoint interrupt and the other is receive + * endpoint interrupt. + * + * Note: (PAGE_SIZE >> 1) << 3 allocates 16348 channels + */ +#define HV_MAX_NUM_CHANNELS (PAGE_SIZE >> 1) << 3 + +/* + * (The value here must be in multiple of 32) + */ +#define HV_MAX_NUM_CHANNELS_SUPPORTED 256 + +/* + * VM Bus connection states + */ +typedef enum { + HV_DISCONNECTED, + HV_CONNECTING, + HV_CONNECTED, + HV_DISCONNECTING +} hv_vmbus_connect_state; + +#define HV_MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT + + +typedef struct { + hv_vmbus_connect_state connect_state; + uint32_t next_gpadl_handle; + /** + * Represents channel interrupts. Each bit position + * represents a channel. + * When a channel sends an interrupt via VMBUS, it + * finds its bit in the send_interrupt_page, set it and + * calls Hv to generate a port event. 
The other end + * receives the port event and parse the + * recv_interrupt_page to see which bit is set + */ + void *interrupt_page; + void *send_interrupt_page; + void *recv_interrupt_page; + /* + * 2 pages - 1st page for parent->child + * notification and 2nd is child->parent + * notification + */ + void *monitor_pages; + TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor; + struct mtx channel_msg_lock; + /** + * List of channels + */ + TAILQ_HEAD(, hv_vmbus_channel) channel_anchor; + struct mtx channel_lock; + + hv_vmbus_handle work_queue; + struct sema control_sema; +} hv_vmbus_connection; + +/* + * Declare the MSR used to identify the guest OS + */ +#define HV_X64_MSR_GUEST_OS_ID 0x40000000 + +typedef union { + uint64_t as_uint64_t; + struct { + uint64_t build_number : 16; + uint64_t service_version : 8; /* Service Pack, etc. */ + uint64_t minor_version : 8; + uint64_t major_version : 8; + /* + * HV_GUEST_OS_MICROSOFT_IDS (If Vendor=MS) + * HV_GUEST_OS_VENDOR + */ + uint64_t os_id : 8; + uint64_t vendor_id : 16; + }; +} hv_vmbus_x64_msr_guest_os_id_contents; + +/* + * Declare the MSR used to setup pages used to communicate with the hypervisor + */ +#define HV_X64_MSR_HYPERCALL 0x40000001 + +typedef union { + uint64_t as_uint64_t; + struct { + uint64_t enable :1; + uint64_t reserved :11; + uint64_t guest_physical_address :52; + }; +} hv_vmbus_x64_msr_hypercall_contents; + +typedef union { + uint32_t as_uint32_t; + struct { + uint32_t group_enable :4; + uint32_t rsvd_z :28; + }; +} hv_vmbus_monitor_trigger_state; + +typedef union { + uint64_t as_uint64_t; + struct { + uint32_t pending; + uint32_t armed; + }; +} hv_vmbus_monitor_trigger_group; + +typedef struct { + hv_vmbus_connection_id connection_id; + uint16_t flag_number; + uint16_t rsvd_z; +} hv_vmbus_monitor_parameter; + +/* + * hv_vmbus_monitor_page Layout + * ------------------------------------------------------ + * | 0 | trigger_state (4 bytes) | Rsvd1 (4 bytes) | + * | 8 | trigger_group[0] | + 
* | 10 | trigger_group[1] | + * | 18 | trigger_group[2] | + * | 20 | trigger_group[3] | + * | 28 | Rsvd2[0] | + * | 30 | Rsvd2[1] | + * | 38 | Rsvd2[2] | + * | 40 | next_check_time[0][0] | next_check_time[0][1] | + * | ... | + * | 240 | latency[0][0..3] | + * | 340 | Rsvz3[0] | + * | 440 | parameter[0][0] | + * | 448 | parameter[0][1] | + * | ... | + * | 840 | Rsvd4[0] | + * ------------------------------------------------------ + */ + +typedef struct { + hv_vmbus_monitor_trigger_state trigger_state; + uint32_t rsvd_z1; + + hv_vmbus_monitor_trigger_group trigger_group[4]; + uint64_t rsvd_z2[3]; + + int32_t next_check_time[4][32]; + + uint16_t latency[4][32]; + uint64_t rsvd_z3[32]; + + hv_vmbus_monitor_parameter parameter[4][32]; + + uint8_t rsvd_z4[1984]; +} hv_vmbus_monitor_page; + +/* + * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent + * is set by CPUID(HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES). + */ +typedef enum { + HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES = 0x00000001, + HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION = 0x40000000, + HV_CPU_ID_FUNCTION_HV_INTERFACE = 0x40000001, + /* + * The remaining functions depend on the value + * of hv_cpu_id_function_interface + */ + HV_CPU_ID_FUNCTION_MS_HV_VERSION = 0x40000002, + HV_CPU_ID_FUNCTION_MS_HV_FEATURES = 0x40000003, + HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION = 0x40000004, + HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS = 0x40000005 + +} hv_vmbus_cpuid_function; + +/* + * Define the format of the SIMP register + */ +typedef union { + uint64_t as_uint64_t; + struct { + uint64_t simp_enabled : 1; + uint64_t preserved : 11; + uint64_t base_simp_gpa : 52; + }; +} hv_vmbus_synic_simp; + +/* + * Define the format of the SIEFP register + */ +typedef union { + uint64_t as_uint64_t; + struct { + uint64_t siefp_enabled : 1; + uint64_t preserved : 11; + uint64_t base_siefp_gpa : 52; + }; +} hv_vmbus_synic_siefp; + +/* + * Define synthetic interrupt source + */ +typedef union { + 
uint64_t as_uint64_t; + struct { + uint64_t vector : 8; + uint64_t reserved1 : 8; + uint64_t masked : 1; + uint64_t auto_eoi : 1; + uint64_t reserved2 : 46; + }; +} hv_vmbus_synic_sint; + +/* + * Define syn_ic control register + */ +typedef union _hv_vmbus_synic_scontrol { + uint64_t as_uint64_t; + struct { + uint64_t enable : 1; + uint64_t reserved : 63; + }; +} hv_vmbus_synic_scontrol; + +/* + * Define the hv_vmbus_post_message hypercall input structure + */ +typedef struct { + hv_vmbus_connection_id connection_id; + uint32_t reserved; + hv_vmbus_msg_type message_type; + uint32_t payload_size; + uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; +} hv_vmbus_input_post_message; + +/* + * Define the synthetic interrupt controller event flags format + */ +typedef union { + uint8_t flags8[HV_EVENT_FLAGS_BYTE_COUNT]; + uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT]; +} hv_vmbus_synic_event_flags; + + +/* + * Define synthetic interrupt controller model specific registers + */ +#define HV_X64_MSR_SCONTROL (0x40000080) +#define HV_X64_MSR_SVERSION (0x40000081) +#define HV_X64_MSR_SIEFP (0x40000082) +#define HV_X64_MSR_SIMP (0x40000083) +#define HV_X64_MSR_EOM (0x40000084) + +#define HV_X64_MSR_SINT0 (0x40000090) +#define HV_X64_MSR_SINT1 (0x40000091) +#define HV_X64_MSR_SINT2 (0x40000092) +#define HV_X64_MSR_SINT3 (0x40000093) +#define HV_X64_MSR_SINT4 (0x40000094) +#define HV_X64_MSR_SINT5 (0x40000095) +#define HV_X64_MSR_SINT6 (0x40000096) +#define HV_X64_MSR_SINT7 (0x40000097) +#define HV_X64_MSR_SINT8 (0x40000098) +#define HV_X64_MSR_SINT9 (0x40000099) +#define HV_X64_MSR_SINT10 (0x4000009A) +#define HV_X64_MSR_SINT11 (0x4000009B) +#define HV_X64_MSR_SINT12 (0x4000009C) +#define HV_X64_MSR_SINT13 (0x4000009D) +#define HV_X64_MSR_SINT14 (0x4000009E) +#define HV_X64_MSR_SINT15 (0x4000009F) + +/* + * Declare the various hypercall operations + */ +typedef enum { + HV_CALL_POST_MESSAGE = 0x005c, + HV_CALL_SIGNAL_EVENT = 0x005d, +} hv_vmbus_call_code; + +/** + * Global 
variables + */ + +extern hv_vmbus_context hv_vmbus_g_context; +extern hv_vmbus_connection hv_vmbus_g_connection; + + +/* + * Private, VM Bus functions + */ + +int hv_vmbus_ring_buffer_init( + hv_vmbus_ring_buffer_info *ring_info, + void *buffer, + uint32_t buffer_len); + +void hv_ring_buffer_cleanup( + hv_vmbus_ring_buffer_info *ring_info); + +int hv_ring_buffer_write( + hv_vmbus_ring_buffer_info *ring_info, + hv_vmbus_sg_buffer_list sg_buffers[], + uint32_t sg_buff_count); + +int hv_ring_buffer_peek( + hv_vmbus_ring_buffer_info *ring_info, + void *buffer, + uint32_t buffer_len); + +int hv_ring_buffer_read( + hv_vmbus_ring_buffer_info *ring_info, + void *buffer, + uint32_t buffer_len, + uint32_t offset); + +uint32_t hv_vmbus_get_ring_buffer_interrupt_mask( + hv_vmbus_ring_buffer_info *ring_info); + +void hv_vmbus_dump_ring_info( + hv_vmbus_ring_buffer_info *ring_info, + char *prefix); + +hv_vmbus_channel* hv_vmbus_allocate_channel(void); +void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel); +void hv_vmbus_on_channel_message(void *context); +int hv_vmbus_request_channel_offers(void); +void hv_vmbus_release_unattached_channels(void); +int hv_vmbus_init(void); +void hv_vmbus_cleanup(void); + +uint16_t hv_vmbus_post_msg_via_msg_ipc( + hv_vmbus_connection_id connection_id, + hv_vmbus_msg_type message_type, + void *payload, + size_t payload_size); + +uint16_t hv_vmbus_signal_event(void); +void hv_vmbus_synic_init(void *irq_arg); +void hv_vmbus_synic_cleanup(void *arg); +int hv_vmbus_query_hypervisor_presence(void); + +struct hv_device* hv_vmbus_child_device_create( + hv_guid device_type, + hv_guid device_instance, + hv_vmbus_channel *channel); + +int hv_vmbus_child_device_register( + struct hv_device *child_dev); +int hv_vmbus_child_device_unregister( + struct hv_device *child_dev); +hv_vmbus_channel* hv_vmbus_get_channel_from_rel_id(uint32_t rel_id); + +/** + * Connection interfaces + */ +int hv_vmbus_connect(void); +int hv_vmbus_disconnect(void); +int 
hv_vmbus_post_message(void *buffer, size_t buf_size); +int hv_vmbus_set_event(uint32_t child_rel_id); +void hv_vmbus_on_events(void *); + +/* + * static inline functions + * (with some helper macros for reading/writing to model specific registers) + */ + +#ifdef __x86_64__ + +#define HV_VMBUS_READ_MSR(reg, v) { \ + uint32_t h, l; \ + __asm__ __volatile__("rdmsr" \ + : "=a" (l), "=d" (h) \ + : "c" (reg)); \ + v = (((uint64_t)h) << 32) | l; \ +} + +#define HV_VMBUS_WRITE_MSR(reg, v) { \ + uint32_t h, l; \ + l = (uint32_t)(((uint64_t)(v)) & 0xFFFFFFFF); \ + h = (uint32_t)((((uint64_t)(v)) >> 32) & 0xFFFFFFFF); \ + __asm__ __volatile__("wrmsr" \ + : /* no outputs */ \ + : "c" (reg), "a" (l), "d" (h)); \ +} + +#else + +#define HV_VMBUS_READ_MSR(reg, v) \ + __asm__ __volatile__("rdmsr" \ + : "=A" (v) \ + : "c" (reg)) + +#define HV_VMBUS_WRITE_MSR(reg, v) \ + __asm__ __volatile__("wrmsr" \ + : /* no outputs */ \ + : "c" (reg), "A" ((uint64_t)v)) + +#endif + +static inline unsigned long long +hv_vmbus_read_msr(int msr) +{ + unsigned long long val; + HV_VMBUS_READ_MSR(msr, val); + return (val); +} + +static inline +void hv_vmbus_write_msr(int msr, uint64_t val) +{ + HV_VMBUS_WRITE_MSR(msr, val); + return; +} + +/* + * The guest OS needs to register the guest ID with the hypervisor. + * The guest ID is a 64 bit entity and the structure of this ID is + * specified in the Hyper-V specification: + * + * http://msdn.microsoft.com/en-us/library/windows/ + * hardware/ff542653%28v=vs.85%29.aspx + * + * While the current guideline does not specify how FreeBSD guest ID(s) + * need to be generated, our plan is to publish the guidelines for + * FreeBSD and other guest operating systems that currently are hosted + * on Hyper-V. The implementation here conforms to this yet + * unpublished guidelines. 
+ * + * Bit(s) + * 63 - Indicates if the OS is Open Source or not; 1 is Open Source + * 62:56 - Os Type; Linux is 0x100, FreeBSD is 0x200 + * 55:48 - Distro specific identification + * 47:16 - FreeBSD kernel version number + * 15:0 - Distro specific identification + * + */ + +#define HV_FREEBSD_VENDOR_ID 0x8200 +#define HV_FREEBSD_GUEST_ID hv_generate_guest_id(0,0) + +static inline uint64_t hv_generate_guest_id( + uint8_t distro_id_part1, + uint16_t distro_id_part2) +{ + uint64_t guest_id; + guest_id = (((uint64_t)HV_FREEBSD_VENDOR_ID) << 48); + guest_id |= (((uint64_t)(distro_id_part1)) << 48); + guest_id |= (((uint64_t)(__FreeBSD_version)) << 16); /* in param.h */ + guest_id |= ((uint64_t)(distro_id_part2)); + return guest_id; +} + + +#endif /* __HYPERV_PRIV_H__ */ |