summaryrefslogtreecommitdiffstats
path: root/sys/dev/hyperv/vmbus
diff options
context:
space:
mode:
authorgrehan <grehan@FreeBSD.org>2013-07-17 06:30:23 +0000
committergrehan <grehan@FreeBSD.org>2013-07-17 06:30:23 +0000
commitc8195f5331ccad33ad4e265362523f51b96abd5c (patch)
tree5b7b0a05acafc6450fb27259d1c1d5008e1f88f4 /sys/dev/hyperv/vmbus
parent6a7baaf83640e0eaa135d2f7a3c1d4401f1683bf (diff)
downloadFreeBSD-src-c8195f5331ccad33ad4e265362523f51b96abd5c.zip
FreeBSD-src-c8195f5331ccad33ad4e265362523f51b96abd5c.tar.gz
Microsoft have changed their policy on how the hyper-v code will
be pulled into FreeBSD. From now, FreeBSD will be considered the upstream repo. First step: move the drivers away from the contrib area and into the base system. A follow-on commit will include the drivers in the amd64 GENERIC kernel.
Diffstat (limited to 'sys/dev/hyperv/vmbus')
-rw-r--r--sys/dev/hyperv/vmbus/hv_channel.c842
-rw-r--r--sys/dev/hyperv/vmbus/hv_channel_mgmt.c680
-rw-r--r--sys/dev/hyperv/vmbus/hv_connection.c431
-rw-r--r--sys/dev/hyperv/vmbus/hv_hv.c515
-rw-r--r--sys/dev/hyperv/vmbus/hv_ring_buffer.c440
-rw-r--r--sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c583
-rw-r--r--sys/dev/hyperv/vmbus/hv_vmbus_priv.h770
7 files changed, 4261 insertions, 0 deletions
diff --git a/sys/dev/hyperv/vmbus/hv_channel.c b/sys/dev/hyperv/vmbus/hv_channel.c
new file mode 100644
index 0000000..17dfd76
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_channel.c
@@ -0,0 +1,842 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <machine/bus.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+#include "hv_vmbus_priv.h"
+
+static int vmbus_channel_create_gpadl_header(
+ /* must be phys and virt contiguous*/
+ void* contig_buffer,
+ /* page-size multiple */
+ uint32_t size,
+ hv_vmbus_channel_msg_info** msg_info,
+ uint32_t* message_count);
+
+static void vmbus_channel_set_event(hv_vmbus_channel* channel);
+
+/**
+ * @brief Trigger an event notification on the specified channel
+ */
+static void
+vmbus_channel_set_event(hv_vmbus_channel *channel)
+{
+ hv_vmbus_monitor_page *monitor_page;
+
+ if (channel->offer_msg.monitor_allocated) {
+ /* Each uint32_t represents 32 channels */
+ synch_set_bit((channel->offer_msg.child_rel_id & 31),
+ ((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
+ + ((channel->offer_msg.child_rel_id >> 5))));
+
+ monitor_page = (hv_vmbus_monitor_page *)
+ hv_vmbus_g_connection.monitor_pages;
+
+ monitor_page++; /* Get the child to parent monitor page */
+
+ synch_set_bit(channel->monitor_bit,
+ (uint32_t *)&monitor_page->
+ trigger_group[channel->monitor_group].pending);
+ } else {
+ hv_vmbus_set_event(channel->offer_msg.child_rel_id);
+ }
+
+}
+
/**
 * @brief Open the specified channel
 *
 * Allocates a physically contiguous send+receive ring buffer, registers
 * it with the hypervisor via a GPADL, then performs the OPEN_CHANNEL
 * message exchange and waits up to 5 seconds for the host's response.
 *
 * @param new_channel              channel to open
 * @param send_ring_buffer_size    outbound ring size in bytes (page multiple)
 * @param recv_ring_buffer_size    inbound ring size in bytes (page multiple)
 * @param user_data                optional payload copied into the open msg
 * @param user_data_len            size of user_data in bytes
 * @param pfn_on_channel_callback  invoked when the channel has work
 * @param context                  opaque argument passed to the callback
 *
 * @returns 0 on success; ENOMEM, a post-message error, or the semaphore
 *          timeout status on failure
 */
int
hv_vmbus_channel_open(
	hv_vmbus_channel*		new_channel,
	uint32_t			send_ring_buffer_size,
	uint32_t			recv_ring_buffer_size,
	void*				user_data,
	uint32_t			user_data_len,
	hv_vmbus_pfn_channel_callback	pfn_on_channel_callback,
	void*				context)
{

	int ret = 0;
	void *in, *out;
	hv_vmbus_channel_open_channel*	open_msg;
	hv_vmbus_channel_msg_info*	open_info;

	new_channel->on_channel_callback = pfn_on_channel_callback;
	new_channel->channel_callback_context = context;

	/* Allocate the ring buffer (send and recv rings share one block) */
	out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size),
	    M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	KASSERT(out != NULL,
	    ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!"));
	if (out == NULL)
		return (ENOMEM);

	/* The receive ring follows the send ring in the same allocation */
	in = ((uint8_t *) out + send_ring_buffer_size);

	new_channel->ring_buffer_pages = out;
	new_channel->ring_buffer_page_count = (send_ring_buffer_size
	    + recv_ring_buffer_size) >> PAGE_SHIFT;

	hv_vmbus_ring_buffer_init(
		&new_channel->outbound,
		out,
		send_ring_buffer_size);

	hv_vmbus_ring_buffer_init(
		&new_channel->inbound,
		in,
		recv_ring_buffer_size);

	/**
	 * Establish the gpadl for the ring buffer
	 */
	new_channel->ring_buffer_gpadl_handle = 0;

	/*
	 * NOTE(review): this return value is overwritten below without being
	 * checked, so a GPADL-establishment failure goes undetected here.
	 */
	ret = hv_vmbus_channel_establish_gpadl(new_channel,
		new_channel->outbound.ring_buffer,
		send_ring_buffer_size + recv_ring_buffer_size,
		&new_channel->ring_buffer_gpadl_handle);

	/**
	 * Create and init the channel open message
	 */
	open_info = (hv_vmbus_channel_msg_info*) malloc(
		sizeof(hv_vmbus_channel_msg_info) +
		sizeof(hv_vmbus_channel_open_channel),
		M_DEVBUF,
		M_NOWAIT);
	KASSERT(open_info != NULL,
	("Error VMBUS: malloc failed to allocate Open Channel message!"));

	/*
	 * NOTE(review): returning here leaks the contigmalloc'd ring buffer
	 * and the GPADL established above — confirm whether callers recover
	 * via hv_vmbus_channel_close() on this path.
	 */
	if (open_info == NULL)
		return (ENOMEM);

	/* Posted by the channel-message handler when the open result arrives */
	sema_init(&open_info->wait_sema, 0, "Open Info Sema");

	open_msg = (hv_vmbus_channel_open_channel*) open_info->msg;
	open_msg->header.message_type = HV_CHANNEL_MESSAGE_OPEN_CHANNEL;
	open_msg->open_id = new_channel->offer_msg.child_rel_id;
	open_msg->child_rel_id = new_channel->offer_msg.child_rel_id;
	open_msg->ring_buffer_gpadl_handle =
		new_channel->ring_buffer_gpadl_handle;
	/* Start of the receive (downstream) ring within the GPADL, in pages */
	open_msg->downstream_ring_buffer_page_offset = send_ring_buffer_size
	    >> PAGE_SHIFT;
	open_msg->server_context_area_gpadl_handle = 0;

	if (user_data_len)
		memcpy(open_msg->user_data, user_data, user_data_len);

	/* Queue the request so the response handler can match the reply */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_INSERT_TAIL(
		&hv_vmbus_g_connection.channel_msg_anchor,
		open_info,
		msg_list_entry);
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);

	ret = hv_vmbus_post_message(
		open_msg, sizeof(hv_vmbus_channel_open_channel));

	if (ret != 0)
		goto cleanup;

	ret = sema_timedwait(&open_info->wait_sema, 500); /* KYS 5 seconds */

	if (ret)
		goto cleanup;

	if (open_info->response.open_result.status == 0) {
	    if(bootverbose)
		printf("VMBUS: channel <%p> open success.\n", new_channel);
	} else {
	    if(bootverbose)
		printf("Error VMBUS: channel <%p> open failed - %d!\n",
			new_channel, open_info->response.open_result.status);
	}

	cleanup:
	/* Always dequeue and release the request, success or failure */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_REMOVE(
		&hv_vmbus_g_connection.channel_msg_anchor,
		open_info,
		msg_list_entry);
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	sema_destroy(&open_info->wait_sema);
	free(open_info, M_DEVBUF);

	return (ret);
}
+
+/**
+ * @brief Create a gpadl for the specified buffer
+ */
+static int
+vmbus_channel_create_gpadl_header(
+ void* contig_buffer,
+ uint32_t size, /* page-size multiple */
+ hv_vmbus_channel_msg_info** msg_info,
+ uint32_t* message_count)
+{
+ int i;
+ int page_count;
+ unsigned long long pfn;
+ uint32_t msg_size;
+ hv_vmbus_channel_gpadl_header* gpa_header;
+ hv_vmbus_channel_gpadl_body* gpadl_body;
+ hv_vmbus_channel_msg_info* msg_header;
+ hv_vmbus_channel_msg_info* msg_body;
+
+ int pfnSum, pfnCount, pfnLeft, pfnCurr, pfnSize;
+
+ page_count = size >> PAGE_SHIFT;
+ pfn = hv_get_phys_addr(contig_buffer) >> PAGE_SHIFT;
+
+ /*do we need a gpadl body msg */
+ pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE
+ - sizeof(hv_vmbus_channel_gpadl_header)
+ - sizeof(hv_gpa_range);
+ pfnCount = pfnSize / sizeof(uint64_t);
+
+ if (page_count > pfnCount) { /* if(we need a gpadl body) */
+ /* fill in the header */
+ msg_size = sizeof(hv_vmbus_channel_msg_info)
+ + sizeof(hv_vmbus_channel_gpadl_header)
+ + sizeof(hv_gpa_range)
+ + pfnCount * sizeof(uint64_t);
+ msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
+ KASSERT(
+ msg_header != NULL,
+ ("Error VMBUS: malloc failed to allocate Gpadl Message!"));
+ if (msg_header == NULL)
+ return (ENOMEM);
+
+ TAILQ_INIT(&msg_header->sub_msg_list_anchor);
+ msg_header->message_size = msg_size;
+
+ gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg;
+ gpa_header->range_count = 1;
+ gpa_header->range_buf_len = sizeof(hv_gpa_range)
+ + page_count * sizeof(uint64_t);
+ gpa_header->range[0].byte_offset = 0;
+ gpa_header->range[0].byte_count = size;
+ for (i = 0; i < pfnCount; i++) {
+ gpa_header->range[0].pfn_array[i] = pfn + i;
+ }
+ *msg_info = msg_header;
+ *message_count = 1;
+
+ pfnSum = pfnCount;
+ pfnLeft = page_count - pfnCount;
+
+ /*
+ * figure out how many pfns we can fit
+ */
+ pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE
+ - sizeof(hv_vmbus_channel_gpadl_body);
+ pfnCount = pfnSize / sizeof(uint64_t);
+
+ /*
+ * fill in the body
+ */
+ while (pfnLeft) {
+ if (pfnLeft > pfnCount) {
+ pfnCurr = pfnCount;
+ } else {
+ pfnCurr = pfnLeft;
+ }
+
+ msg_size = sizeof(hv_vmbus_channel_msg_info) +
+ sizeof(hv_vmbus_channel_gpadl_body) +
+ pfnCurr * sizeof(uint64_t);
+ msg_body = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
+ KASSERT(
+ msg_body != NULL,
+ ("Error VMBUS: malloc failed to allocate Gpadl msg_body!"));
+ if (msg_body == NULL)
+ return (ENOMEM);
+
+ msg_body->message_size = msg_size;
+ (*message_count)++;
+ gpadl_body =
+ (hv_vmbus_channel_gpadl_body*) msg_body->msg;
+ /*
+ * gpadl_body->gpadl = kbuffer;
+ */
+ for (i = 0; i < pfnCurr; i++) {
+ gpadl_body->pfn[i] = pfn + pfnSum + i;
+ }
+
+ TAILQ_INSERT_TAIL(
+ &msg_header->sub_msg_list_anchor,
+ msg_body,
+ msg_list_entry);
+ pfnSum += pfnCurr;
+ pfnLeft -= pfnCurr;
+ }
+ } else { /* else everything fits in a header */
+
+ msg_size = sizeof(hv_vmbus_channel_msg_info) +
+ sizeof(hv_vmbus_channel_gpadl_header) +
+ sizeof(hv_gpa_range) +
+ page_count * sizeof(uint64_t);
+ msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
+ KASSERT(
+ msg_header != NULL,
+ ("Error VMBUS: malloc failed to allocate Gpadl Message!"));
+ if (msg_header == NULL)
+ return (ENOMEM);
+
+ msg_header->message_size = msg_size;
+
+ gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg;
+ gpa_header->range_count = 1;
+ gpa_header->range_buf_len = sizeof(hv_gpa_range) +
+ page_count * sizeof(uint64_t);
+ gpa_header->range[0].byte_offset = 0;
+ gpa_header->range[0].byte_count = size;
+ for (i = 0; i < page_count; i++) {
+ gpa_header->range[0].pfn_array[i] = pfn + i;
+ }
+
+ *msg_info = msg_header;
+ *message_count = 1;
+ }
+
+ return (0);
+}
+
+/**
+ * @brief Establish a GPADL for the specified buffer
+ */
+int
+hv_vmbus_channel_establish_gpadl(
+ hv_vmbus_channel* channel,
+ void* contig_buffer,
+ uint32_t size, /* page-size multiple */
+ uint32_t* gpadl_handle)
+
+{
+ int ret = 0;
+ hv_vmbus_channel_gpadl_header* gpadl_msg;
+ hv_vmbus_channel_gpadl_body* gpadl_body;
+ hv_vmbus_channel_msg_info* msg_info;
+ hv_vmbus_channel_msg_info* sub_msg_info;
+ uint32_t msg_count;
+ hv_vmbus_channel_msg_info* curr;
+ uint32_t next_gpadl_handle;
+
+ next_gpadl_handle = hv_vmbus_g_connection.next_gpadl_handle;
+ atomic_add_int((int*) &hv_vmbus_g_connection.next_gpadl_handle, 1);
+
+ ret = vmbus_channel_create_gpadl_header(
+ contig_buffer, size, &msg_info, &msg_count);
+
+ if(ret != 0) { /* if(allocation failed) return immediately */
+ /* reverse atomic_add_int above */
+ atomic_subtract_int((int*)
+ &hv_vmbus_g_connection.next_gpadl_handle, 1);
+ return ret;
+ }
+
+ sema_init(&msg_info->wait_sema, 0, "Open Info Sema");
+ gpadl_msg = (hv_vmbus_channel_gpadl_header*) msg_info->msg;
+ gpadl_msg->header.message_type = HV_CHANNEL_MESSAGEL_GPADL_HEADER;
+ gpadl_msg->child_rel_id = channel->offer_msg.child_rel_id;
+ gpadl_msg->gpadl = next_gpadl_handle;
+
+ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ TAILQ_INSERT_TAIL(
+ &hv_vmbus_g_connection.channel_msg_anchor,
+ msg_info,
+ msg_list_entry);
+
+ mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+ ret = hv_vmbus_post_message(
+ gpadl_msg,
+ msg_info->message_size -
+ (uint32_t) sizeof(hv_vmbus_channel_msg_info));
+
+ if (ret != 0)
+ goto cleanup;
+
+ if (msg_count > 1) {
+ TAILQ_FOREACH(curr,
+ &msg_info->sub_msg_list_anchor, msg_list_entry) {
+ sub_msg_info = curr;
+ gpadl_body =
+ (hv_vmbus_channel_gpadl_body*) sub_msg_info->msg;
+
+ gpadl_body->header.message_type =
+ HV_CHANNEL_MESSAGE_GPADL_BODY;
+ gpadl_body->gpadl = next_gpadl_handle;
+
+ ret = hv_vmbus_post_message(
+ gpadl_body,
+ sub_msg_info->message_size
+ - (uint32_t) sizeof(hv_vmbus_channel_msg_info));
+ /* if (the post message failed) give up and clean up */
+ if(ret != 0)
+ goto cleanup;
+ }
+ }
+
+ ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds*/
+ if (ret != 0)
+ goto cleanup;
+
+ *gpadl_handle = gpadl_msg->gpadl;
+
+cleanup:
+
+ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor,
+ msg_info, msg_list_entry);
+ mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+ sema_destroy(&msg_info->wait_sema);
+ free(msg_info, M_DEVBUF);
+
+ return (ret);
+}
+
+/**
+ * @brief Teardown the specified GPADL handle
+ */
+int
+hv_vmbus_channel_teardown_gpdal(
+ hv_vmbus_channel* channel,
+ uint32_t gpadl_handle)
+{
+ int ret = 0;
+ hv_vmbus_channel_gpadl_teardown* msg;
+ hv_vmbus_channel_msg_info* info;
+
+ info = (hv_vmbus_channel_msg_info *)
+ malloc( sizeof(hv_vmbus_channel_msg_info) +
+ sizeof(hv_vmbus_channel_gpadl_teardown),
+ M_DEVBUF, M_NOWAIT);
+ KASSERT(info != NULL,
+ ("Error VMBUS: malloc failed to allocate Gpadl Teardown Msg!"));
+ if (info == NULL) {
+ ret = ENOMEM;
+ goto cleanup;
+ }
+
+ sema_init(&info->wait_sema, 0, "Open Info Sema");
+
+ msg = (hv_vmbus_channel_gpadl_teardown*) info->msg;
+
+ msg->header.message_type = HV_CHANNEL_MESSAGE_GPADL_TEARDOWN;
+ msg->child_rel_id = channel->offer_msg.child_rel_id;
+ msg->gpadl = gpadl_handle;
+
+ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_msg_anchor,
+ info, msg_list_entry);
+ mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+ ret = hv_vmbus_post_message(msg,
+ sizeof(hv_vmbus_channel_gpadl_teardown));
+ if (ret != 0)
+ goto cleanup;
+
+ ret = sema_timedwait(&info->wait_sema, 500); /* KYS 5 seconds */
+
+cleanup:
+ /*
+ * Received a torndown response
+ */
+ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor,
+ info, msg_list_entry);
+ mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ sema_destroy(&info->wait_sema);
+ free(info, M_DEVBUF);
+
+ return (ret);
+}
+
/**
 * @brief Close the specified channel
 *
 * Disables the channel callback, sends CLOSE_CHANNEL to the host, tears
 * down the ring-buffer GPADL and frees the ring buffer.  The channel
 * object itself is freed only when the channel reached the OPEN state;
 * on an error path out of hv_vmbus_channel_open() the caller owns the
 * channel and frees it.
 */
void
hv_vmbus_channel_close(hv_vmbus_channel *channel)
{
	int ret = 0;
	hv_vmbus_channel_close_channel* msg;
	hv_vmbus_channel_msg_info* info;

	/* Stop the callback from firing while we tear the channel down */
	mtx_lock(&channel->inbound_lock);
	channel->on_channel_callback = NULL;
	mtx_unlock(&channel->inbound_lock);

	/**
	 * Send a closing message
	 */
	info = (hv_vmbus_channel_msg_info *)
		malloc(	sizeof(hv_vmbus_channel_msg_info) +
			sizeof(hv_vmbus_channel_close_channel),
				M_DEVBUF, M_NOWAIT);
	KASSERT(info != NULL, ("VMBUS: malloc failed hv_vmbus_channel_close!"));
	/*
	 * NOTE(review): returning here skips the GPADL teardown and leaves
	 * the ring buffer allocated — confirm this is acceptable under
	 * memory pressure.
	 */
	if(info == NULL)
		return;

	msg = (hv_vmbus_channel_close_channel*) info->msg;
	msg->header.message_type = HV_CHANNEL_MESSAGE_CLOSE_CHANNEL;
	msg->child_rel_id = channel->offer_msg.child_rel_id;

	/* NOTE(review): 'ret' is never examined; a post failure is ignored */
	ret = hv_vmbus_post_message(
		msg, sizeof(hv_vmbus_channel_close_channel));

	/* Tear down the gpadl for the channel's ring buffer */
	if (channel->ring_buffer_gpadl_handle) {
		hv_vmbus_channel_teardown_gpdal(channel,
			channel->ring_buffer_gpadl_handle);
	}

	/* TODO: Send a msg to release the childRelId */

	/* cleanup the ring buffers for this channel */
	hv_ring_buffer_cleanup(&channel->outbound);
	hv_ring_buffer_cleanup(&channel->inbound);

	/*
	 * NOTE(review): the second argument is the ring-buffer *page* count
	 * (set in hv_vmbus_channel_open); confirm contigfree() expects pages
	 * here rather than a byte count.
	 */
	contigfree(
		channel->ring_buffer_pages,
		channel->ring_buffer_page_count,
		M_DEVBUF);

	free(info, M_DEVBUF);

	/*
	 * If we are closing the channel during an error path in
	 * opening the channel, don't free the channel
	 * since the caller will free the channel
	 */
	if (channel->state == HV_CHANNEL_OPEN_STATE) {
		mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
		TAILQ_REMOVE(
			&hv_vmbus_g_connection.channel_anchor,
			channel,
			list_entry);
		mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);

		hv_vmbus_free_vmbus_channel(channel);
	}

}
+
+/**
+ * @brief Send the specified buffer on the given channel
+ */
+int
+hv_vmbus_channel_send_packet(
+ hv_vmbus_channel* channel,
+ void* buffer,
+ uint32_t buffer_len,
+ uint64_t request_id,
+ hv_vmbus_packet_type type,
+ uint32_t flags)
+{
+ int ret = 0;
+ hv_vm_packet_descriptor desc;
+ uint32_t packet_len;
+ uint64_t aligned_data;
+ uint32_t packet_len_aligned;
+ hv_vmbus_sg_buffer_list buffer_list[3];
+
+ packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len;
+ packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
+ aligned_data = 0;
+
+ /* Setup the descriptor */
+ desc.type = type; /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND; */
+ desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
+ /* in 8-bytes granularity */
+ desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3;
+ desc.length8 = (uint16_t) (packet_len_aligned >> 3);
+ desc.transaction_id = request_id;
+
+ buffer_list[0].data = &desc;
+ buffer_list[0].length = sizeof(hv_vm_packet_descriptor);
+
+ buffer_list[1].data = buffer;
+ buffer_list[1].length = buffer_len;
+
+ buffer_list[2].data = &aligned_data;
+ buffer_list[2].length = packet_len_aligned - packet_len;
+
+ ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3);
+
+ /* TODO: We should determine if this is optional */
+ if (ret == 0
+ && !hv_vmbus_get_ring_buffer_interrupt_mask(
+ &channel->outbound)) {
+ vmbus_channel_set_event(channel);
+ }
+
+ return (ret);
+}
+
+/**
+ * @brief Send a range of single-page buffer packets using
+ * a GPADL Direct packet type
+ */
+int
+hv_vmbus_channel_send_packet_pagebuffer(
+ hv_vmbus_channel* channel,
+ hv_vmbus_page_buffer page_buffers[],
+ uint32_t page_count,
+ void* buffer,
+ uint32_t buffer_len,
+ uint64_t request_id)
+{
+
+ int ret = 0;
+ int i = 0;
+ uint32_t packet_len;
+ uint32_t packetLen_aligned;
+ hv_vmbus_sg_buffer_list buffer_list[3];
+ hv_vmbus_channel_packet_page_buffer desc;
+ uint32_t descSize;
+ uint64_t alignedData = 0;
+
+ if (page_count > HV_MAX_PAGE_BUFFER_COUNT)
+ return (EINVAL);
+
+ /*
+ * Adjust the size down since hv_vmbus_channel_packet_page_buffer
+ * is the largest size we support
+ */
+ descSize = sizeof(hv_vmbus_channel_packet_page_buffer) -
+ ((HV_MAX_PAGE_BUFFER_COUNT - page_count) *
+ sizeof(hv_vmbus_page_buffer));
+ packet_len = descSize + buffer_len;
+ packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
+
+ /* Setup the descriptor */
+ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
+ desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+ desc.data_offset8 = descSize >> 3; /* in 8-bytes granularity */
+ desc.length8 = (uint16_t) (packetLen_aligned >> 3);
+ desc.transaction_id = request_id;
+ desc.range_count = page_count;
+
+ for (i = 0; i < page_count; i++) {
+ desc.range[i].length = page_buffers[i].length;
+ desc.range[i].offset = page_buffers[i].offset;
+ desc.range[i].pfn = page_buffers[i].pfn;
+ }
+
+ buffer_list[0].data = &desc;
+ buffer_list[0].length = descSize;
+
+ buffer_list[1].data = buffer;
+ buffer_list[1].length = buffer_len;
+
+ buffer_list[2].data = &alignedData;
+ buffer_list[2].length = packetLen_aligned - packet_len;
+
+ ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3);
+
+ /* TODO: We should determine if this is optional */
+ if (ret == 0 &&
+ !hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound)) {
+ vmbus_channel_set_event(channel);
+ }
+
+ return (ret);
+}
+
+/**
+ * @brief Send a multi-page buffer packet using a GPADL Direct packet type
+ */
+int
+hv_vmbus_channel_send_packet_multipagebuffer(
+ hv_vmbus_channel* channel,
+ hv_vmbus_multipage_buffer* multi_page_buffer,
+ void* buffer,
+ uint32_t buffer_len,
+ uint64_t request_id)
+{
+
+ int ret = 0;
+ uint32_t desc_size;
+ uint32_t packet_len;
+ uint32_t packet_len_aligned;
+ uint32_t pfn_count;
+ uint64_t aligned_data = 0;
+ hv_vmbus_sg_buffer_list buffer_list[3];
+ hv_vmbus_channel_packet_multipage_buffer desc;
+
+ pfn_count =
+ HV_NUM_PAGES_SPANNED(
+ multi_page_buffer->offset,
+ multi_page_buffer->length);
+
+ if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT))
+ return (EINVAL);
+ /*
+ * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer
+ * is the largest size we support
+ */
+ desc_size =
+ sizeof(hv_vmbus_channel_packet_multipage_buffer) -
+ ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) *
+ sizeof(uint64_t));
+ packet_len = desc_size + buffer_len;
+ packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
+
+ /*
+ * Setup the descriptor
+ */
+ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
+ desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+ desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */
+ desc.length8 = (uint16_t) (packet_len_aligned >> 3);
+ desc.transaction_id = request_id;
+ desc.range_count = 1;
+
+ desc.range.length = multi_page_buffer->length;
+ desc.range.offset = multi_page_buffer->offset;
+
+ memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array,
+ pfn_count * sizeof(uint64_t));
+
+ buffer_list[0].data = &desc;
+ buffer_list[0].length = desc_size;
+
+ buffer_list[1].data = buffer;
+ buffer_list[1].length = buffer_len;
+
+ buffer_list[2].data = &aligned_data;
+ buffer_list[2].length = packet_len_aligned - packet_len;
+
+ ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3);
+
+ /* TODO: We should determine if this is optional */
+ if (ret == 0 &&
+ !hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound)) {
+ vmbus_channel_set_event(channel);
+ }
+
+ return (ret);
+}
+
+/**
+ * @brief Retrieve the user packet on the specified channel
+ */
+int
+hv_vmbus_channel_recv_packet(
+ hv_vmbus_channel* channel,
+ void* Buffer,
+ uint32_t buffer_len,
+ uint32_t* buffer_actual_len,
+ uint64_t* request_id)
+{
+ int ret;
+ uint32_t user_len;
+ uint32_t packet_len;
+ hv_vm_packet_descriptor desc;
+
+ *buffer_actual_len = 0;
+ *request_id = 0;
+
+ ret = hv_ring_buffer_peek(&channel->inbound, &desc,
+ sizeof(hv_vm_packet_descriptor));
+ if (ret != 0)
+ return (0);
+
+ packet_len = desc.length8 << 3;
+ user_len = packet_len - (desc.data_offset8 << 3);
+
+ *buffer_actual_len = user_len;
+
+ if (user_len > buffer_len)
+ return (EINVAL);
+
+ *request_id = desc.transaction_id;
+
+ /* Copy over the packet to the user buffer */
+ ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len,
+ (desc.data_offset8 << 3));
+
+ return (0);
+}
+
+/**
+ * @brief Retrieve the raw packet on the specified channel
+ */
+int
+hv_vmbus_channel_recv_packet_raw(
+ hv_vmbus_channel* channel,
+ void* buffer,
+ uint32_t buffer_len,
+ uint32_t* buffer_actual_len,
+ uint64_t* request_id)
+{
+ int ret;
+ uint32_t packetLen;
+ uint32_t userLen;
+ hv_vm_packet_descriptor desc;
+
+ *buffer_actual_len = 0;
+ *request_id = 0;
+
+ ret = hv_ring_buffer_peek(
+ &channel->inbound, &desc,
+ sizeof(hv_vm_packet_descriptor));
+
+ if (ret != 0)
+ return (0);
+
+ packetLen = desc.length8 << 3;
+ userLen = packetLen - (desc.data_offset8 << 3);
+
+ *buffer_actual_len = packetLen;
+
+ if (packetLen > buffer_len)
+ return (ENOBUFS);
+
+ *request_id = desc.transaction_id;
+
+ /* Copy over the entire packet to the user buffer */
+ ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0);
+
+ return (0);
+}
diff --git a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
new file mode 100644
index 0000000..011e305
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
@@ -0,0 +1,680 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+#include "hv_vmbus_priv.h"
+
+typedef void (*hv_pfn_channel_msg_handler)(hv_vmbus_channel_msg_header* msg);
+
+typedef struct hv_vmbus_channel_msg_table_entry {
+ hv_vmbus_channel_msg_type messageType;
+ hv_pfn_channel_msg_handler messageHandler;
+} hv_vmbus_channel_msg_table_entry;
+
+/*
+ * Internal functions
+ */
+
+static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_process_offer(void *context);
+
+/**
+ * Channel message dispatch table.
+ *
+ * Indexed directly by hv_vmbus_channel_msg_type in
+ * hv_vmbus_on_channel_message(), so entries must stay in enum order
+ * and the table must contain exactly HV_CHANNEL_MESSAGE_COUNT entries.
+ * A NULL handler means the message is ignored.
+ */
+hv_vmbus_channel_msg_table_entry
+    g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = {
+	{ HV_CHANNEL_MESSAGE_INVALID, NULL },
+	{ HV_CHANNEL_MESSAGE_OFFER_CHANNEL, vmbus_channel_on_offer },
+	{ HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER,
+		vmbus_channel_on_offer_rescind },
+	{ HV_CHANNEL_MESSAGE_REQUEST_OFFERS, NULL },
+	{ HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED,
+		vmbus_channel_on_offers_delivered },
+	{ HV_CHANNEL_MESSAGE_OPEN_CHANNEL, NULL },
+	{ HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT,
+		vmbus_channel_on_open_result },
+	{ HV_CHANNEL_MESSAGE_CLOSE_CHANNEL, NULL },
+	{ HV_CHANNEL_MESSAGEL_GPADL_HEADER, NULL },
+	{ HV_CHANNEL_MESSAGE_GPADL_BODY, NULL },
+	{ HV_CHANNEL_MESSAGE_GPADL_CREATED,
+		vmbus_channel_on_gpadl_created },
+	{ HV_CHANNEL_MESSAGE_GPADL_TEARDOWN, NULL },
+	{ HV_CHANNEL_MESSAGE_GPADL_TORNDOWN,
+		vmbus_channel_on_gpadl_torndown },
+	{ HV_CHANNEL_MESSAGE_REL_ID_RELEASED, NULL },
+	{ HV_CHANNEL_MESSAGE_INITIATED_CONTACT, NULL },
+	{ HV_CHANNEL_MESSAGE_VERSION_RESPONSE,
+		vmbus_channel_on_version_response },
+	{ HV_CHANNEL_MESSAGE_UNLOAD, NULL }
+};
+
+
+/**
+ * Implementation of the work abstraction.
+ *
+ * Taskqueue trampoline: runs the queued hv_work_item's callback and
+ * then frees the item. If the owning queue has a work semaphore
+ * (control queues do), execution is serialized through it.
+ */
+static void
+work_item_callback(void *work, int pending)
+{
+	struct hv_work_item *w = (struct hv_work_item *)work;
+
+	/*
+	 * Serialize work execution.
+	 */
+	if (w->wq->work_sema != NULL) {
+		sema_wait(w->wq->work_sema);
+	}
+
+	w->callback(w->context);
+
+	if (w->wq->work_sema != NULL) {
+		sema_post(w->wq->work_sema);
+	}
+
+	/* The item was allocated in hv_queue_work_item(); release it here. */
+	free(w, M_DEVBUF);
+}
+
+/**
+ * Create a named work queue backed by a single-threaded taskqueue.
+ *
+ * "vmbusQ" queues run at PI_DISK priority; any other name is treated
+ * as a control queue, runs at PI_NET, and shares the global control
+ * semaphore so control messages are processed one at a time.
+ *
+ * Returns the new queue, or NULL on allocation/taskqueue failure.
+ */
+struct hv_work_queue*
+hv_work_queue_create(char* name)
+{
+	static unsigned int qid = 0;
+	char qname[64];
+	int pri;
+	struct hv_work_queue* wq;
+
+	wq = malloc(sizeof(struct hv_work_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(wq != NULL, ("Error VMBUS: Failed to allocate work_queue\n"));
+	if (wq == NULL)
+	    return (NULL);
+
+	/*
+	 * We use work abstraction to handle messages
+	 * coming from the host and these are typically offers.
+	 * Some FreeBsd drivers appear to have a concurrency issue
+	 * where probe/attach needs to be serialized. We ensure that
+	 * by having only one thread process work elements in a
+	 * specific queue by serializing work execution.
+	 *
+	 */
+	if (strcmp(name, "vmbusQ") == 0) {
+	    pri = PI_DISK;
+	} else { /* control */
+	    pri = PI_NET;
+	    /*
+	     * Initialize semaphore for this queue by pointing
+	     * to the global semaphore used for synchronizing all
+	     * control messages.
+	     */
+	    wq->work_sema = &hv_vmbus_g_connection.control_sema;
+	}
+
+	/* NOTE(review): qname is 64 bytes; snprintf would bound this safely. */
+	sprintf(qname, "hv_%s_%u", name, qid);
+
+	/*
+	 * Fixme:  FreeBSD 8.2 has a different prototype for
+	 * taskqueue_create(), and for certain other taskqueue functions.
+	 * We need to research the implications of these changes.
+	 * Fixme:  Not sure when the changes were introduced.
+	 */
+	wq->queue = taskqueue_create(qname, M_NOWAIT, taskqueue_thread_enqueue,
+	    &wq->queue
+	    #if __FreeBSD_version < 800000
+	    , &wq->proc
+	    #endif
+	    );
+
+	if (wq->queue == NULL) {
+	    free(wq, M_DEVBUF);
+	    return (NULL);
+	}
+
+	if (taskqueue_start_threads(&wq->queue, 1, pri, "%s taskq", qname)) {
+	    taskqueue_free(wq->queue);
+	    free(wq, M_DEVBUF);
+	    return (NULL);
+	}
+
+	qid++;
+
+	return (wq);
+}
+
+/**
+ * Destroy a work queue created by hv_work_queue_create().
+ */
+void
+hv_work_queue_close(struct hv_work_queue *wq)
+{
+	/*
+	 * KYS: Need to drain the taskqueue
+	 * before we close the hv_work_queue.
+	 */
+	/*KYS: taskqueue_drain(wq->tq, ); */
+	taskqueue_free(wq->queue);
+	free(wq, M_DEVBUF);
+}
+
+/**
+ * @brief Create work item
+ *
+ * Allocate a work item wrapping (callback, context) and enqueue it on
+ * wq; work_item_callback() runs it and frees the item.
+ *
+ * Returns ENOMEM on allocation failure, otherwise the result of
+ * taskqueue_enqueue().
+ */
+int
+hv_queue_work_item(
+	struct hv_work_queue *wq,
+	void (*callback)(void *), void *context)
+{
+	struct hv_work_item *w = malloc(sizeof(struct hv_work_item),
+					M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n"));
+	if (w == NULL)
+	    return (ENOMEM);
+
+	w->callback = callback;
+	w->context = context;
+	w->wq = wq;
+
+	TASK_INIT(&w->work, 0, work_item_callback, w);
+
+	/* NOTE(review): if taskqueue_enqueue() fails, w is leaked. */
+	return (taskqueue_enqueue(wq->queue, &w->work));
+}
+
+/**
+ * @brief Rescind the offer by initiating a device removal
+ *
+ * Work-item callback; context is the rescinded hv_vmbus_channel.
+ */
+static void
+vmbus_channel_process_rescind_offer(void *context)
+{
+	hv_vmbus_channel* channel = (hv_vmbus_channel*) context;
+	hv_vmbus_child_device_unregister(channel->device);
+}
+
+/**
+ * @brief Allocate and initialize a vmbus channel object
+ *
+ * Sets up the inbound lock and a per-channel control work queue.
+ * Returns NULL if either allocation fails.
+ */
+hv_vmbus_channel*
+hv_vmbus_allocate_channel(void)
+{
+	hv_vmbus_channel* channel;
+
+	channel = (hv_vmbus_channel*) malloc(
+					sizeof(hv_vmbus_channel),
+					M_DEVBUF,
+					M_NOWAIT | M_ZERO);
+	KASSERT(channel != NULL, ("Error VMBUS: Failed to allocate channel!"));
+	if (channel == NULL)
+	    return (NULL);
+
+	mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF);
+
+	channel->control_work_queue = hv_work_queue_create("control");
+
+	if (channel->control_work_queue == NULL) {
+	    mtx_destroy(&channel->inbound_lock);
+	    free(channel, M_DEVBUF);
+	    return (NULL);
+	}
+
+	return (channel);
+}
+
+/**
+ * @brief Release the vmbus channel object itself
+ *
+ * Runs on the vmbus work queue (see hv_vmbus_free_vmbus_channel);
+ * tears down the channel's own control work queue and frees the
+ * channel memory.
+ */
+static inline void
+ReleaseVmbusChannel(void *context)
+{
+	hv_vmbus_channel* channel = (hv_vmbus_channel*) context;
+	hv_work_queue_close(channel->control_work_queue);
+	free(channel, M_DEVBUF);
+}
+
+/**
+ * @brief Release the resources used by the vmbus channel object
+ *
+ * Destroys the inbound lock immediately, then defers the rest of the
+ * teardown (ReleaseVmbusChannel) to the global vmbus work queue.
+ */
+void
+hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
+{
+	mtx_destroy(&channel->inbound_lock);
+	/*
+	 * We have to release the channel's workqueue/thread in
+	 *  the vmbus's workqueue/thread context
+	 * ie we can't destroy ourselves
+	 */
+	hv_queue_work_item(hv_vmbus_g_connection.work_queue,
+	    ReleaseVmbusChannel, (void *) channel);
+}
+
+/**
+ * @brief Process the offer by creating a channel/device
+ * associated with this offer
+ *
+ * Work-item callback; context is the newly allocated channel carrying
+ * the offer. Duplicate offers (same interface type and instance GUID
+ * as an already-known channel) are discarded. New offers are linked
+ * into the global channel list and registered as child devices.
+ */
+static void
+vmbus_channel_process_offer(void *context)
+{
+	int			ret;
+	hv_vmbus_channel*	new_channel;
+	boolean_t		f_new;
+	hv_vmbus_channel*	channel;
+
+	new_channel = (hv_vmbus_channel*) context;
+	f_new = TRUE;
+	channel = NULL;
+
+	/*
+	 * Make sure this is a new offer
+	 */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+
+	TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor,
+	    list_entry)
+	{
+	    /* Compare both interface type and instance GUIDs */
+	    if (!memcmp(
+		&channel->offer_msg.offer.interface_type,
+		&new_channel->offer_msg.offer.interface_type,
+		sizeof(hv_guid))
+		&& !memcmp(
+		&channel->offer_msg.offer.interface_instance,
+		&new_channel->offer_msg.offer.interface_instance,
+		sizeof(hv_guid))) {
+		f_new = FALSE;
+		break;
+	    }
+	}
+
+	if (f_new) {
+	    /* Insert at tail */
+	    TAILQ_INSERT_TAIL(
+		&hv_vmbus_g_connection.channel_anchor,
+		new_channel,
+		list_entry);
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+
+	if (!f_new) {
+	    /* Duplicate offer: drop the freshly allocated channel */
+	    hv_vmbus_free_vmbus_channel(new_channel);
+	    return;
+	}
+
+	/*
+	 * Start the process of binding this offer to the driver
+	 * (We need to set the device field before calling
+	 * hv_vmbus_child_device_add())
+	 */
+	new_channel->device = hv_vmbus_child_device_create(
+	    new_channel->offer_msg.offer.interface_type,
+	    new_channel->offer_msg.offer.interface_instance, new_channel);
+
+	/*
+	 * TODO - the HV_CHANNEL_OPEN_STATE flag should not be set below
+	 * but in the "open" channel request. The ret != 0 logic below
+	 * doesn't take into account that a channel
+	 * may have been opened successfully
+	 */
+
+	/*
+	 * Add the new device to the bus. This will kick off device-driver
+	 * binding which eventually invokes the device driver's AddDevice()
+	 * method.
+	 */
+	ret = hv_vmbus_child_device_register(new_channel->device);
+	if (ret != 0) {
+	    /* Registration failed: unlink and free the channel */
+	    mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+	    TAILQ_REMOVE(
+		&hv_vmbus_g_connection.channel_anchor,
+		new_channel,
+		list_entry);
+	    mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+	    hv_vmbus_free_vmbus_channel(new_channel);
+	} else {
+	    /*
+	     * This state is used to indicate a successful open
+	     * so that when we do close the channel normally,
+	     * we can clean up properly
+	     */
+	    new_channel->state = HV_CHANNEL_OPEN_STATE;
+
+	}
+}
+
+/**
+ * @brief Handler for channel offers from Hyper-V/Azure
+ *
+ * Handler for channel offers from vmbus in parent partition. We ignore
+ * all offers except network and storage offers. For each network and storage
+ * offers, we create a channel object and queue a work item to the channel
+ * object to process the offer synchronously
+ */
+static void
+vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
+{
+	hv_vmbus_channel_offer_channel* offer;
+	hv_vmbus_channel* new_channel;
+
+	offer = (hv_vmbus_channel_offer_channel*) hdr;
+
+	hv_guid *guidType;
+	hv_guid *guidInstance;
+
+	guidType = &offer->offer.interface_type;
+	guidInstance = &offer->offer.interface_instance;
+
+	/* Allocate the channel object and save this offer */
+	new_channel = hv_vmbus_allocate_channel();
+	if (new_channel == NULL)
+	    return;
+
+	memcpy(&new_channel->offer_msg, offer,
+	    sizeof(hv_vmbus_channel_offer_channel));
+	/*
+	 * NOTE(review): the cast binds to monitor_id before the divide,
+	 * so monitor_id is truncated to 8 bits first -- confirm monitor
+	 * ids never exceed 255, or parenthesize as (uint8_t)(id / 32).
+	 */
+	new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
+	new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32;
+
+	/* TODO: Make sure the offer comes from our parent partition */
+	hv_queue_work_item(
+	    new_channel->control_work_queue,
+	    vmbus_channel_process_offer,
+	    new_channel);
+}
+
+/**
+ * @brief Rescind offer handler.
+ *
+ * We queue a work item to process this offer
+ * synchronously.  Unknown relids are silently ignored.
+ */
+static void
+vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr)
+{
+	hv_vmbus_channel_rescind_offer*	rescind;
+	hv_vmbus_channel*		channel;
+
+	rescind = (hv_vmbus_channel_rescind_offer*) hdr;
+
+	channel = hv_vmbus_get_channel_from_rel_id(rescind->child_rel_id);
+	if (channel == NULL)
+	    return;
+
+	hv_queue_work_item(channel->control_work_queue,
+	    vmbus_channel_process_rescind_offer, channel);
+}
+
+/**
+ *
+ * @brief Invoked when all offers have been delivered.
+ *
+ * Intentionally empty: no action is needed on this notification.
+ */
+static void
+vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr)
+{
+}
+
+/**
+ * @brief Open result handler.
+ *
+ * This is invoked when we received a response
+ * to our channel open request. Find the matching request, copy the
+ * response and signal the requesting thread.
+ */
+static void
+vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr)
+{
+	hv_vmbus_channel_open_result*	result;
+	hv_vmbus_channel_msg_info*	msg_info;
+	hv_vmbus_channel_msg_header*	requestHeader;
+	hv_vmbus_channel_open_channel*	openMsg;
+
+	result = (hv_vmbus_channel_open_result*) hdr;
+
+	/*
+	 * Find the open msg, copy the result and signal/unblock the wait event
+	 */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
+	    msg_list_entry) {
+	    requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
+
+	    if (requestHeader->message_type ==
+		    HV_CHANNEL_MESSAGE_OPEN_CHANNEL) {
+		openMsg = (hv_vmbus_channel_open_channel*) msg_info->msg;
+		/* Match on both relid and open_id */
+		if (openMsg->child_rel_id == result->child_rel_id
+		    && openMsg->open_id == result->open_id) {
+		    memcpy(&msg_info->response.open_result, result,
+			sizeof(hv_vmbus_channel_open_result));
+		    sema_post(&msg_info->wait_sema);
+		    break;
+		}
+	    }
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+}
+
+/**
+ * @brief GPADL created handler.
+ *
+ * This is invoked when we received a response
+ * to our gpadl create request. Find the matching request, copy the
+ * response and signal the requesting thread.
+ */
+static void
+vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr)
+{
+	hv_vmbus_channel_gpadl_created*		gpadl_created;
+	hv_vmbus_channel_msg_info*		msg_info;
+	hv_vmbus_channel_msg_header*		request_header;
+	hv_vmbus_channel_gpadl_header*		gpadl_header;
+
+	gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr;
+
+	/* Find the establish msg, copy the result and signal/unblock
+	 * the wait event
+	 */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
+		msg_list_entry) {
+	    request_header = (hv_vmbus_channel_msg_header*) msg_info->msg;
+	    if (request_header->message_type ==
+		    HV_CHANNEL_MESSAGEL_GPADL_HEADER) {
+		gpadl_header =
+		    (hv_vmbus_channel_gpadl_header*) request_header;
+
+		/* Match on both relid and gpadl handle */
+		if ((gpadl_created->child_rel_id == gpadl_header->child_rel_id)
+		    && (gpadl_created->gpadl == gpadl_header->gpadl)) {
+		    memcpy(&msg_info->response.gpadl_created,
+			gpadl_created,
+			sizeof(hv_vmbus_channel_gpadl_created));
+		    sema_post(&msg_info->wait_sema);
+		    break;
+		}
+	    }
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+}
+
+/**
+ * @brief GPADL torndown handler.
+ *
+ * This is invoked when we received a response
+ * to our gpadl teardown request. Find the matching request, copy the
+ * response and signal the requesting thread
+ */
+static void
+vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr)
+{
+	hv_vmbus_channel_gpadl_torndown*	gpadl_torndown;
+	hv_vmbus_channel_msg_info*		msg_info;
+	hv_vmbus_channel_msg_header*		requestHeader;
+	hv_vmbus_channel_gpadl_teardown*	gpadlTeardown;
+
+	gpadl_torndown = (hv_vmbus_channel_gpadl_torndown*)hdr;
+
+	/*
+	 * Find the open msg, copy the result and signal/unblock the
+	 * wait event.
+	 */
+
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
+		msg_list_entry) {
+	    requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
+
+	    if (requestHeader->message_type
+		    == HV_CHANNEL_MESSAGE_GPADL_TEARDOWN) {
+		gpadlTeardown =
+		    (hv_vmbus_channel_gpadl_teardown*) requestHeader;
+
+		/* Teardown responses are matched by gpadl handle only */
+		if (gpadl_torndown->gpadl == gpadlTeardown->gpadl) {
+		    memcpy(&msg_info->response.gpadl_torndown,
+			gpadl_torndown,
+			sizeof(hv_vmbus_channel_gpadl_torndown));
+		    sema_post(&msg_info->wait_sema);
+		    break;
+		}
+	    }
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+}
+
+/**
+ * @brief Version response handler.
+ *
+ * This is invoked when we received a response
+ * to our initiate contact request. Find the matching request, copy the
+ * response and signal the requesting thread.
+ */
+static void
+vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr)
+{
+	hv_vmbus_channel_msg_info*		msg_info;
+	hv_vmbus_channel_msg_header*		requestHeader;
+	hv_vmbus_channel_initiate_contact*	initiate;
+	hv_vmbus_channel_version_response*	versionResponse;
+
+	versionResponse = (hv_vmbus_channel_version_response*)hdr;
+
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
+	    msg_list_entry) {
+	    requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
+	    if (requestHeader->message_type
+		    == HV_CHANNEL_MESSAGE_INITIATED_CONTACT) {
+		initiate =
+		    (hv_vmbus_channel_initiate_contact*) requestHeader;
+		memcpy(&msg_info->response.version_response,
+		    versionResponse,
+		    sizeof(hv_vmbus_channel_version_response));
+		/*
+		 * NOTE(review): unlike the other handlers there is no
+		 * break here, so every pending INITIATED_CONTACT request
+		 * is signalled -- confirm this is intended.
+		 */
+		sema_post(&msg_info->wait_sema);
+	    }
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+}
+
+/**
+ * @brief Handler for channel protocol messages.
+ *
+ * This is invoked in the vmbus worker thread context.
+ * Dispatches the message through g_channel_message_table (indexed by
+ * message_type) and frees the message in all paths.
+ */
+void
+hv_vmbus_on_channel_message(void *context)
+{
+	hv_vmbus_message*		msg;
+	hv_vmbus_channel_msg_header*	hdr;
+	int				size;
+
+	msg = (hv_vmbus_message*) context;
+	hdr = (hv_vmbus_channel_msg_header*) msg->u.payload;
+	/* NOTE(review): size is computed but currently unused. */
+	size = msg->header.payload_size;
+
+	/* Out-of-range message types are dropped (table has COUNT entries) */
+	if (hdr->message_type >= HV_CHANNEL_MESSAGE_COUNT) {
+	    free(msg, M_DEVBUF);
+	    return;
+	}
+
+	if (g_channel_message_table[hdr->message_type].messageHandler) {
+	    g_channel_message_table[hdr->message_type].messageHandler(hdr);
+	}
+
+	/* Free the msg that was allocated in VmbusOnMsgDPC() */
+	free(msg, M_DEVBUF);
+}
+
+/**
+ * @brief Send a request to get all our pending offers.
+ *
+ * Fire-and-forget: the message is posted and the buffer freed
+ * immediately; offers arrive later via vmbus_channel_on_offer().
+ *
+ * Returns ENOMEM on allocation failure, else the post-message status.
+ */
+int
+hv_vmbus_request_channel_offers(void)
+{
+	int				ret;
+	hv_vmbus_channel_msg_header*	msg;
+	hv_vmbus_channel_msg_info*	msg_info;
+
+	msg_info = (hv_vmbus_channel_msg_info *)
+	    malloc(sizeof(hv_vmbus_channel_msg_info)
+		    + sizeof(hv_vmbus_channel_msg_header), M_DEVBUF, M_NOWAIT);
+
+	if (msg_info == NULL) {
+	    if(bootverbose)
+		printf("Error VMBUS: malloc failed for Request Offers\n");
+	    return (ENOMEM);
+	}
+
+	msg = (hv_vmbus_channel_msg_header*) msg_info->msg;
+	msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS;
+
+	ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header));
+
+	/* NOTE(review): msg_info is always non-NULL here; guard is redundant */
+	if (msg_info)
+	    free(msg_info, M_DEVBUF);
+
+	return (ret);
+}
+
+/**
+ * @brief Release channels that are unattached/unconnected (i.e., no drivers associated)
+ *
+ * Drains the global channel list, unregistering each child device and
+ * freeing the channel.
+ *
+ * NOTE(review): hv_vmbus_child_device_unregister() and
+ * hv_vmbus_free_vmbus_channel() are called while channel_lock (a spin
+ * mutex) is held -- confirm these callees never sleep.
+ */
+void
+hv_vmbus_release_unattached_channels(void)
+{
+	hv_vmbus_channel *channel;
+
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+
+	while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) {
+	    channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor);
+	    TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor,
+			    channel, list_entry);
+
+	    hv_vmbus_child_device_unregister(channel->device);
+	    hv_vmbus_free_vmbus_channel(channel);
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+}
diff --git a/sys/dev/hyperv/vmbus/hv_connection.c b/sys/dev/hyperv/vmbus/hv_connection.c
new file mode 100644
index 0000000..c8e0b48
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_connection.c
@@ -0,0 +1,431 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <machine/bus.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+#include "hv_vmbus_priv.h"
+
+/*
+ * Globals
+ *
+ * Single global connection state shared by the whole vmbus driver;
+ * starts disconnected with a non-zero base for GPADL handles.
+ */
+hv_vmbus_connection hv_vmbus_g_connection =
+	{ .connect_state = HV_DISCONNECTED,
+	  .next_gpadl_handle = 0xE1E10, };
+
+/**
+ * Send a connect request on the partition service connection
+ *
+ * Initializes the global connection state (work queue, semaphores,
+ * channel/message lists, interrupt and monitor pages), posts an
+ * INITIATE_CONTACT message, and waits up to 5 seconds for the version
+ * response.  On any failure, tears everything back down and returns
+ * non-zero; on success the state becomes HV_CONNECTED.
+ */
+int
+hv_vmbus_connect(void) {
+	int					ret = 0;
+	hv_vmbus_channel_msg_info*		msg_info = NULL;
+	hv_vmbus_channel_initiate_contact*	msg;
+
+	/**
+	 * Make sure we are not connecting or connected
+	 */
+	if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) {
+	    return (-1);
+	}
+
+	/**
+	 * Initialize the vmbus connection
+	 */
+	hv_vmbus_g_connection.connect_state = HV_CONNECTING;
+	hv_vmbus_g_connection.work_queue = hv_work_queue_create("vmbusQ");
+	sema_init(&hv_vmbus_g_connection.control_sema, 1, "control_sema");
+
+	TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
+	mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
+		NULL, MTX_SPIN);
+
+	TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
+	mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
+		NULL, MTX_SPIN);
+
+	/**
+	 * Setup the vmbus event connection for channel interrupt abstraction
+	 * stuff
+	 */
+	hv_vmbus_g_connection.interrupt_page = contigmalloc(
+					PAGE_SIZE, M_DEVBUF,
+					M_NOWAIT | M_ZERO, 0UL,
+					BUS_SPACE_MAXADDR,
+					PAGE_SIZE, 0);
+	KASSERT(hv_vmbus_g_connection.interrupt_page != NULL,
+	    ("Error VMBUS: malloc failed to allocate Channel"
+		" Request Event message!"));
+	if (hv_vmbus_g_connection.interrupt_page == NULL) {
+	    ret = ENOMEM;
+	    goto cleanup;
+	}
+
+	/* First half of the page: host->guest, second half: guest->host */
+	hv_vmbus_g_connection.recv_interrupt_page =
+		hv_vmbus_g_connection.interrupt_page;
+
+	hv_vmbus_g_connection.send_interrupt_page =
+		((uint8_t *) hv_vmbus_g_connection.interrupt_page +
+		    (PAGE_SIZE >> 1));
+
+	/**
+	 * Set up the monitor notification facility. The 1st page for
+	 * parent->child and the 2nd page for child->parent
+	 */
+	hv_vmbus_g_connection.monitor_pages = contigmalloc(
+		2 * PAGE_SIZE,
+		M_DEVBUF,
+		M_NOWAIT | M_ZERO,
+		0UL,
+		BUS_SPACE_MAXADDR,
+		PAGE_SIZE,
+		0);
+	KASSERT(hv_vmbus_g_connection.monitor_pages != NULL,
+	    ("Error VMBUS: malloc failed to allocate Monitor Pages!"));
+	if (hv_vmbus_g_connection.monitor_pages == NULL) {
+	    ret = ENOMEM;
+	    goto cleanup;
+	}
+
+	msg_info = (hv_vmbus_channel_msg_info*)
+		malloc(sizeof(hv_vmbus_channel_msg_info) +
+			sizeof(hv_vmbus_channel_initiate_contact),
+			M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(msg_info != NULL,
+	    ("Error VMBUS: malloc failed for Initiate Contact message!"));
+	if (msg_info == NULL) {
+	    ret = ENOMEM;
+	    goto cleanup;
+	}
+
+	sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
+	msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
+
+	msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
+	msg->vmbus_version_requested = HV_VMBUS_REVISION_NUMBER;
+
+	msg->interrupt_page = hv_get_phys_addr(
+		hv_vmbus_g_connection.interrupt_page);
+
+	msg->monitor_page_1 = hv_get_phys_addr(
+		hv_vmbus_g_connection.monitor_pages);
+
+	msg->monitor_page_2 =
+		hv_get_phys_addr(
+			((uint8_t *) hv_vmbus_g_connection.monitor_pages
+			    + PAGE_SIZE));
+
+	/**
+	 * Add to list before we send the request since we may receive the
+	 * response before returning from this routine
+	 */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	TAILQ_INSERT_TAIL(
+		&hv_vmbus_g_connection.channel_msg_anchor,
+		msg_info,
+		msg_list_entry);
+
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	ret = hv_vmbus_post_message(
+		msg,
+		sizeof(hv_vmbus_channel_initiate_contact));
+
+	if (ret != 0) {
+	    mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	    TAILQ_REMOVE(
+		    &hv_vmbus_g_connection.channel_msg_anchor,
+		    msg_info,
+		    msg_list_entry);
+	    mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	    goto cleanup;
+	}
+
+	/**
+	 * Wait for the connection response
+	 *
+	 * NOTE(review): the sema_timedwait() status is overwritten below
+	 * without being checked; on timeout the (zeroed) response is
+	 * still inspected -- confirm this is acceptable.
+	 */
+	ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds */
+
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_REMOVE(
+		&hv_vmbus_g_connection.channel_msg_anchor,
+		msg_info,
+		msg_list_entry);
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	/**
+	 * Check if successful
+	 */
+	if (msg_info->response.version_response.version_supported) {
+	    hv_vmbus_g_connection.connect_state = HV_CONNECTED;
+	} else {
+	    ret = ECONNREFUSED;
+	    goto cleanup;
+	}
+
+	sema_destroy(&msg_info->wait_sema);
+	free(msg_info, M_DEVBUF);
+
+	return (0);
+
+	/*
+	 * Cleanup after failure!
+	 */
+	cleanup:
+
+	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
+
+	hv_work_queue_close(hv_vmbus_g_connection.work_queue);
+	sema_destroy(&hv_vmbus_g_connection.control_sema);
+	mtx_destroy(&hv_vmbus_g_connection.channel_lock);
+	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
+
+	if (hv_vmbus_g_connection.interrupt_page != NULL) {
+	    contigfree(
+		    hv_vmbus_g_connection.interrupt_page,
+		    PAGE_SIZE,
+		    M_DEVBUF);
+	    hv_vmbus_g_connection.interrupt_page = NULL;
+	}
+
+	if (hv_vmbus_g_connection.monitor_pages != NULL) {
+	    contigfree(
+		    hv_vmbus_g_connection.monitor_pages,
+		    2 * PAGE_SIZE,
+		    M_DEVBUF);
+	    hv_vmbus_g_connection.monitor_pages = NULL;
+	}
+
+	if (msg_info) {
+	    sema_destroy(&msg_info->wait_sema);
+	    free(msg_info, M_DEVBUF);
+	}
+
+	return (ret);
+}
+
+/**
+ * Send a disconnect request on the partition service connection
+ *
+ * Posts an UNLOAD message and tears down the connection state set up
+ * in hv_vmbus_connect().  The teardown proceeds even if the post
+ * fails; the post status is returned to the caller.
+ *
+ * NOTE(review): monitor_pages allocated in hv_vmbus_connect() is not
+ * freed here (only interrupt_page is) -- confirm whether this is a
+ * leak or released elsewhere.
+ */
+int
+hv_vmbus_disconnect(void) {
+	int			ret = 0;
+	hv_vmbus_channel_unload*  msg;
+
+	msg = malloc(sizeof(hv_vmbus_channel_unload),
+	    M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(msg != NULL,
+	    ("Error VMBUS: malloc failed to allocate Channel Unload Msg!"));
+	if (msg == NULL)
+	    return (ENOMEM);
+
+	msg->message_type = HV_CHANNEL_MESSAGE_UNLOAD;
+
+	ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_unload));
+
+
+	contigfree(hv_vmbus_g_connection.interrupt_page, PAGE_SIZE, M_DEVBUF);
+
+	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
+
+	hv_work_queue_close(hv_vmbus_g_connection.work_queue);
+	sema_destroy(&hv_vmbus_g_connection.control_sema);
+
+	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
+
+	free(msg, M_DEVBUF);
+
+	return (ret);
+}
+
+/**
+ * Get the channel object given its child relative id (ie channel id)
+ *
+ * Linear search of the global channel list under channel_lock;
+ * returns NULL if no channel has the given relid.
+ */
+hv_vmbus_channel*
+hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) {
+
+	hv_vmbus_channel*	channel;
+	hv_vmbus_channel*	foundChannel = NULL;
+
+	/*
+	 * TODO:
+	 * Consider optimization where relids are stored in a fixed size array
+	 * and channels are accessed without the need to take this lock or search
+	 * the list.
+	 */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+	TAILQ_FOREACH(channel,
+		&hv_vmbus_g_connection.channel_anchor, list_entry) {
+
+	    if (channel->offer_msg.child_rel_id == rel_id) {
+		foundChannel = channel;
+		break;
+	    }
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+
+	return (foundChannel);
+}
+
+/**
+ * Process a channel event notification
+ *
+ * Looks up the channel for this relid and invokes its callback under
+ * inbound_lock; unknown relids are ignored.
+ */
+static void
+VmbusProcessChannelEvent(uint32_t relid)
+{
+	hv_vmbus_channel* channel;
+
+	/**
+	 * Find the channel based on this relid and invokes
+	 * the channel callback to process the event
+	 */
+
+	channel = hv_vmbus_get_channel_from_rel_id(relid);
+
+	if (channel == NULL) {
+	    return;
+	}
+	/**
+	 * To deal with the race condition where we might
+	 * receive a packet while the relevant driver is
+	 * being unloaded, dispatch the callback while
+	 * holding the channel lock. The unloading driver
+	 * will acquire the same channel lock to set the
+	 * callback to NULL. This closes the window.
+	 */
+
+	mtx_lock(&channel->inbound_lock);
+	if (channel->on_channel_callback != NULL) {
+	    channel->on_channel_callback(channel->channel_callback_context);
+	}
+	mtx_unlock(&channel->inbound_lock);
+}
+
+/**
+ * Handler for events
+ *
+ * Scans the receive interrupt page as a bitmap of pending channel
+ * events; each set bit is atomically cleared and its relid
+ * (dword index * 32 + bit) dispatched to VmbusProcessChannelEvent().
+ * Relid 0 is reserved for the channel protocol itself and skipped.
+ */
+void
+hv_vmbus_on_events(void *arg)
+{
+	int bit;
+	int dword;
+	int rel_id;
+	int maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5;
+	/* int maxdword = PAGE_SIZE >> 3; */
+
+	/*
+	 * receive size is 1/2 page and divide that by 4 bytes
+	 */
+
+	uint32_t* recv_interrupt_page =
+	    hv_vmbus_g_connection.recv_interrupt_page;
+
+	/*
+	 * Check events
+	 */
+	if (recv_interrupt_page != NULL) {
+	    for (dword = 0; dword < maxdword; dword++) {
+		if (recv_interrupt_page[dword]) {
+		    for (bit = 0; bit < 32; bit++) {
+			if (synch_test_and_clear_bit(bit,
+			    (uint32_t *) &recv_interrupt_page[dword])) {
+			    rel_id = (dword << 5) + bit;
+			    if (rel_id == 0) {
+				/*
+				 * Special case -
+				 * vmbus channel protocol msg.
+				 */
+				continue;
+			    } else {
+				VmbusProcessChannelEvent(rel_id);
+
+			    }
+			}
+		    }
+		}
+	    }
+	}
+
+	return;
+}
+
+/**
+ * Send a msg on the vmbus's message connection
+ *
+ * Retries with increasing delays while the hypervisor reports
+ * HV_STATUS_INSUFFICIENT_BUFFERS; any other status (including
+ * success) ends the loop.  Returns the last post status.
+ */
+int hv_vmbus_post_message(void *buffer, size_t bufferLen) {
+	int ret = 0;
+	hv_vmbus_connection_id connId;
+	unsigned retries = 0;
+
+	/* NetScaler delays from previous code were consolidated here */
+	static int delayAmount[] = {100, 100, 100, 500, 500, 5000, 5000, 5000};
+
+	/* for(each entry in delayAmount) try to post message,
+	 *  delay a little bit before retrying
+	 */
+	for (retries = 0;
+	    retries < sizeof(delayAmount)/sizeof(delayAmount[0]); retries++) {
+	    connId.as_uint32_t = 0;
+	    connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
+	    ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer, bufferLen);
+	    if (ret != HV_STATUS_INSUFFICIENT_BUFFERS)
+		break;
+	    /* TODO: KYS We should use a blocking wait call */
+	    DELAY(delayAmount[retries]);
+	}
+
+	KASSERT(ret == 0, ("Error VMBUS: Message Post Failed\n"));
+
+	return (ret);
+}
+
+/**
+ * Send an event notification to the parent
+ *
+ * Atomically sets the channel's bit in the send interrupt page, then
+ * signals the hypervisor; returns the signal status.
+ */
+int
+hv_vmbus_set_event(uint32_t child_rel_id) {
+	int ret = 0;
+
+	/* Each uint32_t represents 32 channels */
+
+	synch_set_bit(child_rel_id & 31,
+		(((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
+				+ (child_rel_id >> 5))));
+	ret = hv_vmbus_signal_event();
+
+	return (ret);
+}
+
diff --git a/sys/dev/hyperv/vmbus/hv_hv.c b/sys/dev/hyperv/vmbus/hv_hv.c
new file mode 100644
index 0000000..0e73bdc
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_hv.c
@@ -0,0 +1,515 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Implements low-level interactions with Hyper-V/Azure
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/timetc.h>
+#include <machine/bus.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+
+#include "hv_vmbus_priv.h"
+
#define HV_X64_MSR_GUEST_OS_ID	0x40000000	/* guest OS identity MSR; value also matches the hypervisor CPUID base leaf */
#define HV_X64_CPUID_MIN	0x40000005	/* lowest acceptable max-leaf reported in CPUID.40000000h:EAX */
#define HV_X64_CPUID_MAX	0x4000ffff	/* highest acceptable max-leaf */
#define HV_X64_MSR_TIME_REF_COUNT 0x40000020	/* partition reference-time counter MSR */

#define HV_NANOSECONDS_PER_SEC	1000000000L
+
+
+static u_int hv_get_timecount(struct timecounter *tc);
+static u_int hv_get_timecount(struct timecounter *tc);
+
/*
 * Execute CPUID for leaf 'op' with the subleaf taken from *ecx, storing
 * the four result registers through the pointers.
 *
 * Fix: the original input constraint was "c" (ecx), which loaded the
 * POINTER value (a kernel virtual address) into ECX as the subleaf.
 * Use the matching constraint "2" with *ecx so the pointed-to value is
 * passed instead.
 */
static inline void do_cpuid_inline(unsigned int op, unsigned int *eax,
	unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
	__asm__ __volatile__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx),
		"=d" (*edx) : "0" (op), "2" (*ecx));
}
+
/**
 * Globals
 *
 * Driver-wide hypervisor context.  Fields not listed in the initializer
 * (per-CPU SynIC pages, guest_id) start zeroed — this is file-scope
 * storage — and are filled in by hv_vmbus_init()/hv_vmbus_synic_init().
 */
hv_vmbus_context hv_vmbus_g_context = {
	.syn_ic_initialized = FALSE,
	.hypercall_page = NULL,
	.signal_event_param = NULL,
	.signal_event_buffer = NULL,
};
+
/*
 * Timecounter backed by the partition reference-time MSR.  The
 * frequency initializer (1e9 / 100 = 10 MHz) matches the MSR's 100 ns
 * tick units.
 * NOTE(review): the final positional initializer is the timecounter
 * "quality" and reuses the frequency value (10000000) — confirm that
 * quality this high is intended.
 */
static struct timecounter hv_timecounter = {
	hv_get_timecount, 0, ~0u, HV_NANOSECONDS_PER_SEC/100, "Hyper-V", HV_NANOSECONDS_PER_SEC/100
};
+
+static u_int
+hv_get_timecount(struct timecounter *tc)
+{
+ u_int now = hv_vmbus_read_msr(HV_X64_MSR_TIME_REF_COUNT);
+ return (now);
+}
+
/**
 * @brief Query the cpuid for presence of windows hypervisor
 *
 * @returns nonzero if a Hyper-V compatible hypervisor is running.
 */
int
hv_vmbus_query_hypervisor_presence(void)
{
	u_int regs[4];
	int hyper_v_detected = 0;

	/* CPUID leaf 1, ECX bit 31: "hypervisor present" */
	do_cpuid(1, regs);
	if (regs[2] & 0x80000000) { /* if(a hypervisor is detected) */
		/* make sure this really is Hyper-V */
		/* we look at the CPUID info */
		/*
		 * NOTE(review): HV_X64_MSR_GUEST_OS_ID (0x40000000) is an
		 * MSR number reused here as the hypervisor CPUID base leaf,
		 * which happens to share the value 0x40000000.  That leaf
		 * returns the max leaf in EAX and the vendor signature
		 * "Microsoft Hv" in EBX:ECX:EDX (regs[1..3]).  A dedicated
		 * CPUID-leaf macro would be clearer.
		 */
		do_cpuid(HV_X64_MSR_GUEST_OS_ID, regs);
		hyper_v_detected =
			regs[0] >= HV_X64_CPUID_MIN &&
			regs[0] <= HV_X64_CPUID_MAX &&
			!memcmp("Microsoft Hv", &regs[1], 12);
	}
	return (hyper_v_detected);
}
+
+/**
+ * @brief Get version of the windows hypervisor
+ */
+static int
+hv_vmbus_get_hypervisor_version(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ unsigned int maxLeaf;
+ unsigned int op;
+
+ /*
+ * Its assumed that this is called after confirming that
+ * Viridian is present
+ * Query id and revision.
+ */
+ eax = 0;
+ ebx = 0;
+ ecx = 0;
+ edx = 0;
+ op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION;
+ do_cpuid_inline(op, &eax, &ebx, &ecx, &edx);
+
+ maxLeaf = eax;
+ eax = 0;
+ ebx = 0;
+ ecx = 0;
+ edx = 0;
+ op = HV_CPU_ID_FUNCTION_HV_INTERFACE;
+ do_cpuid_inline(op, &eax, &ebx, &ecx, &edx);
+
+ if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_VERSION) {
+ eax = 0;
+ ebx = 0;
+ ecx = 0;
+ edx = 0;
+ op = HV_CPU_ID_FUNCTION_MS_HV_VERSION;
+ do_cpuid_inline(op, &eax, &ebx, &ecx, &edx);
+ }
+ return (maxLeaf);
+}
+
/**
 * @brief Invoke the specified hypercall
 *
 * Calls into the hypercall page mapped by hv_vmbus_init().  'control'
 * is the hypercall input value (call code and flags); 'input'/'output'
 * are kernel virtual addresses of the parameter blocks, translated to
 * guest physical addresses here because the hypervisor ABI takes
 * physical addresses.  Returns the 64-bit hypercall status.
 */
static uint64_t
hv_vmbus_do_hypercall(uint64_t control, void* input, void* output)
{
#ifdef __x86_64__
	uint64_t hv_status = 0;
	uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0;
	uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0;
	volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page;

	/*
	 * 64-bit calling convention: RCX = control, RDX = input PA,
	 * R8 = output PA, status back in RAX.  R8 is loaded with a
	 * separate instruction since GCC has no constraint letter for it.
	 */
	__asm__ __volatile__ ("mov %0, %%r8" : : "r" (output_address): "r8");
	__asm__ __volatile__ ("call *%3" : "=a"(hv_status):
				"c" (control), "d" (input_address),
				"m" (hypercall_page));
	return (hv_status);
#else
	/*
	 * 32-bit calling convention: 64-bit quantities travel in register
	 * pairs — EDX:EAX = control, EBX:ECX = input PA, EDI:ESI =
	 * output PA — and the 64-bit status comes back in EDX:EAX.
	 */
	uint32_t control_high = control >> 32;
	uint32_t control_low = control & 0xFFFFFFFF;
	uint32_t hv_status_high = 1;
	uint32_t hv_status_low = 1;
	uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0;
	uint32_t input_address_high = input_address >> 32;
	uint32_t input_address_low = input_address & 0xFFFFFFFF;
	uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0;
	uint32_t output_address_high = output_address >> 32;
	uint32_t output_address_low = output_address & 0xFFFFFFFF;
	volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page;

	__asm__ __volatile__ ("call *%8" : "=d"(hv_status_high),
				"=a"(hv_status_low) : "d" (control_high),
				"a" (control_low), "b" (input_address_high),
				"c" (input_address_low),
				"D"(output_address_high),
				"S"(output_address_low), "m" (hypercall_page));
	return (hv_status_low | ((uint64_t)hv_status_high << 32));
#endif /* __x86_64__ */
}
+
/**
 * @brief Main initialization routine.
 *
 * This routine must be called before any other routines in here are
 * called.  It (1) verifies a Hyper-V compatible hypervisor is present,
 * (2) reports our guest identity via the GUEST_OS_ID MSR, (3) allocates
 * and registers the hypercall page, (4) builds the pre-aligned
 * parameter block used by hv_vmbus_signal_event(), and (5) registers
 * the Hyper-V reference-time timecounter.
 *
 * @returns 0 on success, ENOTSUP on any failure.
 */
int
hv_vmbus_init(void)
{
	int max_leaf;
	hv_vmbus_x64_msr_hypercall_contents hypercall_msr;
	void* virt_addr = 0;

	/* Clear the per-CPU SynIC page pointer arrays */
	memset(
	    hv_vmbus_g_context.syn_ic_event_page,
	    0,
	    sizeof(hv_vmbus_handle) * MAXCPU);

	memset(
	    hv_vmbus_g_context.syn_ic_msg_page,
	    0,
	    sizeof(hv_vmbus_handle) * MAXCPU);

	if (!hv_vmbus_query_hypervisor_presence())
	    goto cleanup;

	max_leaf = hv_vmbus_get_hypervisor_version();

	/*
	 * Write our OS info
	 */
	uint64_t os_guest_info = HV_FREEBSD_GUEST_ID;
	hv_vmbus_write_msr(HV_X64_MSR_GUEST_OS_ID, os_guest_info);
	hv_vmbus_g_context.guest_id = os_guest_info;

	/*
	 * See if the hypercall page is already set
	 */
	hypercall_msr.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_HYPERCALL);
	virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
	KASSERT(virt_addr != NULL,
	    ("Error VMBUS: malloc failed to allocate page during init!"));
	if (virt_addr == NULL)
	    goto cleanup;

	/* Point the hypervisor at our page and enable hypercalls */
	hypercall_msr.enable = 1;
	hypercall_msr.guest_physical_address =
	    (hv_get_phys_addr(virt_addr) >> PAGE_SHIFT);
	hv_vmbus_write_msr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t);

	/*
	 * Confirm that hypercall page did get set up
	 */
	hypercall_msr.as_uint64_t = 0;
	hypercall_msr.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_HYPERCALL);

	if (!hypercall_msr.enable)
	    goto cleanup;

	hv_vmbus_g_context.hypercall_page = virt_addr;

	/*
	 * Setup the global signal event param for the signal event hypercall
	 */
	hv_vmbus_g_context.signal_event_buffer =
	    malloc(sizeof(hv_vmbus_input_signal_event_buffer), M_DEVBUF,
		M_ZERO | M_NOWAIT);
	KASSERT(hv_vmbus_g_context.signal_event_buffer != NULL,
	    ("Error VMBUS: Failed to allocate signal_event_buffer\n"));
	if (hv_vmbus_g_context.signal_event_buffer == NULL)
	    goto cleanup;

	/* The hypercall input block must be HV_HYPERCALL_PARAM_ALIGN aligned */
	hv_vmbus_g_context.signal_event_param =
	    (hv_vmbus_input_signal_event*)
	    (HV_ALIGN_UP((unsigned long)
		hv_vmbus_g_context.signal_event_buffer,
		HV_HYPERCALL_PARAM_ALIGN));
	hv_vmbus_g_context.signal_event_param->connection_id.as_uint32_t = 0;
	hv_vmbus_g_context.signal_event_param->connection_id.u.id =
	    HV_VMBUS_EVENT_CONNECTION_ID;
	hv_vmbus_g_context.signal_event_param->flag_number = 0;
	hv_vmbus_g_context.signal_event_param->rsvd_z = 0;

	tc_init(&hv_timecounter); /* register virtual timecount */

	return (0);

	/*
	 * Failure path: disable the hypercall page (if it was enabled) and
	 * release it.
	 * NOTE(review): when the signal_event_buffer allocation fails,
	 * hv_vmbus_g_context.hypercall_page has already been set and is
	 * left pointing at the page freed below — confirm callers never
	 * use it after a failed init.
	 */
	cleanup:
	if (virt_addr != NULL) {
	    if (hypercall_msr.enable) {
		hypercall_msr.as_uint64_t = 0;
		hv_vmbus_write_msr(HV_X64_MSR_HYPERCALL,
		    hypercall_msr.as_uint64_t);
	    }

	    free(virt_addr, M_DEVBUF);
	}
	return (ENOTSUP);
}
+
+/**
+ * @brief Cleanup routine, called normally during driver unloading or exiting
+ */
+void
+hv_vmbus_cleanup(void)
+{
+ hv_vmbus_x64_msr_hypercall_contents hypercall_msr;
+
+ if (hv_vmbus_g_context.signal_event_buffer != NULL) {
+ free(hv_vmbus_g_context.signal_event_buffer, M_DEVBUF);
+ hv_vmbus_g_context.signal_event_buffer = NULL;
+ hv_vmbus_g_context.signal_event_param = NULL;
+ }
+
+ if (hv_vmbus_g_context.guest_id == HV_FREEBSD_GUEST_ID) {
+ if (hv_vmbus_g_context.hypercall_page != NULL) {
+ hypercall_msr.as_uint64_t = 0;
+ hv_vmbus_write_msr(HV_X64_MSR_HYPERCALL,
+ hypercall_msr.as_uint64_t);
+ free(hv_vmbus_g_context.hypercall_page, M_DEVBUF);
+ hv_vmbus_g_context.hypercall_page = NULL;
+ }
+ }
+}
+
+/**
+ * @brief Post a message using the hypervisor message IPC.
+ * (This involves a hypercall.)
+ */
+hv_vmbus_status
+hv_vmbus_post_msg_via_msg_ipc(
+ hv_vmbus_connection_id connection_id,
+ hv_vmbus_msg_type message_type,
+ void* payload,
+ size_t payload_size)
+{
+ struct alignedinput {
+ uint64_t alignment8;
+ hv_vmbus_input_post_message msg;
+ };
+
+ hv_vmbus_input_post_message* aligned_msg;
+ hv_vmbus_status status;
+ size_t addr;
+
+ if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
+ return (EMSGSIZE);
+
+ addr = (size_t) malloc(sizeof(struct alignedinput), M_DEVBUF,
+ M_ZERO | M_NOWAIT);
+ KASSERT(addr != 0,
+ ("Error VMBUS: malloc failed to allocate message buffer!"));
+ if (addr == 0)
+ return (ENOMEM);
+
+ aligned_msg = (hv_vmbus_input_post_message*)
+ (HV_ALIGN_UP(addr, HV_HYPERCALL_PARAM_ALIGN));
+
+ aligned_msg->connection_id = connection_id;
+ aligned_msg->message_type = message_type;
+ aligned_msg->payload_size = payload_size;
+ memcpy((void*) aligned_msg->payload, payload, payload_size);
+
+ status = hv_vmbus_do_hypercall(
+ HV_CALL_POST_MESSAGE, aligned_msg, 0) & 0xFFFF;
+
+ free((void *) addr, M_DEVBUF);
+ return (status);
+}
+
+/**
+ * @brief Signal an event on the specified connection using the hypervisor
+ * event IPC. (This involves a hypercall.)
+ */
+hv_vmbus_status
+hv_vmbus_signal_event()
+{
+ hv_vmbus_status status;
+
+ status = hv_vmbus_do_hypercall(
+ HV_CALL_SIGNAL_EVENT,
+ hv_vmbus_g_context.signal_event_param,
+ 0) & 0xFFFF;
+
+ return (status);
+}
+
+/**
+ * @brief hv_vmbus_synic_init
+ */
+void
+hv_vmbus_synic_init(void *irq_arg)
+
+{
+ int cpu;
+ uint32_t irq_vector;
+ hv_vmbus_synic_simp simp;
+ hv_vmbus_synic_siefp siefp;
+ hv_vmbus_synic_scontrol sctrl;
+ hv_vmbus_synic_sint shared_sint;
+ uint64_t version;
+
+ irq_vector = *((uint32_t *) (irq_arg));
+ cpu = PCPU_GET(cpuid);
+
+ if (hv_vmbus_g_context.hypercall_page == NULL)
+ return;
+
+ /*
+ * KYS: Looks like we can only initialize on cpu0; don't we support
+ * SMP guests?
+ *
+ * TODO: Need to add SMP support for FreeBSD V9
+ */
+
+ if (cpu != 0)
+ return;
+
+ /*
+ * TODO: Check the version
+ */
+ version = hv_vmbus_read_msr(HV_X64_MSR_SVERSION);
+
+ hv_vmbus_g_context.syn_ic_msg_page[cpu] =
+ malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
+ KASSERT(hv_vmbus_g_context.syn_ic_msg_page[cpu] != NULL,
+ ("Error VMBUS: malloc failed for allocating page!"));
+ if (hv_vmbus_g_context.syn_ic_msg_page[cpu] == NULL)
+ goto cleanup;
+
+ hv_vmbus_g_context.syn_ic_event_page[cpu] =
+ malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
+ KASSERT(hv_vmbus_g_context.syn_ic_event_page[cpu] != NULL,
+ ("Error VMBUS: malloc failed to allocate page!"));
+ if (hv_vmbus_g_context.syn_ic_event_page[cpu] == NULL)
+ goto cleanup;
+
+ /*
+ * Setup the Synic's message page
+ */
+
+ simp.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SIMP);
+ simp.simp_enabled = 1;
+ simp.base_simp_gpa = ((hv_get_phys_addr(
+ hv_vmbus_g_context.syn_ic_msg_page[cpu])) >> PAGE_SHIFT);
+
+ hv_vmbus_write_msr(HV_X64_MSR_SIMP, simp.as_uint64_t);
+
+ /*
+ * Setup the Synic's event page
+ */
+ siefp.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SIEFP);
+ siefp.siefp_enabled = 1;
+ siefp.base_siefp_gpa = ((hv_get_phys_addr(
+ hv_vmbus_g_context.syn_ic_event_page[cpu])) >> PAGE_SHIFT);
+
+ hv_vmbus_write_msr(HV_X64_MSR_SIEFP, siefp.as_uint64_t);
+
+ shared_sint.vector = irq_vector; /*HV_SHARED_SINT_IDT_VECTOR + 0x20; */
+ shared_sint.masked = FALSE;
+ shared_sint.auto_eoi = FALSE;
+
+ hv_vmbus_write_msr(
+ HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
+ shared_sint.as_uint64_t);
+
+ /* Enable the global synic bit */
+ sctrl.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SCONTROL);
+ sctrl.enable = 1;
+
+ hv_vmbus_write_msr(HV_X64_MSR_SCONTROL, sctrl.as_uint64_t);
+
+ hv_vmbus_g_context.syn_ic_initialized = TRUE;
+
+ return;
+
+ cleanup:
+
+ free(hv_vmbus_g_context.syn_ic_msg_page[cpu], M_DEVBUF);
+ free(hv_vmbus_g_context.syn_ic_msg_page[cpu], M_DEVBUF);
+}
+
+/**
+ * @brief Cleanup routine for hv_vmbus_synic_init()
+ */
+void hv_vmbus_synic_cleanup(void *arg)
+{
+ hv_vmbus_synic_sint shared_sint;
+ hv_vmbus_synic_simp simp;
+ hv_vmbus_synic_siefp siefp;
+ int cpu = PCPU_GET(cpuid);
+
+ if (!hv_vmbus_g_context.syn_ic_initialized)
+ return;
+
+ if (cpu != 0)
+ return; /* TODO: XXXKYS: SMP? */
+
+ shared_sint.as_uint64_t = hv_vmbus_read_msr(
+ HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT);
+
+ shared_sint.masked = 1;
+
+ /*
+ * Disable the interrupt
+ */
+ hv_vmbus_write_msr(
+ HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
+ shared_sint.as_uint64_t);
+
+ simp.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SIMP);
+ simp.simp_enabled = 0;
+ simp.base_simp_gpa = 0;
+
+ hv_vmbus_write_msr(HV_X64_MSR_SIMP, simp.as_uint64_t);
+
+ siefp.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SIEFP);
+ siefp.siefp_enabled = 0;
+ siefp.base_siefp_gpa = 0;
+
+ hv_vmbus_write_msr(HV_X64_MSR_SIEFP, siefp.as_uint64_t);
+
+ contigfree(hv_vmbus_g_context.syn_ic_msg_page[cpu],
+ PAGE_SIZE, M_DEVBUF);
+ contigfree(hv_vmbus_g_context.syn_ic_event_page[cpu],
+ PAGE_SIZE, M_DEVBUF);
+}
+
diff --git a/sys/dev/hyperv/vmbus/hv_ring_buffer.c b/sys/dev/hyperv/vmbus/hv_ring_buffer.c
new file mode 100644
index 0000000..f7c1965
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_ring_buffer.c
@@ -0,0 +1,440 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include "hv_vmbus_priv.h"
+
/*
 * Amount of space available to write, given read/write offsets and the
 * ring data size.  The whole expansion is parenthesized: the original
 * left the conditional expression bare, so embedding the macro in a
 * larger expression (e.g. "z - HV_BYTES_AVAIL_TO_WRITE(...)") bound the
 * surrounding operators into the condition instead of the result.
 */
#define HV_BYTES_AVAIL_TO_WRITE(r, w, z)	\
	(((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))
+
+/**
+ * @brief Get number of bytes available to read and to write to
+ * for the specified ring buffer
+ */
+static inline void
+get_ring_buffer_avail_bytes(
+ hv_vmbus_ring_buffer_info* rbi,
+ uint32_t* read,
+ uint32_t* write)
+{
+ uint32_t read_loc, write_loc;
+
+ /*
+ * Capture the read/write indices before they changed
+ */
+ read_loc = rbi->ring_buffer->read_index;
+ write_loc = rbi->ring_buffer->write_index;
+
+ *write = HV_BYTES_AVAIL_TO_WRITE(
+ read_loc, write_loc, rbi->ring_data_size);
+ *read = rbi->ring_data_size - *write;
+}
+
+/**
+ * @brief Get the next write location for the specified ring buffer
+ */
+static inline uint32_t
+get_next_write_location(hv_vmbus_ring_buffer_info* ring_info)
+{
+ uint32_t next = ring_info->ring_buffer->write_index;
+ return (next);
+}
+
/**
 * @brief Set the next write location for the specified ring buffer.
 * Callers copy the payload first and issue a write barrier before
 * publishing the new index (see hv_ring_buffer_write()).
 */
static inline void
set_next_write_location(
	hv_vmbus_ring_buffer_info*	ring_info,
	uint32_t			next_write_location)
{
	ring_info->ring_buffer->write_index = next_write_location;
}
+
+/**
+ * @brief Get the next read location for the specified ring buffer
+ */
+static inline uint32_t
+get_next_read_location(hv_vmbus_ring_buffer_info* ring_info)
+{
+ uint32_t next = ring_info->ring_buffer->read_index;
+ return (next);
+}
+
+/**
+ * @brief Get the next read location + offset for the specified ring buffer.
+ * This allows the caller to skip.
+ */
+static inline uint32_t
+get_next_read_location_with_offset(
+ hv_vmbus_ring_buffer_info* ring_info,
+ uint32_t offset)
+{
+ uint32_t next = ring_info->ring_buffer->read_index;
+ next += offset;
+ next %= ring_info->ring_data_size;
+ return (next);
+}
+
/**
 * @brief Set the next read location for the specified ring buffer.
 * Callers finish copying data out and issue a barrier before publishing
 * the new index (see hv_ring_buffer_read()).
 */
static inline void
set_next_read_location(
	hv_vmbus_ring_buffer_info*	ring_info,
	uint32_t			next_read_location)
{
	ring_info->ring_buffer->read_index = next_read_location;
}
+
+/**
+ * @brief Get the start of the ring buffer
+ */
+static inline void *
+get_ring_buffer(hv_vmbus_ring_buffer_info* ring_info)
+{
+ return (void *) ring_info->ring_buffer->buffer;
+}
+
/**
 * @brief Get the size of the ring buffer's data area (total size minus
 * the hv_vmbus_ring_buffer header).
 */
static inline uint32_t
get_ring_buffer_size(hv_vmbus_ring_buffer_info* ring_info)
{
	return ring_info->ring_data_size;
}
+
/**
 * Get the read and write indices as uint64_t of the specified ring buffer.
 *
 * NOTE(review): despite the name, only the write index is packed (into
 * the high 32 bits); the low word is always zero.  This value is the
 * stamp hv_ring_buffer_write() appends after each packet — confirm
 * whether the read index was meant to occupy the low word.
 */
static inline uint64_t
get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info)
{
	return (uint64_t) ring_info->ring_buffer->write_index << 32;
}
+
/*
 * Internal helpers that copy data into / out of the ring with
 * wrap-around handling; definitions are at the bottom of this file.
 *
 * NOTE(review): declared static here, but the definitions below omit
 * the static keyword.  Linkage remains internal either way (C11
 * 6.2.2), but the qualifiers should agree for clarity.
 */
static uint32_t copy_to_ring_buffer(
	hv_vmbus_ring_buffer_info*	ring_info,
	uint32_t			start_write_offset,
	char*				src,
	uint32_t			src_len);

static uint32_t copy_from_ring_buffer(
	hv_vmbus_ring_buffer_info*	ring_info,
	char*				dest,
	uint32_t			dest_len,
	uint32_t			start_read_offset);
+
+
+/**
+ * @brief Get the interrupt mask for the specified ring buffer.
+ */
+uint32_t
+hv_vmbus_get_ring_buffer_interrupt_mask(hv_vmbus_ring_buffer_info *rbi)
+{
+ return rbi->ring_buffer->interrupt_mask;
+}
+
+/**
+ * @brief Initialize the ring buffer.
+ */
+int
+hv_vmbus_ring_buffer_init(
+ hv_vmbus_ring_buffer_info* ring_info,
+ void* buffer,
+ uint32_t buffer_len)
+{
+ memset(ring_info, 0, sizeof(hv_vmbus_ring_buffer_info));
+
+ ring_info->ring_buffer = (hv_vmbus_ring_buffer*) buffer;
+ ring_info->ring_buffer->read_index =
+ ring_info->ring_buffer->write_index = 0;
+
+ ring_info->ring_size = buffer_len;
+ ring_info->ring_data_size = buffer_len - sizeof(hv_vmbus_ring_buffer);
+
+ mtx_init(&ring_info->ring_lock, "vmbus ring buffer", NULL, MTX_SPIN);
+
+ return (0);
+}
+
/**
 * @brief Cleanup the ring buffer: destroy the spin lock created by
 * hv_vmbus_ring_buffer_init().  The backing memory is owned (and
 * freed) by the caller.
 */
void hv_ring_buffer_cleanup(hv_vmbus_ring_buffer_info* ring_info)
{
	mtx_destroy(&ring_info->ring_lock);
}
+
+/**
+ * @brief Write to the ring buffer.
+ */
+int
+hv_ring_buffer_write(
+ hv_vmbus_ring_buffer_info* out_ring_info,
+ hv_vmbus_sg_buffer_list sg_buffers[],
+ uint32_t sg_buffer_count)
+{
+ int i = 0;
+ uint32_t byte_avail_to_write;
+ uint32_t byte_avail_to_read;
+ uint32_t total_bytes_to_write = 0;
+
+ volatile uint32_t next_write_location;
+ uint64_t prev_indices = 0;
+
+ for (i = 0; i < sg_buffer_count; i++) {
+ total_bytes_to_write += sg_buffers[i].length;
+ }
+
+ total_bytes_to_write += sizeof(uint64_t);
+
+ mtx_lock_spin(&out_ring_info->ring_lock);
+
+ get_ring_buffer_avail_bytes(out_ring_info, &byte_avail_to_read,
+ &byte_avail_to_write);
+
+ /*
+ * If there is only room for the packet, assume it is full.
+ * Otherwise, the next time around, we think the ring buffer
+ * is empty since the read index == write index
+ */
+
+ if (byte_avail_to_write <= total_bytes_to_write) {
+
+ mtx_unlock_spin(&out_ring_info->ring_lock);
+ return (EAGAIN);
+ }
+
+ /*
+ * Write to the ring buffer
+ */
+ next_write_location = get_next_write_location(out_ring_info);
+
+ for (i = 0; i < sg_buffer_count; i++) {
+ next_write_location = copy_to_ring_buffer(out_ring_info,
+ next_write_location, (char *) sg_buffers[i].data,
+ sg_buffers[i].length);
+ }
+
+ /*
+ * Set previous packet start
+ */
+ prev_indices = get_ring_buffer_indices(out_ring_info);
+
+ next_write_location = copy_to_ring_buffer(
+ out_ring_info, next_write_location,
+ (char *) &prev_indices, sizeof(uint64_t));
+
+ /*
+ * Make sure we flush all writes before updating the writeIndex
+ */
+ wmb();
+
+ /*
+ * Now, update the write location
+ */
+ set_next_write_location(out_ring_info, next_write_location);
+
+ mtx_unlock_spin(&out_ring_info->ring_lock);
+
+ return (0);
+}
+
+/**
+ * @brief Read without advancing the read index.
+ */
+int
+hv_ring_buffer_peek(
+ hv_vmbus_ring_buffer_info* in_ring_info,
+ void* buffer,
+ uint32_t buffer_len)
+{
+ uint32_t bytesAvailToWrite;
+ uint32_t bytesAvailToRead;
+ uint32_t nextReadLocation = 0;
+
+ mtx_lock_spin(&in_ring_info->ring_lock);
+
+ get_ring_buffer_avail_bytes(in_ring_info, &bytesAvailToRead,
+ &bytesAvailToWrite);
+
+ /*
+ * Make sure there is something to read
+ */
+ if (bytesAvailToRead < buffer_len) {
+ mtx_unlock_spin(&in_ring_info->ring_lock);
+ return (EAGAIN);
+ }
+
+ /*
+ * Convert to byte offset
+ */
+ nextReadLocation = get_next_read_location(in_ring_info);
+
+ nextReadLocation = copy_from_ring_buffer(
+ in_ring_info, (char *)buffer, buffer_len, nextReadLocation);
+
+ mtx_unlock_spin(&in_ring_info->ring_lock);
+
+ return (0);
+}
+
/**
 * @brief Read buffer_len bytes and advance the read index past them.
 *
 * 'offset' skips that many bytes from the current read location before
 * copying (used to step over packet headers already peeked).  After the
 * payload, the 8-byte index stamp written by hv_ring_buffer_write() is
 * consumed as well, so the read index lands on the next packet.
 *
 * @returns 0 on success, EINVAL for a zero-length request, EAGAIN when
 *          fewer than buffer_len bytes are queued.
 */
int
hv_ring_buffer_read(
	hv_vmbus_ring_buffer_info*	in_ring_info,
	void*				buffer,
	uint32_t			buffer_len,
	uint32_t			offset)
{
	uint32_t bytes_avail_to_write;
	uint32_t bytes_avail_to_read;
	uint32_t next_read_location = 0;
	uint64_t prev_indices = 0;

	if (buffer_len <= 0)
		return (EINVAL);

	mtx_lock_spin(&in_ring_info->ring_lock);

	get_ring_buffer_avail_bytes(
	    in_ring_info, &bytes_avail_to_read,
	    &bytes_avail_to_write);

	/*
	 * Make sure there is something to read
	 */
	if (bytes_avail_to_read < buffer_len) {
		mtx_unlock_spin(&in_ring_info->ring_lock);
		return (EAGAIN);
	}

	/*
	 * Convert to byte offset, skipping 'offset' bytes
	 */
	next_read_location = get_next_read_location_with_offset(
	    in_ring_info,
	    offset);

	next_read_location = copy_from_ring_buffer(
	    in_ring_info,
	    (char *) buffer,
	    buffer_len,
	    next_read_location);

	/* Consume the trailing index stamp appended by the writer */
	next_read_location = copy_from_ring_buffer(
	    in_ring_info,
	    (char *) &prev_indices,
	    sizeof(uint64_t),
	    next_read_location);

	/*
	 * Make sure all reads are done before we update the read index since
	 * the writer may start writing to the read area once the read index
	 * is updated.
	 * NOTE(review): the intent above is read ordering, but wmb() is a
	 * store barrier; rmb() or a full mb() may be what is actually
	 * needed here — confirm against the memory model in use.
	 */
	wmb();

	/*
	 * Update the read index
	 */
	set_next_read_location(in_ring_info, next_read_location);

	mtx_unlock_spin(&in_ring_info->ring_lock);

	return (0);
}
+
+/**
+ * @brief Helper routine to copy from source to ring buffer.
+ *
+ * Assume there is enough room. Handles wrap-around in dest case only!
+ */
+uint32_t
+copy_to_ring_buffer(
+ hv_vmbus_ring_buffer_info* ring_info,
+ uint32_t start_write_offset,
+ char* src,
+ uint32_t src_len)
+{
+ char *ring_buffer = get_ring_buffer(ring_info);
+ uint32_t ring_buffer_size = get_ring_buffer_size(ring_info);
+ uint32_t fragLen;
+
+ if (src_len > ring_buffer_size - start_write_offset) {
+ /* wrap-around detected! */
+ fragLen = ring_buffer_size - start_write_offset;
+ memcpy(ring_buffer + start_write_offset, src, fragLen);
+ memcpy(ring_buffer, src + fragLen, src_len - fragLen);
+ } else {
+ memcpy(ring_buffer + start_write_offset, src, src_len);
+ }
+
+ start_write_offset += src_len;
+ start_write_offset %= ring_buffer_size;
+
+ return (start_write_offset);
+}
+
+/**
+ * @brief Helper routine to copy to source from ring buffer.
+ *
+ * Assume there is enough room. Handles wrap-around in src case only!
+ */
+uint32_t
+copy_from_ring_buffer(
+ hv_vmbus_ring_buffer_info* ring_info,
+ char* dest,
+ uint32_t dest_len,
+ uint32_t start_read_offset)
+{
+ uint32_t fragLen;
+ char *ring_buffer = get_ring_buffer(ring_info);
+ uint32_t ring_buffer_size = get_ring_buffer_size(ring_info);
+
+ if (dest_len > ring_buffer_size - start_read_offset) {
+ /* wrap-around detected at the src */
+ fragLen = ring_buffer_size - start_read_offset;
+ memcpy(dest, ring_buffer + start_read_offset, fragLen);
+ memcpy(dest + fragLen, ring_buffer, dest_len - fragLen);
+ } else {
+ memcpy(dest, ring_buffer + start_read_offset, dest_len);
+ }
+
+ start_read_offset += dest_len;
+ start_read_offset %= ring_buffer_size;
+
+ return (start_read_offset);
+}
+
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
new file mode 100644
index 0000000..e010308
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
@@ -0,0 +1,583 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * VM Bus Driver Implementation
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+#include <sys/rtprio.h>
+#include <sys/interrupt.h>
+#include <sys/sx.h>
+#include <sys/taskqueue.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+
+#include <machine/resource.h>
+#include <sys/rman.h>
+
+#include <machine/stdarg.h>
+#include <machine/intr_machdep.h>
+#include <sys/pcpu.h>
+
+#include "hv_vmbus_priv.h"
+
+
+/* Fixed IRQ used for the vmbus interrupt (allocated in vmbus_bus_init) */
+#define VMBUS_IRQ 0x5
+
+static struct intr_event *hv_msg_intr_event; /* SWI event: channel messages */
+static struct intr_event *hv_event_intr_event; /* SWI event: channel events */
+static void *msg_swintr; /* cookie for the hv_msg SWI */
+static void *event_swintr; /* cookie for the hv_event SWI */
+static device_t vmbus_devp; /* the vmbus root device (set in vmbus_attach) */
+static void *vmbus_cookiep; /* bus_setup_intr() handler cookie */
+static int vmbus_rid; /* resource id for the IRQ resource */
+struct resource *intr_res; /* NOTE(review): not static — confirm no external users */
+static int vmbus_irq = VMBUS_IRQ;
+static int vmbus_inited; /* guards vmbus_bus_init() against double entry */
+
+/**
+ * @brief Software interrupt thread routine to handle channel messages from
+ * the hypervisor.
+ */
+static void
+vmbus_msg_swintr(void *dummy)
+{
+ int cpu;
+ void* page_addr;
+ hv_vmbus_message* msg;
+ hv_vmbus_message* copied;
+
+ /* This SWI thread is bound to CPU 0 (see vmbus_bus_init) */
+ cpu = PCPU_GET(cpuid);
+ page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
+ msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
+
+ /* Drain messages from this CPU's SynIC message slot until empty */
+ for (;;) {
+ if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) {
+ break; /* no message */
+ } else {
+ /*
+ * Copy the message out of the shared slot so the
+ * slot can be released back to the hypervisor
+ * before the deferred handler runs.
+ */
+ copied = malloc(sizeof(hv_vmbus_message),
+ M_DEVBUF, M_NOWAIT);
+ KASSERT(copied != NULL,
+ ("Error VMBUS: malloc failed to allocate"
+ " hv_vmbus_message!"));
+ /*
+ * NOTE(review): on allocation failure this retries
+ * the same (uncleared) slot; a persistent M_NOWAIT
+ * failure busy-spins here — confirm acceptable.
+ */
+ if (copied == NULL)
+ continue;
+ memcpy(copied, msg, sizeof(hv_vmbus_message));
+ hv_queue_work_item(hv_vmbus_g_connection.work_queue,
+ hv_vmbus_on_channel_message, copied);
+ }
+
+ /* Release the slot back to the hypervisor */
+ msg->header.message_type = HV_MESSAGE_TYPE_NONE;
+
+ /*
+ * Make sure the write to message_type (ie set to
+ * HV_MESSAGE_TYPE_NONE) happens before we read the
+ * message_pending and EOMing. Otherwise, the EOMing will
+ * not deliver any more messages
+ * since there is no empty slot
+ */
+ wmb();
+
+ if (msg->header.message_flags.message_pending) {
+ /*
+ * This will cause message queue rescan to possibly
+ * deliver another msg from the hypervisor
+ */
+ hv_vmbus_write_msr(HV_X64_MSR_EOM, 0);
+ }
+ }
+}
+
+/**
+ * @brief Interrupt filter routine for VMBUS.
+ *
+ * The purpose of this routine is to determine the type of VMBUS protocol
+ * message to process - an event or a channel message.
+ * As this is an interrupt filter routine, the function runs in a very
+ * restricted envinronment. From the manpage for bus_setup_intr(9)
+ *
+ * In this restricted environment, care must be taken to account for all
+ * races. A careful analysis of races should be done as well. It is gener-
+ * ally cheaper to take an extra interrupt, for example, than to protect
+ * variables with spinlocks. Read, modify, write cycles of hardware regis-
+ * ters need to be carefully analyzed if other threads are accessing the
+ * same registers.
+ */
+static int
+hv_vmbus_isr(void *unused)
+{
+ int cpu;
+ hv_vmbus_message* msg;
+ hv_vmbus_synic_event_flags* event;
+ void* page_addr;
+
+ cpu = PCPU_GET(cpuid);
+ /* (Temporary limit) */
+ KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero"));
+
+ /*
+ * The Windows team has advised that we check for events
+ * before checking for messages. This is the way they do it
+ * in Windows when running as a guest in Hyper-V
+ */
+
+ page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
+ event = (hv_vmbus_synic_event_flags*)
+ page_addr + HV_VMBUS_MESSAGE_SINT;
+
+ /* Since we are a child, we only need to check bit 0 */
+ if (synch_test_and_clear_bit(0, &event->flags32[0])) {
+ /* Defer event processing to the hv_event SWI thread */
+ swi_sched(event_swintr, 0);
+ }
+
+ /* Check if there are actual msgs to be process */
+ page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
+ msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
+
+ if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
+ /* Defer message handling to the hv_msg SWI thread */
+ swi_sched(msg_swintr, 0);
+ }
+
+ return FILTER_HANDLED;
+}
+
+/*
+ * bus_read_ivar method: hand back the requested per-child instance
+ * variable (class id, instance id, device context, or device_t node).
+ */
+static int
+vmbus_read_ivar(
+	device_t	dev,
+	device_t	child,
+	int		index,
+	uintptr_t*	result)
+{
+	struct hv_device*	ctx = device_get_ivars(child);
+
+	switch (index) {
+	case HV_VMBUS_IVAR_TYPE:
+		*result = (uintptr_t) &ctx->class_id;
+		break;
+	case HV_VMBUS_IVAR_INSTANCE:
+		*result = (uintptr_t) &ctx->device_id;
+		break;
+	case HV_VMBUS_IVAR_DEVCTX:
+		*result = (uintptr_t) ctx;
+		break;
+	case HV_VMBUS_IVAR_NODE:
+		*result = (uintptr_t) ctx->device;
+		break;
+	default:
+		return (ENOENT);
+	}
+	return (0);
+}
+
+/*
+ * bus_write_ivar method: every vmbus instance variable is read-only,
+ * so known indices yield EINVAL and unknown ones ENOENT.
+ */
+static int
+vmbus_write_ivar(
+	device_t	dev,
+	device_t	child,
+	int		index,
+	uintptr_t	value)
+{
+	switch (index) {
+	case HV_VMBUS_IVAR_TYPE:
+	case HV_VMBUS_IVAR_INSTANCE:
+	case HV_VMBUS_IVAR_DEVCTX:
+	case HV_VMBUS_IVAR_NODE:
+		return (EINVAL);	/* read-only */
+	default:
+		return (ENOENT);
+	}
+}
+
+/*
+ * Allocate and minimally initialize a new hv_device bound to the given
+ * channel; returns NULL when the M_NOWAIT allocation fails.
+ */
+struct hv_device*
+hv_vmbus_child_device_create(
+	hv_guid		type,
+	hv_guid		instance,
+	hv_vmbus_channel*	channel)
+{
+	hv_device*	dev;
+
+	dev = malloc(sizeof(hv_device), M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(dev != NULL,
+	    ("Error VMBUS: malloc failed to allocate hv_device!"));
+	if (dev == NULL)
+		return (NULL);
+
+	dev->channel = channel;
+	memcpy(&dev->class_id, &type, sizeof(hv_guid));
+	memcpy(&dev->device_id, &instance, sizeof(hv_guid));
+
+	return (dev);
+}
+
+/*
+ * Format the child's class GUID as a 32-character hex string and log it
+ * when booting verbose.
+ *
+ * Fixes: the original used an unbounded sprintf() into an
+ * 'unsigned char' buffer (pointer-sign mismatch with sprintf's char*
+ * argument); use a bounded snprintf() into a correctly sized char
+ * buffer (16 GUID bytes -> 32 hex digits + NUL).
+ */
+static void
+print_dev_guid(struct hv_device *dev)
+{
+	int i;
+	char guid_name[33];
+
+	for (i = 0; i < 32; i += 2)
+		snprintf(&guid_name[i], sizeof(guid_name) - i, "%02x",
+		    dev->class_id.data[i / 2]);
+	if (bootverbose)
+		printf("VMBUS: Class ID: %s\n", guid_name);
+}
+
+/*
+ * Add the child to the vmbus root device and run driver probe/attach
+ * under Giant.
+ *
+ * NOTE(review): the device_probe_and_attach() status is captured in
+ * 'ret' but the function always returns 0 — confirm whether callers
+ * should see the real status.
+ */
+int
+hv_vmbus_child_device_register(struct hv_device *child_dev)
+{
+ device_t child;
+ int ret = 0;
+
+ print_dev_guid(child_dev);
+
+
+ child = device_add_child(vmbus_devp, NULL, -1);
+ child_dev->device = child;
+ device_set_ivars(child, child_dev);
+
+ mtx_lock(&Giant);
+ ret = device_probe_and_attach(child);
+ mtx_unlock(&Giant);
+
+ return (0);
+}
+
+/*
+ * Remove a child previously added by hv_vmbus_child_device_register().
+ *
+ * XXXKYS: Ensure that this is the opposite of device_add_child()
+ */
+int
+hv_vmbus_child_device_unregister(struct hv_device *child_dev)
+{
+	int	error;
+
+	mtx_lock(&Giant);
+	error = device_delete_child(vmbus_devp, child_dev->device);
+	mtx_unlock(&Giant);
+
+	return (error);
+}
+
+/*
+ * device_identify method: ensure exactly one "vmbus" child exists under
+ * the parent bus.
+ *
+ * Fixes: the original unconditionally added a child *before* the
+ * existence check, so the check always found the just-added child and a
+ * duplicate device could accumulate on every identify pass. Only the
+ * guarded add is kept.
+ */
+static void
+vmbus_identify(driver_t *driver, device_t parent)
+{
+	if (device_find_child(parent, "vmbus", 0) == NULL)
+		BUS_ADD_CHILD(parent, 0, "vmbus", 0);
+}
+
+/*
+ * Probe: succeed only when a Hyper-V hypervisor is present.
+ */
+static int
+vmbus_probe(device_t dev)
+{
+	if (bootverbose)
+		device_printf(dev, "VMBUS: probe\n");
+
+	if (!hv_vmbus_query_hypervisor_presence())
+		return (ENXIO);
+
+	device_set_desc(dev, "Vmbus Devices");
+	return (0);
+}
+
+/**
+ * @brief Main vmbus driver initialization routine.
+ *
+ * Here, we
+ * - initialize the vmbus driver context
+ * - setup various driver entry points
+ * - invoke the vmbus hv main init routine
+ * - get the irq resource
+ * - invoke the vmbus to add the vmbus root device
+ * - setup the vmbus root device
+ * - retrieve the channel offers
+ */
+static int
+vmbus_bus_init(void)
+{
+ /*
+ * NOTE(review): local mirror of the ioapic driver's private struct
+ * layout, used below to fish the IDT vector out of the interrupt
+ * source; must stay in sync with the x86 io_apic implementation —
+ * fragile.
+ */
+ struct ioapic_intsrc {
+ struct intsrc io_intsrc;
+ u_int io_irq;
+ u_int io_intpin:8;
+ u_int io_vector:8;
+ u_int io_cpu:8;
+ u_int io_activehi:1;
+ u_int io_edgetrigger:1;
+ u_int io_masked:1;
+ int io_bus:4;
+ uint32_t io_lowreg;
+ };
+
+ int ret;
+ unsigned int vector = 0;
+ struct intsrc *isrc;
+ struct ioapic_intsrc *intpin;
+
+ /* Run only once: both vmbus_attach() and vmbus_init() may call us */
+ if (vmbus_inited)
+ return (0);
+
+ vmbus_inited = 1;
+
+ ret = hv_vmbus_init();
+
+ if (ret) {
+ if(bootverbose)
+ printf("Error VMBUS: Hypervisor Initialization Failed!\n");
+ return (ret);
+ }
+
+ /* Software interrupt thread for channel messages */
+ ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr,
+ NULL, SWI_CLOCK, 0, &msg_swintr);
+
+ if (ret)
+ goto cleanup;
+
+ /*
+ * Message SW interrupt handler checks a per-CPU page and
+ * thus the thread needs to be bound to CPU-0 - which is where
+ * all interrupts are processed.
+ */
+ ret = intr_event_bind(hv_msg_intr_event, 0);
+
+ if (ret)
+ goto cleanup1;
+
+ /* Software interrupt thread for channel events */
+ ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events,
+ NULL, SWI_CLOCK, 0, &event_swintr);
+
+ if (ret)
+ goto cleanup1;
+
+ intr_res = bus_alloc_resource(vmbus_devp,
+ SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE);
+
+ if (intr_res == NULL) {
+ ret = ENOMEM; /* XXXKYS: Need a better errno */
+ goto cleanup2;
+ }
+
+ /*
+ * Setup interrupt filter handler
+ */
+ ret = bus_setup_intr(vmbus_devp, intr_res,
+ INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL,
+ NULL, &vmbus_cookiep);
+
+ if (ret != 0)
+ goto cleanup3;
+
+ /* All vmbus interrupts are taken on CPU 0 (see hv_vmbus_isr) */
+ ret = bus_bind_intr(vmbus_devp, intr_res, 0);
+ if (ret != 0)
+ goto cleanup4;
+
+ isrc = intr_lookup_source(vmbus_irq);
+ if ((isrc == NULL) || (isrc->is_event == NULL)) {
+ ret = EINVAL;
+ goto cleanup4;
+ }
+
+ /* vector = isrc->is_event->ie_vector; */
+ intpin = (struct ioapic_intsrc *)isrc;
+ vector = intpin->io_vector;
+
+ if(bootverbose)
+ printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector);
+
+ /**
+ * Notify the hypervisor of our irq.
+ */
+
+ smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &vector);
+
+ /**
+ * Connect to VMBus in the root partition
+ */
+ ret = hv_vmbus_connect();
+
+ if (ret)
+ goto cleanup4;
+
+ hv_vmbus_request_channel_offers();
+ return (ret);
+
+ /* Error unwinding: each label falls through to undo earlier steps */
+ cleanup4:
+
+ /*
+ * remove swi, bus and intr resource
+ */
+ bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
+
+ cleanup3:
+
+ bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
+
+ cleanup2:
+ swi_remove(event_swintr);
+
+ cleanup1:
+ swi_remove(msg_swintr);
+
+ cleanup:
+ hv_vmbus_cleanup();
+
+ return (ret);
+}
+
+/*
+ * Device attach entry point: record the vmbus root device and, when the
+ * system is already up (cold == 0), run the bus initialization now.
+ * During early boot, initialization is deferred to vmbus_init().
+ */
+static int
+vmbus_attach(device_t dev)
+{
+	if (bootverbose)
+		device_printf(dev, "VMBUS: attach dev: %p\n", dev);
+
+	vmbus_devp = dev;
+
+	if (!cold)
+		vmbus_bus_init();
+
+	return (0);
+}
+
+/*
+ * SYSINIT hook: perform the deferred bus initialization once thread
+ * scheduling is available (the global 'cold' has been cleared).
+ */
+static void
+vmbus_init(void)
+{
+	if (cold)
+		return;
+	vmbus_bus_init();
+}
+
+/*
+ * Undo vmbus_bus_init() in reverse order: drop channels, disconnect,
+ * shut down the per-CPU SynIC, then release the interrupt handler, the
+ * IRQ resource and both software interrupts.
+ */
+static void
+vmbus_bus_exit(void)
+{
+	hv_vmbus_release_unattached_channels();
+	hv_vmbus_disconnect();
+
+	smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
+
+	hv_vmbus_cleanup();
+
+	bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
+	bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
+
+	swi_remove(msg_swintr);
+	swi_remove(event_swintr);
+}
+
+/* Module unload path: tear the bus down. */
+static void
+vmbus_exit(void)
+{
+	vmbus_bus_exit();
+}
+
+/* Device detach entry point: unwind everything via vmbus_exit(). */
+static int
+vmbus_detach(device_t dev)
+{
+	vmbus_exit();
+
+	return (0);
+}
+
+/* MOD_LOAD hook: only logs when booting verbose. */
+static void
+vmbus_mod_load(void)
+{
+	if (bootverbose)
+		printf("VMBUS: load\n");
+}
+
+/* MOD_UNLOAD hook: only logs when booting verbose. */
+static void
+vmbus_mod_unload(void)
+{
+	if (bootverbose)
+		printf("VMBUS: unload\n");
+}
+
+/*
+ * Module event dispatcher; all events succeed, load/unload just log.
+ */
+static int
+vmbus_modevent(module_t mod, int what, void *arg)
+{
+	switch (what) {
+	case MOD_LOAD:
+		vmbus_mod_load();
+		break;
+	case MOD_UNLOAD:
+		vmbus_mod_unload();
+		break;
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+/* newbus method table for the vmbus root device */
+static device_method_t vmbus_methods[] = {
+ /** Device interface */
+ DEVMETHOD(device_identify, vmbus_identify),
+ DEVMETHOD(device_probe, vmbus_probe),
+ DEVMETHOD(device_attach, vmbus_attach),
+ DEVMETHOD(device_detach, vmbus_detach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ DEVMETHOD(device_suspend, bus_generic_suspend),
+ DEVMETHOD(device_resume, bus_generic_resume),
+
+ /** Bus interface */
+ DEVMETHOD(bus_add_child, bus_generic_add_child),
+ DEVMETHOD(bus_print_child, bus_generic_print_child),
+ DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
+ DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
+
+ { 0, 0 } };
+
+static char driver_name[] = "vmbus";
+static driver_t vmbus_driver = { driver_name, vmbus_methods,0, };
+
+
+devclass_t vmbus_devclass;
+
+/* Attach the vmbus driver directly under nexus */
+DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
+MODULE_VERSION(vmbus,1);
+
+/* TODO: We want to be earlier than SI_SUB_VFS */
+SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL);
+
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
new file mode 100644
index 0000000..739acb1
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
@@ -0,0 +1,770 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HYPERV_PRIV_H__
+#define __HYPERV_PRIV_H__
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sema.h>
+
+#include <dev/hyperv/include/hyperv.h>
+
+
+/*
+ * Status codes for hypervisor operations.
+ */
+
+typedef uint16_t hv_vmbus_status;
+
+#define HV_MESSAGE_SIZE (256)
+#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
+#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
+#define HV_ANY_VP (0xFFFFFFFF)
+
+/*
+ * Synthetic interrupt controller flag constants.
+ */
+
+#define HV_EVENT_FLAGS_COUNT (256 * 8)
+#define HV_EVENT_FLAGS_BYTE_COUNT (256)
+#define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(uint32_t))
+
+/*
+ * MessageId: HV_STATUS_INSUFFICIENT_BUFFERS
+ * MessageText:
+ * You did not supply enough message buffers to send a message.
+ */
+
+#define HV_STATUS_INSUFFICIENT_BUFFERS ((uint16_t)0x0013)
+
+typedef void (*hv_vmbus_channel_callback)(void *context);
+
+typedef struct {
+ void* data;
+ uint32_t length;
+} hv_vmbus_sg_buffer_list;
+
+typedef struct {
+ uint32_t current_interrupt_mask;
+ uint32_t current_read_index;
+ uint32_t current_write_index;
+ uint32_t bytes_avail_to_read;
+ uint32_t bytes_avail_to_write;
+} hv_vmbus_ring_buffer_debug_info;
+
+typedef struct {
+ uint32_t rel_id;
+ hv_vmbus_channel_state state;
+ hv_guid interface_type;
+ hv_guid interface_instance;
+ uint32_t monitor_id;
+ uint32_t server_monitor_pending;
+ uint32_t server_monitor_latency;
+ uint32_t server_monitor_connection_id;
+ uint32_t client_monitor_pending;
+ uint32_t client_monitor_latency;
+ uint32_t client_monitor_connection_id;
+ hv_vmbus_ring_buffer_debug_info inbound;
+ hv_vmbus_ring_buffer_debug_info outbound;
+} hv_vmbus_channel_debug_info;
+
+typedef union {
+ hv_vmbus_channel_version_supported version_supported;
+ hv_vmbus_channel_open_result open_result;
+ hv_vmbus_channel_gpadl_torndown gpadl_torndown;
+ hv_vmbus_channel_gpadl_created gpadl_created;
+ hv_vmbus_channel_version_response version_response;
+} hv_vmbus_channel_msg_response;
+
+/*
+ * Represents each channel msg on the vmbus connection
+ * This is a variable-size data structure depending on
+ * the msg type itself
+ */
+typedef struct hv_vmbus_channel_msg_info {
+ /*
+ * Bookkeeping stuff
+ */
+ TAILQ_ENTRY(hv_vmbus_channel_msg_info) msg_list_entry;
+ /*
+ * So far, this is only used to handle
+ * gpadl body message
+ */
+ TAILQ_HEAD(, hv_vmbus_channel_msg_info) sub_msg_list_anchor;
+ /*
+ * Synchronize the request/response if
+ * needed.
+ * KYS: Use a semaphore for now.
+ * Not perf critical.
+ */
+ struct sema wait_sema;
+ hv_vmbus_channel_msg_response response;
+ uint32_t message_size;
+ /**
+ * The channel message that goes out on
+ * the "wire". It will contain at
+ * minimum the
+ * hv_vmbus_channel_msg_header
+ * header.
+ */
+ unsigned char msg[0];
+} hv_vmbus_channel_msg_info;
+
+/*
+ * The format must be the same as hv_vm_data_gpa_direct
+ */
+typedef struct hv_vmbus_channel_packet_page_buffer {
+ uint16_t type;
+ uint16_t data_offset8;
+ uint16_t length8;
+ uint16_t flags;
+ uint64_t transaction_id;
+ uint32_t reserved;
+ uint32_t range_count;
+ hv_vmbus_page_buffer range[HV_MAX_PAGE_BUFFER_COUNT];
+} __packed hv_vmbus_channel_packet_page_buffer;
+
+/*
+ * The format must be the same as hv_vm_data_gpa_direct
+ */
+typedef struct hv_vmbus_channel_packet_multipage_buffer {
+ uint16_t type;
+ uint16_t data_offset8;
+ uint16_t length8;
+ uint16_t flags;
+ uint64_t transaction_id;
+ uint32_t reserved;
+ uint32_t range_count; /* Always 1 in this case */
+ hv_vmbus_multipage_buffer range;
+} __packed hv_vmbus_channel_packet_multipage_buffer;
+
+enum {
+ HV_VMBUS_MESSAGE_CONNECTION_ID = 1,
+ HV_VMBUS_MESSAGE_PORT_ID = 1,
+ HV_VMBUS_EVENT_CONNECTION_ID = 2,
+ HV_VMBUS_EVENT_PORT_ID = 2,
+ HV_VMBUS_MONITOR_CONNECTION_ID = 3,
+ HV_VMBUS_MONITOR_PORT_ID = 3,
+ HV_VMBUS_MESSAGE_SINT = 2
+};
+
+#define HV_PRESENT_BIT 0x80000000
+
+#define HV_HYPERCALL_PARAM_ALIGN sizeof(uint64_t)
+
+/*
+ * Connection identifier type
+ */
+typedef union {
+ uint32_t as_uint32_t;
+ struct {
+ uint32_t id:24;
+ uint32_t reserved:8;
+ } u;
+
+} __packed hv_vmbus_connection_id;
+
+/*
+ * Definition of the hv_vmbus_signal_event hypercall input structure
+ */
+typedef struct {
+ hv_vmbus_connection_id connection_id;
+ uint16_t flag_number;
+ uint16_t rsvd_z;
+} __packed hv_vmbus_input_signal_event;
+
+typedef struct {
+ uint64_t align8;
+ hv_vmbus_input_signal_event event;
+} __packed hv_vmbus_input_signal_event_buffer;
+
+typedef struct {
+ uint64_t guest_id;
+ void* hypercall_page;
+ hv_bool_uint8_t syn_ic_initialized;
+ /*
+ * This is used as an input param to HV_CALL_SIGNAL_EVENT hypercall.
+ * The input param is immutable in our usage and
+ * must be dynamic mem (vs stack or global).
+ */
+ hv_vmbus_input_signal_event_buffer *signal_event_buffer;
+ /*
+ * 8-bytes aligned of the buffer above
+ */
+ hv_vmbus_input_signal_event *signal_event_param;
+
+ hv_vmbus_handle syn_ic_msg_page[MAXCPU];
+ hv_vmbus_handle syn_ic_event_page[MAXCPU];
+} hv_vmbus_context;
+
+/*
+ * Define hypervisor message types
+ */
+typedef enum {
+
+ HV_MESSAGE_TYPE_NONE = 0x00000000,
+
+ /*
+ * Memory access messages
+ */
+ HV_MESSAGE_TYPE_UNMAPPED_GPA = 0x80000000,
+ HV_MESSAGE_TYPE_GPA_INTERCEPT = 0x80000001,
+
+ /*
+ * Timer notification messages
+ */
+ HV_MESSAGE_TIMER_EXPIRED = 0x80000010,
+
+ /*
+ * Error messages
+ */
+ HV_MESSAGE_TYPE_INVALID_VP_REGISTER_VALUE = 0x80000020,
+ HV_MESSAGE_TYPE_UNRECOVERABLE_EXCEPTION = 0x80000021,
+ HV_MESSAGE_TYPE_UNSUPPORTED_FEATURE = 0x80000022,
+
+ /*
+ * Trace buffer complete messages
+ */
+ HV_MESSAGE_TYPE_EVENT_LOG_BUFFER_COMPLETE = 0x80000040,
+
+ /*
+ * Platform-specific processor intercept messages
+ */
+ HV_MESSAGE_TYPE_X64_IO_PORT_INTERCEPT = 0x80010000,
+ HV_MESSAGE_TYPE_X64_MSR_INTERCEPT = 0x80010001,
+ HV_MESSAGE_TYPE_X64_CPU_INTERCEPT = 0x80010002,
+ HV_MESSAGE_TYPE_X64_EXCEPTION_INTERCEPT = 0x80010003,
+ HV_MESSAGE_TYPE_X64_APIC_EOI = 0x80010004,
+ HV_MESSAGE_TYPE_X64_LEGACY_FP_ERROR = 0x80010005
+
+} hv_vmbus_msg_type;
+
+/*
+ * Define port identifier type
+ */
+typedef union _hv_vmbus_port_id {
+ uint32_t as_uint32_t;
+ struct {
+ uint32_t id:24;
+ uint32_t reserved:8;
+ } u ;
+} hv_vmbus_port_id;
+
+/*
+ * Define synthetic interrupt controller message flag
+ */
+typedef union {
+ uint8_t as_uint8_t;
+ struct {
+ uint8_t message_pending:1;
+ uint8_t reserved:7;
+ };
+} hv_vmbus_msg_flags;
+
+typedef uint64_t hv_vmbus_partition_id;
+
+/*
+ * Define synthetic interrupt controller message header
+ */
+typedef struct {
+ hv_vmbus_msg_type message_type;
+ uint8_t payload_size;
+ hv_vmbus_msg_flags message_flags;
+ uint8_t reserved[2];
+ union {
+ hv_vmbus_partition_id sender;
+ hv_vmbus_port_id port;
+ } u;
+} hv_vmbus_msg_header;
+
+/*
+ * Define synthetic interrupt controller message format
+ */
+typedef struct {
+ hv_vmbus_msg_header header;
+ union {
+ uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
+ } u ;
+} hv_vmbus_message;
+
+/*
+ * Maximum channels is determined by the size of the interrupt
+ * page which is PAGE_SIZE. 1/2 of PAGE_SIZE is for
+ * send endpoint interrupt and the other is receive
+ * endpoint interrupt.
+ *
+ * Note: ((PAGE_SIZE >> 1) << 3) allocates 16384 channels with the
+ * usual 4K PAGE_SIZE. (Fixes: the expansion was unparenthesized, so
+ * e.g. HV_MAX_NUM_CHANNELS / 8 parsed as (PAGE_SIZE >> 1) << (3 / 8);
+ * the old comment also said 16348.)
+ */
+#define HV_MAX_NUM_CHANNELS ((PAGE_SIZE >> 1) << 3)
+
+/*
+ * (The value here must be in multiple of 32)
+ */
+#define HV_MAX_NUM_CHANNELS_SUPPORTED 256
+
+/*
+ * VM Bus connection states
+ */
+typedef enum {
+ HV_DISCONNECTED,
+ HV_CONNECTING,
+ HV_CONNECTED,
+ HV_DISCONNECTING
+} hv_vmbus_connect_state;
+
+#define HV_MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT
+
+
+typedef struct {
+ hv_vmbus_connect_state connect_state;
+ uint32_t next_gpadl_handle;
+ /**
+ * Represents channel interrupts. Each bit position
+ * represents a channel.
+ * When a channel sends an interrupt via VMBUS, it
+ * finds its bit in the send_interrupt_page, set it and
+ * calls Hv to generate a port event. The other end
+ * receives the port event and parse the
+ * recv_interrupt_page to see which bit is set
+ */
+ void *interrupt_page;
+ void *send_interrupt_page;
+ void *recv_interrupt_page;
+ /*
+ * 2 pages - 1st page for parent->child
+ * notification and 2nd is child->parent
+ * notification
+ */
+ void *monitor_pages;
+ TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor;
+ struct mtx channel_msg_lock;
+ /**
+ * List of channels
+ */
+ TAILQ_HEAD(, hv_vmbus_channel) channel_anchor;
+ struct mtx channel_lock;
+
+ hv_vmbus_handle work_queue;
+ struct sema control_sema;
+} hv_vmbus_connection;
+
+/*
+ * Declare the MSR used to identify the guest OS
+ */
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000
+
+typedef union {
+ uint64_t as_uint64_t;
+ struct {
+ uint64_t build_number : 16;
+ uint64_t service_version : 8; /* Service Pack, etc. */
+ uint64_t minor_version : 8;
+ uint64_t major_version : 8;
+ /*
+ * HV_GUEST_OS_MICROSOFT_IDS (If Vendor=MS)
+ * HV_GUEST_OS_VENDOR
+ */
+ uint64_t os_id : 8;
+ uint64_t vendor_id : 16;
+ };
+} hv_vmbus_x64_msr_guest_os_id_contents;
+
+/*
+ * Declare the MSR used to setup pages used to communicate with the hypervisor
+ */
+#define HV_X64_MSR_HYPERCALL 0x40000001
+
+typedef union {
+ uint64_t as_uint64_t;
+ struct {
+ uint64_t enable :1;
+ uint64_t reserved :11;
+ uint64_t guest_physical_address :52;
+ };
+} hv_vmbus_x64_msr_hypercall_contents;
+
+typedef union {
+ uint32_t as_uint32_t;
+ struct {
+ uint32_t group_enable :4;
+ uint32_t rsvd_z :28;
+ };
+} hv_vmbus_monitor_trigger_state;
+
+typedef union {
+ uint64_t as_uint64_t;
+ struct {
+ uint32_t pending;
+ uint32_t armed;
+ };
+} hv_vmbus_monitor_trigger_group;
+
+typedef struct {
+ hv_vmbus_connection_id connection_id;
+ uint16_t flag_number;
+ uint16_t rsvd_z;
+} hv_vmbus_monitor_parameter;
+
+/*
+ * hv_vmbus_monitor_page Layout
+ * ------------------------------------------------------
+ * | 0 | trigger_state (4 bytes) | Rsvd1 (4 bytes) |
+ * | 8 | trigger_group[0] |
+ * | 10 | trigger_group[1] |
+ * | 18 | trigger_group[2] |
+ * | 20 | trigger_group[3] |
+ * | 28 | Rsvd2[0] |
+ * | 30 | Rsvd2[1] |
+ * | 38 | Rsvd2[2] |
+ * | 40 | next_check_time[0][0] | next_check_time[0][1] |
+ * | ... |
+ * | 240 | latency[0][0..3] |
+ * | 340 | Rsvz3[0] |
+ * | 440 | parameter[0][0] |
+ * | 448 | parameter[0][1] |
+ * | ... |
+ * | 840 | Rsvd4[0] |
+ * ------------------------------------------------------
+ */
+
+typedef struct {
+ hv_vmbus_monitor_trigger_state trigger_state;
+ uint32_t rsvd_z1;
+
+ hv_vmbus_monitor_trigger_group trigger_group[4];
+ uint64_t rsvd_z2[3];
+
+ int32_t next_check_time[4][32];
+
+ uint16_t latency[4][32];
+ uint64_t rsvd_z3[32];
+
+ hv_vmbus_monitor_parameter parameter[4][32];
+
+ uint8_t rsvd_z4[1984];
+} hv_vmbus_monitor_page;
+
+/*
+ * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
+ * is set by CPUID(HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES).
+ */
+typedef enum {
+ HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES = 0x00000001,
+ HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION = 0x40000000,
+ HV_CPU_ID_FUNCTION_HV_INTERFACE = 0x40000001,
+ /*
+ * The remaining functions depend on the value
+ * of hv_cpu_id_function_interface
+ */
+ HV_CPU_ID_FUNCTION_MS_HV_VERSION = 0x40000002,
+ HV_CPU_ID_FUNCTION_MS_HV_FEATURES = 0x40000003,
+ HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION = 0x40000004,
+ HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS = 0x40000005
+
+} hv_vmbus_cpuid_function;
+
+/*
+ * Define the format of the SIMP register
+ */
+typedef union {
+ uint64_t as_uint64_t;
+ struct {
+ uint64_t simp_enabled : 1;
+ uint64_t preserved : 11;
+ uint64_t base_simp_gpa : 52;
+ };
+} hv_vmbus_synic_simp;
+
+/*
+ * Define the format of the SIEFP register
+ */
+typedef union {
+ uint64_t as_uint64_t;
+ struct {
+ uint64_t siefp_enabled : 1;
+ uint64_t preserved : 11;
+ uint64_t base_siefp_gpa : 52;
+ };
+} hv_vmbus_synic_siefp;
+
+/*
+ * Define synthetic interrupt source
+ */
+typedef union {
+ uint64_t as_uint64_t;
+ struct {
+ uint64_t vector : 8;
+ uint64_t reserved1 : 8;
+ uint64_t masked : 1;
+ uint64_t auto_eoi : 1;
+ uint64_t reserved2 : 46;
+ };
+} hv_vmbus_synic_sint;
+
+/*
+ * Define syn_ic control register
+ */
+typedef union _hv_vmbus_synic_scontrol {
+ uint64_t as_uint64_t;
+ struct {
+ uint64_t enable : 1;
+ uint64_t reserved : 63;
+ };
+} hv_vmbus_synic_scontrol;
+
+/*
+ * Define the hv_vmbus_post_message hypercall input structure
+ */
+typedef struct {
+ hv_vmbus_connection_id connection_id;
+ uint32_t reserved;
+ hv_vmbus_msg_type message_type;
+ uint32_t payload_size;
+ uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
+} hv_vmbus_input_post_message;
+
+/*
+ * Define the synthetic interrupt controller event flags format
+ */
+typedef union {
+ uint8_t flags8[HV_EVENT_FLAGS_BYTE_COUNT];
+ uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT];
+} hv_vmbus_synic_event_flags;
+
+
+/*
+ * Define synthetic interrupt controller model specific registers
+ */
+#define HV_X64_MSR_SCONTROL (0x40000080)
+#define HV_X64_MSR_SVERSION (0x40000081)
+#define HV_X64_MSR_SIEFP (0x40000082)
+#define HV_X64_MSR_SIMP (0x40000083)
+#define HV_X64_MSR_EOM (0x40000084)
+
+#define HV_X64_MSR_SINT0 (0x40000090)
+#define HV_X64_MSR_SINT1 (0x40000091)
+#define HV_X64_MSR_SINT2 (0x40000092)
+#define HV_X64_MSR_SINT3 (0x40000093)
+#define HV_X64_MSR_SINT4 (0x40000094)
+#define HV_X64_MSR_SINT5 (0x40000095)
+#define HV_X64_MSR_SINT6 (0x40000096)
+#define HV_X64_MSR_SINT7 (0x40000097)
+#define HV_X64_MSR_SINT8 (0x40000098)
+#define HV_X64_MSR_SINT9 (0x40000099)
+#define HV_X64_MSR_SINT10 (0x4000009A)
+#define HV_X64_MSR_SINT11 (0x4000009B)
+#define HV_X64_MSR_SINT12 (0x4000009C)
+#define HV_X64_MSR_SINT13 (0x4000009D)
+#define HV_X64_MSR_SINT14 (0x4000009E)
+#define HV_X64_MSR_SINT15 (0x4000009F)
+
+/*
+ * Declare the various hypercall operations
+ */
+typedef enum {
+ HV_CALL_POST_MESSAGE = 0x005c,
+ HV_CALL_SIGNAL_EVENT = 0x005d,
+} hv_vmbus_call_code;
+
+/**
+ * Global variables
+ */
+
+extern hv_vmbus_context hv_vmbus_g_context;
+extern hv_vmbus_connection hv_vmbus_g_connection;
+
+
+/*
+ * Private, VM Bus functions
+ */
+
+int hv_vmbus_ring_buffer_init(
+ hv_vmbus_ring_buffer_info *ring_info,
+ void *buffer,
+ uint32_t buffer_len);
+
+void hv_ring_buffer_cleanup(
+ hv_vmbus_ring_buffer_info *ring_info);
+
+int hv_ring_buffer_write(
+ hv_vmbus_ring_buffer_info *ring_info,
+ hv_vmbus_sg_buffer_list sg_buffers[],
+ uint32_t sg_buff_count);
+
+int hv_ring_buffer_peek(
+ hv_vmbus_ring_buffer_info *ring_info,
+ void *buffer,
+ uint32_t buffer_len);
+
+int hv_ring_buffer_read(
+ hv_vmbus_ring_buffer_info *ring_info,
+ void *buffer,
+ uint32_t buffer_len,
+ uint32_t offset);
+
+uint32_t hv_vmbus_get_ring_buffer_interrupt_mask(
+ hv_vmbus_ring_buffer_info *ring_info);
+
+void hv_vmbus_dump_ring_info(
+ hv_vmbus_ring_buffer_info *ring_info,
+ char *prefix);
+
+hv_vmbus_channel* hv_vmbus_allocate_channel(void);
+void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel);
+void hv_vmbus_on_channel_message(void *context);
+int hv_vmbus_request_channel_offers(void);
+void hv_vmbus_release_unattached_channels(void);
+int hv_vmbus_init(void);
+void hv_vmbus_cleanup(void);
+
+uint16_t hv_vmbus_post_msg_via_msg_ipc(
+ hv_vmbus_connection_id connection_id,
+ hv_vmbus_msg_type message_type,
+ void *payload,
+ size_t payload_size);
+
+uint16_t hv_vmbus_signal_event(void);
+void hv_vmbus_synic_init(void *irq_arg);
+void hv_vmbus_synic_cleanup(void *arg);
+int hv_vmbus_query_hypervisor_presence(void);
+
+struct hv_device* hv_vmbus_child_device_create(
+ hv_guid device_type,
+ hv_guid device_instance,
+ hv_vmbus_channel *channel);
+
+int hv_vmbus_child_device_register(
+ struct hv_device *child_dev);
+int hv_vmbus_child_device_unregister(
+ struct hv_device *child_dev);
+hv_vmbus_channel* hv_vmbus_get_channel_from_rel_id(uint32_t rel_id);
+
+/**
+ * Connection interfaces
+ */
+int hv_vmbus_connect(void);
+int hv_vmbus_disconnect(void);
+int hv_vmbus_post_message(void *buffer, size_t buf_size);
+int hv_vmbus_set_event(uint32_t child_rel_id);
+void hv_vmbus_on_events(void *);
+
+/*
+ * static inline functions
+ * (with some helper macros for reading/writing to model specific registers)
+ */
+
+#ifdef __x86_64__
+
+#define HV_VMBUS_READ_MSR(reg, v) { \
+ uint32_t h, l; \
+ __asm__ __volatile__("rdmsr" \
+ : "=a" (l), "=d" (h) \
+ : "c" (reg)); \
+ v = (((uint64_t)h) << 32) | l; \
+}
+
+#define HV_VMBUS_WRITE_MSR(reg, v) { \
+ uint32_t h, l; \
+ l = (uint32_t)(((uint64_t)(v)) & 0xFFFFFFFF); \
+ h = (uint32_t)((((uint64_t)(v)) >> 32) & 0xFFFFFFFF); \
+ __asm__ __volatile__("wrmsr" \
+ : /* no outputs */ \
+ : "c" (reg), "a" (l), "d" (h)); \
+}
+
+#else
+
+#define HV_VMBUS_READ_MSR(reg, v) \
+ __asm__ __volatile__("rdmsr" \
+ : "=A" (v) \
+ : "c" (reg))
+
+#define HV_VMBUS_WRITE_MSR(reg, v) \
+ __asm__ __volatile__("wrmsr" \
+ : /* no outputs */ \
+ : "c" (reg), "A" ((uint64_t)v))
+
+#endif
+
+/* Read the given MSR; thin wrapper over the per-arch asm macro above. */
+static inline unsigned long long
+hv_vmbus_read_msr(int msr)
+{
+ unsigned long long val;
+ HV_VMBUS_READ_MSR(msr, val);
+ return (val);
+}
+
+/* Write the given MSR; thin wrapper over the per-arch asm macro above. */
+static inline
+void hv_vmbus_write_msr(int msr, uint64_t val)
+{
+ HV_VMBUS_WRITE_MSR(msr, val);
+ return;
+}
+
+/*
+ * The guest OS needs to register the guest ID with the hypervisor.
+ * The guest ID is a 64 bit entity and the structure of this ID is
+ * specified in the Hyper-V specification:
+ *
+ * http://msdn.microsoft.com/en-us/library/windows/
+ * hardware/ff542653%28v=vs.85%29.aspx
+ *
+ * While the current guideline does not specify how FreeBSD guest ID(s)
+ * need to be generated, our plan is to publish the guidelines for
+ * FreeBSD and other guest operating systems that currently are hosted
+ * on Hyper-V. The implementation here conforms to this yet
+ * unpublished guidelines.
+ *
+ * Bit(s)
+ * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
+ * 62:56 - Os Type; Linux is 0x100, FreeBSD is 0x200
+ * 55:48 - Distro specific identification
+ * 47:16 - FreeBSD kernel version number
+ * 15:0 - Distro specific identification
+ *
+ */
+
+#define HV_FREEBSD_VENDOR_ID 0x8200
+#define HV_FREEBSD_GUEST_ID hv_generate_guest_id(0,0)
+
+/*
+ * Build the 64-bit guest OS id registered with the hypervisor:
+ * HV_FREEBSD_VENDOR_ID in the top bits, distro id in 55:48 and 15:0,
+ * and __FreeBSD_version (from param.h) in 47:16 — see the bit layout
+ * documented above.
+ */
+static inline uint64_t hv_generate_guest_id(
+	uint8_t distro_id_part1,
+	uint16_t distro_id_part2)
+{
+	uint64_t id;
+
+	id  = ((uint64_t) HV_FREEBSD_VENDOR_ID) << 48;
+	id |= ((uint64_t) distro_id_part1) << 48;
+	id |= ((uint64_t) __FreeBSD_version) << 16;
+	id |= (uint64_t) distro_id_part2;
+
+	return (id);
+}
+
+
+#endif /* __HYPERV_PRIV_H__ */
OpenPOWER on IntegriCloud