/*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * VM Bus Driver Implementation */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" #define VMBUS_IRQ 0x5 static struct intr_event *hv_msg_intr_event; static struct intr_event *hv_event_intr_event; static void *msg_swintr; static void *event_swintr; static device_t vmbus_devp; static void *vmbus_cookiep; static int vmbus_rid; struct resource *intr_res; static int vmbus_irq = VMBUS_IRQ; static int vmbus_inited; static hv_setup_args setup_args; /* only CPU 0 supported at this time */ /** * @brief Software interrupt thread routine to handle channel messages from * the hypervisor. */ static void vmbus_msg_swintr(void *dummy) { int cpu; void* page_addr; hv_vmbus_message* msg; hv_vmbus_message* copied; cpu = PCPU_GET(cpuid); page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; for (;;) { if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) { break; /* no message */ } else { copied = malloc(sizeof(hv_vmbus_message), M_DEVBUF, M_NOWAIT); KASSERT(copied != NULL, ("Error VMBUS: malloc failed to allocate" " hv_vmbus_message!")); if (copied == NULL) continue; memcpy(copied, msg, sizeof(hv_vmbus_message)); hv_queue_work_item(hv_vmbus_g_connection.work_queue, hv_vmbus_on_channel_message, copied); } msg->header.message_type = HV_MESSAGE_TYPE_NONE; /* * Make sure the write to message_type (ie set to * HV_MESSAGE_TYPE_NONE) happens before we read the * message_pending and EOMing. Otherwise, the EOMing will * not deliver any more messages * since there is no empty slot */ wmb(); if (msg->header.message_flags.u.message_pending) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(HV_X64_MSR_EOM, 0); } } } /** * @brief Interrupt filter routine for VMBUS. * * The purpose of this routine is to determine the type of VMBUS protocol * message to process - an event or a channel message. * As this is an interrupt filter routine, the function runs in a very * restricted envinronment. From the manpage for bus_setup_intr(9) * * In this restricted environment, care must be taken to account for all * races. A careful analysis of races should be done as well. It is gener- * ally cheaper to take an extra interrupt, for example, than to protect * variables with spinlocks. Read, modify, write cycles of hardware regis- * ters need to be carefully analyzed if other threads are accessing the * same registers. */ static int hv_vmbus_isr(void *unused) { int cpu; hv_vmbus_message* msg; hv_vmbus_synic_event_flags* event; void* page_addr; cpu = PCPU_GET(cpuid); /* (Temporary limit) */ KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero")); /* * The Windows team has advised that we check for events * before checking for messages. This is the way they do it * in Windows when running as a guest in Hyper-V */ page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; event = (hv_vmbus_synic_event_flags*) page_addr + HV_VMBUS_MESSAGE_SINT; /* Since we are a child, we only need to check bit 0 */ if (synch_test_and_clear_bit(0, &event->flags32[0])) { swi_sched(event_swintr, 0); } /* Check if there are actual msgs to be process */ page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { swi_sched(msg_swintr, 0); } return FILTER_HANDLED; } static int vmbus_read_ivar( device_t dev, device_t child, int index, uintptr_t* result) { struct hv_device *child_dev_ctx = device_get_ivars(child); switch (index) { case HV_VMBUS_IVAR_TYPE: *result = (uintptr_t) &child_dev_ctx->class_id; return (0); case HV_VMBUS_IVAR_INSTANCE: *result = (uintptr_t) &child_dev_ctx->device_id; return (0); case HV_VMBUS_IVAR_DEVCTX: *result = (uintptr_t) child_dev_ctx; return (0); case HV_VMBUS_IVAR_NODE: *result = (uintptr_t) child_dev_ctx->device; return (0); } return (ENOENT); } static int vmbus_write_ivar( device_t dev, device_t child, int index, uintptr_t value) { switch (index) { case HV_VMBUS_IVAR_TYPE: case HV_VMBUS_IVAR_INSTANCE: case HV_VMBUS_IVAR_DEVCTX: case HV_VMBUS_IVAR_NODE: /* read-only */ return (EINVAL); } return (ENOENT); } struct hv_device* hv_vmbus_child_device_create( hv_guid type, hv_guid instance, hv_vmbus_channel* channel) { hv_device* child_dev; /* * Allocate the new child device */ child_dev = malloc(sizeof(hv_device), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(child_dev != NULL, ("Error VMBUS: malloc failed to allocate hv_device!")); if (child_dev == NULL) return (NULL); child_dev->channel = channel; memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); return (child_dev); } static void print_dev_guid(struct hv_device *dev) { int i; unsigned char guid_name[100]; for (i = 0; i < 32; i += 2) sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]); if(bootverbose) printf("VMBUS: Class ID: %s\n", guid_name); } int hv_vmbus_child_device_register(struct hv_device *child_dev) { device_t child; int ret = 0; print_dev_guid(child_dev); child = device_add_child(vmbus_devp, NULL, -1); child_dev->device = child; device_set_ivars(child, child_dev); mtx_lock(&Giant); ret = device_probe_and_attach(child); mtx_unlock(&Giant); return (0); } int hv_vmbus_child_device_unregister(struct hv_device *child_dev) { int ret = 0; /* * XXXKYS: Ensure that this is the opposite of * device_add_child() */ mtx_lock(&Giant); ret = device_delete_child(vmbus_devp, child_dev->device); mtx_unlock(&Giant); return(ret); } static void vmbus_identify(driver_t *driver, device_t parent) { if (!hv_vmbus_query_hypervisor_presence()) return; vm_guest = VM_GUEST_HV; BUS_ADD_CHILD(parent, 0, "vmbus", 0); } static int vmbus_probe(device_t dev) { if(bootverbose) device_printf(dev, "VMBUS: probe\n"); device_set_desc(dev, "Vmbus Devices"); return (0); } /** * @brief Main vmbus driver initialization routine. * * Here, we * - initialize the vmbus driver context * - setup various driver entry points * - invoke the vmbus hv main init routine * - get the irq resource * - invoke the vmbus to add the vmbus root device * - setup the vmbus root device * - retrieve the channel offers */ static int vmbus_bus_init(void) { struct ioapic_intsrc { struct intsrc io_intsrc; u_int io_irq; u_int io_intpin:8; u_int io_vector:8; u_int io_cpu:8; u_int io_activehi:1; u_int io_edgetrigger:1; u_int io_masked:1; int io_bus:4; uint32_t io_lowreg; }; int i, ret; unsigned int vector = 0; struct intsrc *isrc; struct ioapic_intsrc *intpin; if (vmbus_inited) return (0); vmbus_inited = 1; ret = hv_vmbus_init(); if (ret) { if(bootverbose) printf("Error VMBUS: Hypervisor Initialization Failed!\n"); return (ret); } ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr, NULL, SWI_CLOCK, 0, &msg_swintr); if (ret) goto cleanup; /* * Message SW interrupt handler checks a per-CPU page and * thus the thread needs to be bound to CPU-0 - which is where * all interrupts are processed. */ ret = intr_event_bind(hv_msg_intr_event, 0); if (ret) goto cleanup1; ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events, NULL, SWI_CLOCK, 0, &event_swintr); if (ret) goto cleanup1; intr_res = bus_alloc_resource(vmbus_devp, SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE); if (intr_res == NULL) { ret = ENOMEM; /* XXXKYS: Need a better errno */ goto cleanup2; } /* * Setup interrupt filter handler */ ret = bus_setup_intr(vmbus_devp, intr_res, INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL, NULL, &vmbus_cookiep); if (ret != 0) goto cleanup3; ret = bus_bind_intr(vmbus_devp, intr_res, 0); if (ret != 0) goto cleanup4; isrc = intr_lookup_source(vmbus_irq); if ((isrc == NULL) || (isrc->is_event == NULL)) { ret = EINVAL; goto cleanup4; } /* vector = isrc->is_event->ie_vector; */ intpin = (struct ioapic_intsrc *)isrc; vector = intpin->io_vector; if(bootverbose) printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector); /** * Notify the hypervisor of our irq. */ setup_args.vector = vector; for(i = 0; i < 2; i++) { setup_args.page_buffers[i] = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); if (setup_args.page_buffers[i] == NULL) { KASSERT(setup_args.page_buffers[i] != NULL, ("Error VMBUS: malloc failed!")); if (i > 0) free(setup_args.page_buffers[0], M_DEVBUF); goto cleanup4; } } /* only CPU #0 supported at this time */ smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args); /* * Connect to VMBus in the root partition */ ret = hv_vmbus_connect(); if (ret != 0) goto cleanup4; hv_vmbus_request_channel_offers(); return (ret); cleanup4: /* * remove swi, bus and intr resource */ bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); cleanup3: bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); cleanup2: swi_remove(event_swintr); cleanup1: swi_remove(msg_swintr); cleanup: hv_vmbus_cleanup(); return (ret); } static int vmbus_attach(device_t dev) { if(bootverbose) device_printf(dev, "VMBUS: attach dev: %p\n", dev); vmbus_devp = dev; /* * If the system has already booted and thread * scheduling is possible indicated by the global * cold set to zero, we just call the driver * initialization directly. */ if (!cold) vmbus_bus_init(); return (0); } static void vmbus_init(void) { if (vm_guest != VM_GUEST_HV) return; /* * If the system has already booted and thread * scheduling is possible, as indicated by the * global cold set to zero, we just call the driver * initialization directly. */ if (!cold) vmbus_bus_init(); } static void vmbus_bus_exit(void) { int i; hv_vmbus_release_unattached_channels(); hv_vmbus_disconnect(); smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); for(i = 0; i < 2; i++) { if (setup_args.page_buffers[i] != 0) free(setup_args.page_buffers[i], M_DEVBUF); } hv_vmbus_cleanup(); /* remove swi, bus and intr resource */ bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); swi_remove(msg_swintr); swi_remove(event_swintr); return; } static void vmbus_exit(void) { vmbus_bus_exit(); } static int vmbus_detach(device_t dev) { vmbus_exit(); return (0); } static void vmbus_mod_load(void) { if(bootverbose) printf("VMBUS: load\n"); } static void vmbus_mod_unload(void) { if(bootverbose) printf("VMBUS: unload\n"); } static int vmbus_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: vmbus_mod_load(); break; case MOD_UNLOAD: vmbus_mod_unload(); break; } return (0); } static device_method_t vmbus_methods[] = { /** Device interface */ DEVMETHOD(device_identify, vmbus_identify), DEVMETHOD(device_probe, vmbus_probe), DEVMETHOD(device_attach, vmbus_attach), DEVMETHOD(device_detach, vmbus_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /** Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, vmbus_read_ivar), DEVMETHOD(bus_write_ivar, vmbus_write_ivar), { 0, 0 } }; static char driver_name[] = "vmbus"; static driver_t vmbus_driver = { driver_name, vmbus_methods,0, }; devclass_t vmbus_devclass; DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); MODULE_VERSION(vmbus,1); /* TODO: We want to be earlier than SI_SUB_VFS */ SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL);