summaryrefslogtreecommitdiffstats
path: root/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c')
-rw-r--r--sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c364
1 files changed, 244 insertions, 120 deletions
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
index ca28fd5..f9432c8 100644
--- a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
+++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
@@ -53,22 +53,17 @@ __FBSDID("$FreeBSD$");
#include <machine/stdarg.h>
#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/segments.h>
#include <sys/pcpu.h>
+#include <x86/apicvar.h>
#include "hv_vmbus_priv.h"
#define VMBUS_IRQ 0x5
-static struct intr_event *hv_msg_intr_event;
-static struct intr_event *hv_event_intr_event;
-static void *msg_swintr;
-static void *event_swintr;
static device_t vmbus_devp;
-static void *vmbus_cookiep;
-static int vmbus_rid;
-struct resource *intr_res;
-static int vmbus_irq = VMBUS_IRQ;
static int vmbus_inited;
static hv_setup_args setup_args; /* only CPU 0 supported at this time */
@@ -77,14 +72,17 @@ static hv_setup_args setup_args; /* only CPU 0 supported at this time */
* the hypervisor.
*/
static void
-vmbus_msg_swintr(void *dummy)
+vmbus_msg_swintr(void *arg)
{
int cpu;
void* page_addr;
hv_vmbus_message* msg;
hv_vmbus_message* copied;
- cpu = PCPU_GET(cpuid);
+ cpu = (int)(long)arg;
+ KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: "
+ "cpu out of range!"));
+
page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
@@ -130,17 +128,8 @@ vmbus_msg_swintr(void *dummy)
*
* The purpose of this routine is to determine the type of VMBUS protocol
* message to process - an event or a channel message.
- * As this is an interrupt filter routine, the function runs in a very
- * restricted envinronment. From the manpage for bus_setup_intr(9)
- *
- * In this restricted environment, care must be taken to account for all
- * races. A careful analysis of races should be done as well. It is gener-
- * ally cheaper to take an extra interrupt, for example, than to protect
- * variables with spinlocks. Read, modify, write cycles of hardware regis-
- * ters need to be carefully analyzed if other threads are accessing the
- * same registers.
*/
-static int
+static inline int
hv_vmbus_isr(void *unused)
{
int cpu;
@@ -149,8 +138,6 @@ hv_vmbus_isr(void *unused)
void* page_addr;
cpu = PCPU_GET(cpuid);
- /* (Temporary limit) */
- KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero"));
/*
* The Windows team has advised that we check for events
@@ -162,9 +149,21 @@ hv_vmbus_isr(void *unused)
event = (hv_vmbus_synic_event_flags*)
page_addr + HV_VMBUS_MESSAGE_SINT;
- /* Since we are a child, we only need to check bit 0 */
- if (synch_test_and_clear_bit(0, &event->flags32[0])) {
- swi_sched(event_swintr, 0);
+ if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
+ (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
+ /* Since we are a child, we only need to check bit 0 */
+ if (synch_test_and_clear_bit(0, &event->flags32[0])) {
+ swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
+ }
+ } else {
+ /*
+ * On host with Win8 or above, we can directly look at
+ * the event page. If bit n is set, we have an interrupt
+ * on the channel with id n.
+ * Directly schedule the event software interrupt on
+ * current cpu.
+ */
+ swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
}
/* Check if there are actual msgs to be process */
@@ -172,12 +171,47 @@ hv_vmbus_isr(void *unused)
msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
- swi_sched(msg_swintr, 0);
+ swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0);
}
return FILTER_HANDLED;
}
+#ifdef HV_DEBUG_INTR
+uint32_t hv_intr_count = 0;
+#endif
+uint32_t hv_vmbus_swintr_event_cpu[MAXCPU];
+uint32_t hv_vmbus_intr_cpu[MAXCPU];
+
+void
+hv_vector_handler(struct trapframe *trap_frame)
+{
+#ifdef HV_DEBUG_INTR
+ int cpu;
+#endif
+
+ /*
+ * Disable preemption.
+ */
+ critical_enter();
+
+#ifdef HV_DEBUG_INTR
+ /*
+ * Do a little interrupt counting.
+ */
+ cpu = PCPU_GET(cpuid);
+ hv_vmbus_intr_cpu[cpu]++;
+ hv_intr_count++;
+#endif
+
+ hv_vmbus_isr(NULL);
+
+ /*
+ * Enable preemption.
+ */
+ critical_exit();
+}
+
static int
vmbus_read_ivar(
device_t dev,
@@ -316,6 +350,81 @@ vmbus_probe(device_t dev) {
return (BUS_PROBE_NOWILDCARD);
}
+#ifdef HYPERV
+extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback);
+
+/**
+ * @brief Find a free IDT slot and setup the interrupt handler.
+ */
+static int
+vmbus_vector_alloc(void)
+{
+ int vector;
+ uintptr_t func;
+ struct gate_descriptor *ip;
+
+ /*
+ * Search backwards form the highest IDT vector available for use
+ * as vmbus channel callback vector. We install 'hv_vmbus_callback'
+ * handler at that vector and use it to interrupt vcpus.
+ */
+ vector = APIC_SPURIOUS_INT;
+ while (--vector >= APIC_IPI_INTS) {
+ ip = &idt[vector];
+ func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
+ if (func == (uintptr_t)&IDTVEC(rsvd)) {
+#ifdef __i386__
+ setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYS386IGT,
+ SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#else
+ setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYSIGT,
+ SEL_KPL, 0);
+#endif
+
+ return (vector);
+ }
+ }
+ return (0);
+}
+
+/**
+ * @brief Restore the IDT slot to rsvd.
+ */
+static void
+vmbus_vector_free(int vector)
+{
+ uintptr_t func;
+ struct gate_descriptor *ip;
+
+ if (vector == 0)
+ return;
+
+ KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
+ ("invalid vector %d", vector));
+
+ ip = &idt[vector];
+ func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
+ KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback),
+ ("invalid vector %d", vector));
+
+ setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
+}
+
+#else /* HYPERV */
+
+static int
+vmbus_vector_alloc(void)
+{
+ return(0);
+}
+
+static void
+vmbus_vector_free(int vector)
+{
+}
+
+#endif /* HYPERV */
+
/**
* @brief Main vmbus driver initialization routine.
*
@@ -331,22 +440,7 @@ vmbus_probe(device_t dev) {
static int
vmbus_bus_init(void)
{
- struct ioapic_intsrc {
- struct intsrc io_intsrc;
- u_int io_irq;
- u_int io_intpin:8;
- u_int io_vector:8;
- u_int io_cpu:8;
- u_int io_activehi:1;
- u_int io_edgetrigger:1;
- u_int io_masked:1;
- int io_bus:4;
- uint32_t io_lowreg;
- };
- int i, ret;
- unsigned int vector = 0;
- struct intsrc *isrc;
- struct ioapic_intsrc *intpin;
+ int i, j, n, ret;
if (vmbus_inited)
return (0);
@@ -361,80 +455,100 @@ vmbus_bus_init(void)
return (ret);
}
- ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr,
- NULL, SWI_CLOCK, 0, &msg_swintr);
-
- if (ret)
- goto cleanup;
-
/*
- * Message SW interrupt handler checks a per-CPU page and
- * thus the thread needs to be bound to CPU-0 - which is where
- * all interrupts are processed.
+ * Find a free IDT slot for vmbus callback.
*/
- ret = intr_event_bind(hv_msg_intr_event, 0);
-
- if (ret)
- goto cleanup1;
+ hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc();
- ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events,
- NULL, SWI_CLOCK, 0, &event_swintr);
-
- if (ret)
- goto cleanup1;
+ if (hv_vmbus_g_context.hv_cb_vector == 0) {
+ if(bootverbose)
+ printf("Error VMBUS: Cannot find free IDT slot for "
+ "vmbus callback!\n");
+ goto cleanup;
+ }
- intr_res = bus_alloc_resource(vmbus_devp,
- SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE);
+ if(bootverbose)
+ printf("VMBUS: vmbus callback vector %d\n",
+ hv_vmbus_g_context.hv_cb_vector);
- if (intr_res == NULL) {
- ret = ENOMEM; /* XXXKYS: Need a better errno */
- goto cleanup2;
+ /*
+ * Notify the hypervisor of our vector.
+ */
+ setup_args.vector = hv_vmbus_g_context.hv_cb_vector;
+
+ CPU_FOREACH(j) {
+ hv_vmbus_intr_cpu[j] = 0;
+ hv_vmbus_swintr_event_cpu[j] = 0;
+ hv_vmbus_g_context.hv_event_intr_event[j] = NULL;
+ hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
+ hv_vmbus_g_context.event_swintr[j] = NULL;
+ hv_vmbus_g_context.msg_swintr[j] = NULL;
+
+ for (i = 0; i < 2; i++)
+ setup_args.page_buffers[2 * j + i] = NULL;
}
/*
- * Setup interrupt filter handler
+ * Per cpu setup.
*/
- ret = bus_setup_intr(vmbus_devp, intr_res,
- INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL,
- NULL, &vmbus_cookiep);
-
- if (ret != 0)
- goto cleanup3;
-
- ret = bus_bind_intr(vmbus_devp, intr_res, 0);
- if (ret != 0)
- goto cleanup4;
-
- isrc = intr_lookup_source(vmbus_irq);
- if ((isrc == NULL) || (isrc->is_event == NULL)) {
- ret = EINVAL;
- goto cleanup4;
- }
+ CPU_FOREACH(j) {
+ /*
+ * Setup software interrupt thread and handler for msg handling.
+ */
+ ret = swi_add(&hv_vmbus_g_context.hv_msg_intr_event[j],
+ "hv_msg", vmbus_msg_swintr, (void *)(long)j, SWI_CLOCK, 0,
+ &hv_vmbus_g_context.msg_swintr[j]);
+ if (ret) {
+ if(bootverbose)
+ printf("VMBUS: failed to setup msg swi for "
+ "cpu %d\n", j);
+ goto cleanup1;
+ }
- /* vector = isrc->is_event->ie_vector; */
- intpin = (struct ioapic_intsrc *)isrc;
- vector = intpin->io_vector;
+ /*
+ * Bind the swi thread to the cpu.
+ */
+ ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j],
+ j);
+ if (ret) {
+ if(bootverbose)
+ printf("VMBUS: failed to bind msg swi thread "
+ "to cpu %d\n", j);
+ goto cleanup1;
+ }
- if(bootverbose)
- printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector);
+ /*
+ * Setup software interrupt thread and handler for
+ * event handling.
+ */
+ ret = swi_add(&hv_vmbus_g_context.hv_event_intr_event[j],
+ "hv_event", hv_vmbus_on_events, (void *)(long)j,
+ SWI_CLOCK, 0, &hv_vmbus_g_context.event_swintr[j]);
+ if (ret) {
+ if(bootverbose)
+ printf("VMBUS: failed to setup event swi for "
+ "cpu %d\n", j);
+ goto cleanup1;
+ }
- /**
- * Notify the hypervisor of our irq.
- */
- setup_args.vector = vector;
- for(i = 0; i < 2; i++) {
- setup_args.page_buffers[i] =
+ /*
+ * Prepare the per cpu msg and event pages to be called on each cpu.
+ */
+ for(i = 0; i < 2; i++) {
+ setup_args.page_buffers[2 * j + i] =
malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
- if (setup_args.page_buffers[i] == NULL) {
- KASSERT(setup_args.page_buffers[i] != NULL,
+ if (setup_args.page_buffers[2 * j + i] == NULL) {
+ KASSERT(setup_args.page_buffers[2 * j + i] != NULL,
("Error VMBUS: malloc failed!"));
- if (i > 0)
- free(setup_args.page_buffers[0], M_DEVBUF);
- goto cleanup4;
+ goto cleanup1;
+ }
}
}
- /* only CPU #0 supported at this time */
+ if (bootverbose)
+ printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n",
+ smp_started);
+
smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
/*
@@ -443,26 +557,32 @@ vmbus_bus_init(void)
ret = hv_vmbus_connect();
if (ret != 0)
- goto cleanup4;
+ goto cleanup1;
hv_vmbus_request_channel_offers();
return (ret);
- cleanup4:
-
+ cleanup1:
/*
- * remove swi, bus and intr resource
+ * Free pages alloc'ed
*/
- bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
+ for (n = 0; n < 2 * MAXCPU; n++)
+ if (setup_args.page_buffers[n] != NULL)
+ free(setup_args.page_buffers[n], M_DEVBUF);
- cleanup3:
- bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
-
- cleanup2:
- swi_remove(event_swintr);
+ /*
+ * remove swi and vmbus callback vector;
+ */
+ CPU_FOREACH(j) {
+ if (hv_vmbus_g_context.msg_swintr[j] != NULL)
+ swi_remove(hv_vmbus_g_context.msg_swintr[j]);
+ if (hv_vmbus_g_context.event_swintr[j] != NULL)
+ swi_remove(hv_vmbus_g_context.event_swintr[j]);
+ hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
+ hv_vmbus_g_context.hv_event_intr_event[j] = NULL;
+ }
- cleanup1:
- swi_remove(msg_swintr);
+ vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
cleanup:
hv_vmbus_cleanup();
@@ -515,20 +635,24 @@ vmbus_bus_exit(void)
smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
- for(i = 0; i < 2; i++) {
+ for(i = 0; i < 2 * MAXCPU; i++) {
if (setup_args.page_buffers[i] != 0)
free(setup_args.page_buffers[i], M_DEVBUF);
}
hv_vmbus_cleanup();
- /* remove swi, bus and intr resource */
- bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
-
- bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
+ /* remove swi */
+ CPU_FOREACH(i) {
+ if (hv_vmbus_g_context.msg_swintr[i] != NULL)
+ swi_remove(hv_vmbus_g_context.msg_swintr[i]);
+ if (hv_vmbus_g_context.event_swintr[i] != NULL)
+ swi_remove(hv_vmbus_g_context.event_swintr[i]);
+ hv_vmbus_g_context.hv_msg_intr_event[i] = NULL;
+ hv_vmbus_g_context.hv_event_intr_event[i] = NULL;
+ }
- swi_remove(msg_swintr);
- swi_remove(event_swintr);
+ vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
return;
}
@@ -603,6 +727,6 @@ devclass_t vmbus_devclass;
DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
MODULE_VERSION(vmbus,1);
-/* TODO: We want to be earlier than SI_SUB_VFS */
-SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL);
+/* We want to be started after SMP is initialized */
+SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL);
OpenPOWER on IntegriCloud