From e1016866c7ece00b60ccc7f0807fb1c2ebd0f3b8 Mon Sep 17 00:00:00 2001 From: jhb Date: Thu, 23 Jan 2014 20:21:39 +0000 Subject: MFC 257422,257661,258075,258476,258494,258579,258609,258699: Several enhancements to the I/O APIC support in bhyve including: - Move the I/O APIC device model from userspace into vmm.ko and add ioctls to assert and deassert I/O APIC pins. - Add HPET device emulation including a single timer block with 8 timers. - Remove the 'vdev' abstraction. Approved by: neel --- sys/amd64/include/vmm.h | 5 + sys/amd64/include/vmm_dev.h | 20 ++ sys/amd64/vmm/intel/vmx.c | 30 +- sys/amd64/vmm/io/ppt.c | 2 +- sys/amd64/vmm/io/vdev.c | 270 --------------- sys/amd64/vmm/io/vdev.h | 84 ----- sys/amd64/vmm/io/vhpet.c | 783 ++++++++++++++++++++++++++++++++++++++++++++ sys/amd64/vmm/io/vhpet.h | 44 +++ sys/amd64/vmm/io/vioapic.c | 475 +++++++++++++++++++++++++++ sys/amd64/vmm/io/vioapic.h | 50 +++ sys/amd64/vmm/io/vlapic.c | 99 +++--- sys/amd64/vmm/io/vlapic.h | 15 +- sys/amd64/vmm/vmm.c | 57 +++- sys/amd64/vmm/vmm_dev.c | 26 +- sys/amd64/vmm/vmm_ktr.h | 36 +- sys/amd64/vmm/vmm_lapic.c | 12 +- sys/amd64/vmm/vmm_lapic.h | 18 +- sys/modules/vmm/Makefile | 3 +- 18 files changed, 1556 insertions(+), 473 deletions(-) delete mode 100644 sys/amd64/vmm/io/vdev.c delete mode 100644 sys/amd64/vmm/io/vdev.h create mode 100644 sys/amd64/vmm/io/vhpet.c create mode 100644 sys/amd64/vmm/io/vhpet.h create mode 100644 sys/amd64/vmm/io/vioapic.c create mode 100644 sys/amd64/vmm/io/vioapic.h (limited to 'sys') diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index e8bc409..b6bb309 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -38,6 +38,8 @@ struct vm_memory_segment; struct seg_desc; struct vm_exit; struct vm_run; +struct vhpet; +struct vioapic; struct vlapic; struct vmspace; struct vm_object; @@ -116,10 +118,13 @@ int vm_nmi_pending(struct vm *vm, int vcpuid); void vm_nmi_clear(struct vm *vm, int vcpuid); uint64_t *vm_guest_msrs(struct vm *vm, 
int cpu); struct vlapic *vm_lapic(struct vm *vm, int cpu); +struct vioapic *vm_ioapic(struct vm *vm); +struct vhpet *vm_hpet(struct vm *vm); int vm_get_capability(struct vm *vm, int vcpu, int type, int *val); int vm_set_capability(struct vm *vm, int vcpu, int type, int val); int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state); int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state); +int vm_apicid2vcpuid(struct vm *vm, int apicid); void vm_activate_cpu(struct vm *vm, int vcpu); cpuset_t vm_active_cpus(struct vm *vm); struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index bf014cc..19a5b02 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -71,6 +71,10 @@ struct vm_lapic_irq { int vector; }; +struct vm_ioapic_irq { + int irq; +}; + struct vm_capability { int cpuid; enum vm_cap_type captype; @@ -142,6 +146,10 @@ struct vm_gpa_pte { int ptenum; }; +struct vm_hpet_cap { + uint32_t capabilities; /* lower 32 bits of HPET capabilities */ +}; + enum { /* general routines */ IOCNUM_ABIVERS = 0, @@ -164,6 +172,9 @@ enum { IOCNUM_INJECT_EVENT = 30, IOCNUM_LAPIC_IRQ = 31, IOCNUM_INJECT_NMI = 32, + IOCNUM_IOAPIC_ASSERT_IRQ = 33, + IOCNUM_IOAPIC_DEASSERT_IRQ = 34, + IOCNUM_IOAPIC_PULSE_IRQ = 35, /* PCI pass-thru */ IOCNUM_BIND_PPTDEV = 40, @@ -179,6 +190,7 @@ enum { /* kernel device state */ IOCNUM_SET_X2APIC_STATE = 60, IOCNUM_GET_X2APIC_STATE = 61, + IOCNUM_GET_HPET_CAPABILITIES = 62, }; #define VM_RUN \ @@ -199,6 +211,12 @@ enum { _IOW('v', IOCNUM_INJECT_EVENT, struct vm_event) #define VM_LAPIC_IRQ \ _IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq) +#define VM_IOAPIC_ASSERT_IRQ \ + _IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq) +#define VM_IOAPIC_DEASSERT_IRQ \ + _IOW('v', IOCNUM_IOAPIC_DEASSERT_IRQ, struct vm_ioapic_irq) +#define VM_IOAPIC_PULSE_IRQ \ + _IOW('v', IOCNUM_IOAPIC_PULSE_IRQ, struct vm_ioapic_irq) #define 
VM_SET_CAPABILITY \ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) #define VM_GET_CAPABILITY \ @@ -223,6 +241,8 @@ enum { _IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic) #define VM_GET_X2APIC_STATE \ _IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic) +#define VM_GET_HPET_CAPABILITIES \ + _IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap) #define VM_GET_GPA_PMAP \ _IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte) #endif diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index 1241b14..c89c397 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -308,8 +308,8 @@ vmx_setjmp_rc2str(int rc) } } -#define SETJMP_TRACE(vmx, vcpu, vmxctx, regname) \ - VMM_CTR1((vmx)->vm, (vcpu), "setjmp trace " #regname " 0x%016lx", \ +#define SETJMP_TRACE(vmx, vcpu, vmxctx, regname) \ + VCPU_CTR1((vmx)->vm, (vcpu), "setjmp trace " #regname " 0x%016lx", \ (vmxctx)->regname) static void @@ -321,14 +321,14 @@ vmx_setjmp_trace(struct vmx *vmx, int vcpu, struct vmxctx *vmxctx, int rc) panic("vmx_setjmp_trace: invalid vmxctx %p; should be %p", vmxctx, &vmx->ctx[vcpu]); - VMM_CTR1((vmx)->vm, (vcpu), "vmxctx = %p", vmxctx); - VMM_CTR2((vmx)->vm, (vcpu), "setjmp return code %s(%d)", + VCPU_CTR1((vmx)->vm, (vcpu), "vmxctx = %p", vmxctx); + VCPU_CTR2((vmx)->vm, (vcpu), "setjmp return code %s(%d)", vmx_setjmp_rc2str(rc), rc); host_rsp = host_rip = ~0; vmread(VMCS_HOST_RIP, &host_rip); vmread(VMCS_HOST_RSP, &host_rsp); - VMM_CTR2((vmx)->vm, (vcpu), "vmcs host_rip 0x%016lx, host_rsp 0x%016lx", + VCPU_CTR2((vmx)->vm, (vcpu), "vmcs host_rip 0x%016lx, host_rsp %#lx", host_rip, host_rsp); SETJMP_TRACE(vmx, vcpu, vmxctx, host_r15); @@ -887,7 +887,7 @@ static __inline void vmx_run_trace(struct vmx *vmx, int vcpu) { #ifdef KTR - VMM_CTR1(vmx->vm, vcpu, "Resume execution at 0x%0lx", vmcs_guest_rip()); + VCPU_CTR1(vmx->vm, vcpu, "Resume execution at %#lx", vmcs_guest_rip()); #endif } @@ -896,7 +896,7 @@ vmx_exit_trace(struct vmx *vmx, int vcpu, 
uint64_t rip, uint32_t exit_reason, int handled) { #ifdef KTR - VMM_CTR3(vmx->vm, vcpu, "%s %s vmexit at 0x%0lx", + VCPU_CTR3(vmx->vm, vcpu, "%s %s vmexit at 0x%0lx", handled ? "handled" : "unhandled", exit_reason_to_str(exit_reason), rip); #endif @@ -906,7 +906,7 @@ static __inline void vmx_astpending_trace(struct vmx *vmx, int vcpu, uint64_t rip) { #ifdef KTR - VMM_CTR1(vmx->vm, vcpu, "astpending vmexit at 0x%0lx", rip); + VCPU_CTR1(vmx->vm, vcpu, "astpending vmexit at 0x%0lx", rip); #endif } @@ -1055,7 +1055,7 @@ vmx_inject_nmi(struct vmx *vmx, int vcpu) if (error) panic("vmx_inject_nmi: vmwrite(intrinfo) %d", error); - VMM_CTR0(vmx->vm, vcpu, "Injecting vNMI"); + VCPU_CTR0(vmx->vm, vcpu, "Injecting vNMI"); /* Clear the request */ vm_nmi_clear(vmx->vm, vcpu); @@ -1068,7 +1068,7 @@ nmiblocked: */ vmx_set_nmi_window_exiting(vmx, vcpu); - VMM_CTR0(vmx->vm, vcpu, "Enabling NMI window exiting"); + VCPU_CTR0(vmx->vm, vcpu, "Enabling NMI window exiting"); return (1); } @@ -1134,7 +1134,7 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu) /* Update the Local APIC ISR */ lapic_intr_accepted(vmx->vm, vcpu, vector); - VMM_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector); + VCPU_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector); return; @@ -1145,7 +1145,7 @@ cantinject: */ vmx_set_int_window_exiting(vmx, vcpu); - VMM_CTR0(vmx->vm, vcpu, "Enabling interrupt window exiting"); + VCPU_CTR0(vmx->vm, vcpu, "Enabling interrupt window exiting"); } static int @@ -1435,7 +1435,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) case EXIT_REASON_INTR_WINDOW: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INTR_WINDOW, 1); vmx_clear_int_window_exiting(vmx, vcpu); - VMM_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting"); + VCPU_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting"); return (1); case EXIT_REASON_EXT_INTR: /* @@ -1458,7 +1458,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) /* Exit to allow the pending 
virtual NMI to be injected */ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NMI_WINDOW, 1); vmx_clear_nmi_window_exiting(vmx, vcpu); - VMM_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting"); + VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting"); return (1); case EXIT_REASON_INOUT: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1); @@ -1659,7 +1659,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap) if (!handled) vmm_stat_incr(vmx->vm, vcpu, VMEXIT_USERSPACE, 1); - VMM_CTR1(vmx->vm, vcpu, "goto userland: exitcode %d",vmexit->exitcode); + VCPU_CTR1(vmx->vm, vcpu, "goto userland: exitcode %d",vmexit->exitcode); /* * XXX diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c index a6061e9..fce4bbd 100644 --- a/sys/amd64/vmm/io/ppt.c +++ b/sys/amd64/vmm/io/ppt.c @@ -421,7 +421,7 @@ pptintr(void *arg) vec = pptarg->vec; if (ppt->vm != NULL) - (void) lapic_set_intr(ppt->vm, pptarg->vcpu, vec); + lapic_intr_edge(ppt->vm, pptarg->vcpu, vec); else { /* * XXX diff --git a/sys/amd64/vmm/io/vdev.c b/sys/amd64/vmm/io/vdev.c deleted file mode 100644 index cd6c5d1..0000000 --- a/sys/amd64/vmm/io/vdev.c +++ /dev/null @@ -1,270 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include - -#include "vdev.h" - -struct vdev { - SLIST_ENTRY(vdev) entry; - struct vdev_ops *ops; - void *dev; -}; -static SLIST_HEAD(, vdev) vdev_head; -static int vdev_count; - -struct vdev_region { - SLIST_ENTRY(vdev_region) entry; - struct vdev_ops *ops; - void *dev; - struct io_region *io; -}; -static SLIST_HEAD(, vdev_region) region_head; -static int region_count; - -static MALLOC_DEFINE(M_VDEV, "vdev", "vdev"); - -#define VDEV_INIT (0) -#define VDEV_RESET (1) -#define VDEV_HALT (2) - -// static const char* vdev_event_str[] = {"VDEV_INIT", "VDEV_RESET", "VDEV_HALT"}; - -static int -vdev_system_event(int event) -{ - struct vdev *vd; - int rc; - - // TODO: locking - SLIST_FOREACH(vd, &vdev_head, entry) { - // printf("%s : %s Device %s\n", __func__, vdev_event_str[event], vd->ops->name); - switch (event) { - case VDEV_INIT: - rc = vd->ops->init(vd->dev); - break; - case VDEV_RESET: - rc = vd->ops->reset(vd->dev); - break; - case VDEV_HALT: - rc = vd->ops->halt(vd->dev); - break; - default: - break; - } - if (rc) { - printf("vdev %s init failed rc=%d\n", - vd->ops->name, rc); - return rc; - } - } - return 0; -} - -int -vdev_init(void) -{ - return vdev_system_event(VDEV_INIT); -} - -int -vdev_reset(void) -{ - return vdev_system_event(VDEV_RESET); -} - -int -vdev_halt(void) -{ - return vdev_system_event(VDEV_HALT); -} - -void -vdev_vm_init(void) -{ - 
SLIST_INIT(&vdev_head); - vdev_count = 0; - - SLIST_INIT(&region_head); - region_count = 0; -} -void -vdev_vm_cleanup(void) -{ - struct vdev *vd; - - // TODO: locking - while (!SLIST_EMPTY(&vdev_head)) { - vd = SLIST_FIRST(&vdev_head); - SLIST_REMOVE_HEAD(&vdev_head, entry); - free(vd, M_VDEV); - vdev_count--; - } -} - -int -vdev_register(struct vdev_ops *ops, void *dev) -{ - struct vdev *vd; - vd = malloc(sizeof(*vd), M_VDEV, M_WAITOK | M_ZERO); - vd->ops = ops; - vd->dev = dev; - - // TODO: locking - SLIST_INSERT_HEAD(&vdev_head, vd, entry); - vdev_count++; - return 0; -} - -void -vdev_unregister(void *dev) -{ - struct vdev *vd, *found; - - found = NULL; - // TODO: locking - SLIST_FOREACH(vd, &vdev_head, entry) { - if (vd->dev == dev) { - found = vd; - } - } - - if (found) { - SLIST_REMOVE(&vdev_head, found, vdev, entry); - free(found, M_VDEV); - } -} - -#define IN_RANGE(val, start, end) \ - (((val) >= (start)) && ((val) < (end))) - -static struct vdev_region* -vdev_find_region(struct io_region *io, void *dev) -{ - struct vdev_region *region, *found; - uint64_t region_base; - uint64_t region_end; - - found = NULL; - - // TODO: locking - // FIXME: we should verify we are in the context the current - // vcpu here as well. 
- SLIST_FOREACH(region, &region_head, entry) { - region_base = region->io->base; - region_end = region_base + region->io->len; - if (IN_RANGE(io->base, region_base, region_end) && - IN_RANGE(io->base+io->len, region_base, region_end+1) && - (dev && dev == region->dev)) { - found = region; - break; - } - } - return found; -} - -int -vdev_register_region(struct vdev_ops *ops, void *dev, struct io_region *io) -{ - struct vdev_region *region; - - region = vdev_find_region(io, dev); - if (region) { - return -EEXIST; - } - - region = malloc(sizeof(*region), M_VDEV, M_WAITOK | M_ZERO); - region->io = io; - region->ops = ops; - region->dev = dev; - - // TODO: locking - SLIST_INSERT_HEAD(&region_head, region, entry); - region_count++; - - return 0; -} - -void -vdev_unregister_region(void *dev, struct io_region *io) -{ - struct vdev_region *region; - - region = vdev_find_region(io, dev); - - if (region) { - SLIST_REMOVE(&region_head, region, vdev_region, entry); - free(region, M_VDEV); - region_count--; - } -} - -static int -vdev_memrw(uint64_t gpa, opsize_t size, uint64_t *data, int read) -{ - struct vdev_region *region; - struct io_region io; - region_attr_t attr; - int rc; - - io.base = gpa; - io.len = size; - - region = vdev_find_region(&io, NULL); - if (!region) - return -EINVAL; - - attr = (read) ? MMIO_READ : MMIO_WRITE; - if (!(region->io->attr & attr)) - return -EPERM; - - if (read) - rc = region->ops->memread(region->dev, gpa, size, data); - else - rc = region->ops->memwrite(region->dev, gpa, size, *data); - - return rc; -} - -int -vdev_memread(uint64_t gpa, opsize_t size, uint64_t *data) -{ - return vdev_memrw(gpa, size, data, 1); -} - -int -vdev_memwrite(uint64_t gpa, opsize_t size, uint64_t data) -{ - return vdev_memrw(gpa, size, &data, 0); -} diff --git a/sys/amd64/vmm/io/vdev.h b/sys/amd64/vmm/io/vdev.h deleted file mode 100644 index 6feeba8..0000000 --- a/sys/amd64/vmm/io/vdev.h +++ /dev/null @@ -1,84 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. 
- * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _VDEV_H_ -#define _VDEV_H_ - -typedef enum { - BYTE = 1, - WORD = 2, - DWORD = 4, - QWORD = 8, -} opsize_t; - -typedef enum { - MMIO_READ = 1, - MMIO_WRITE = 2, -} region_attr_t; - -struct io_region { - uint64_t base; - uint64_t len; - region_attr_t attr; - int vcpu; -}; - -typedef int (*vdev_init_t)(void* dev); -typedef int (*vdev_reset_t)(void* dev); -typedef int (*vdev_halt_t)(void* dev); -typedef int (*vdev_memread_t)(void* dev, uint64_t gpa, opsize_t size, uint64_t *data); -typedef int (*vdev_memwrite_t)(void* dev, uint64_t gpa, opsize_t size, uint64_t data); - - -struct vdev_ops { - const char *name; - vdev_init_t init; - vdev_reset_t reset; - vdev_halt_t halt; - vdev_memread_t memread; - vdev_memwrite_t memwrite; -}; - - -void vdev_vm_init(void); -void vdev_vm_cleanup(void); - -int vdev_register(struct vdev_ops *ops, void *dev); -void vdev_unregister(void *dev); - -int vdev_register_region(struct vdev_ops *ops, void *dev, struct io_region *io); -void vdev_unregister_region(void *dev, struct io_region *io); - -int vdev_init(void); -int vdev_reset(void); -int vdev_halt(void); -int vdev_memread(uint64_t gpa, opsize_t size, uint64_t *data); -int vdev_memwrite(uint64_t gpa, opsize_t size, uint64_t data); - -#endif /* _VDEV_H_ */ - diff --git a/sys/amd64/vmm/io/vhpet.c b/sys/amd64/vmm/io/vhpet.c new file mode 100644 index 0000000..112480ee --- /dev/null +++ b/sys/amd64/vmm/io/vhpet.c @@ -0,0 +1,783 @@ +/*- + * Copyright (c) 2013 Tycho Nightingale + * Copyright (c) 2013 Neel Natu + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "vmm_lapic.h" +#include "vioapic.h" +#include "vhpet.h" + +#include "vmm_ktr.h" + +static MALLOC_DEFINE(M_VHPET, "vhpet", "bhyve virtual hpet"); + +#define HPET_FREQ 10000000 /* 10.0 Mhz */ +#define FS_PER_S 1000000000000000ul + +/* Timer N Configuration and Capabilities Register */ +#define HPET_TCAP_RO_MASK (HPET_TCAP_INT_ROUTE | \ + HPET_TCAP_FSB_INT_DEL | \ + HPET_TCAP_SIZE | \ + HPET_TCAP_PER_INT) +/* + * HPET requires at least 3 timers and up to 32 timers per block. 
+ */ +#define VHPET_NUM_TIMERS 8 +CTASSERT(VHPET_NUM_TIMERS >= 3 && VHPET_NUM_TIMERS <= 32); + +struct vhpet_callout_arg { + struct vhpet *vhpet; + int timer_num; +}; + +struct vhpet { + struct vm *vm; + struct mtx mtx; + sbintime_t freq_sbt; + + uint64_t config; /* Configuration */ + uint64_t isr; /* Interrupt Status */ + uint32_t counter; /* HPET Counter */ + sbintime_t counter_sbt; + + struct { + uint64_t cap_config; /* Configuration */ + uint64_t msireg; /* FSB interrupt routing */ + uint32_t compval; /* Comparator */ + uint32_t comprate; + struct callout callout; + struct vhpet_callout_arg arg; + } timer[VHPET_NUM_TIMERS]; +}; + +#define VHPET_LOCK(vhp) mtx_lock(&((vhp)->mtx)) +#define VHPET_UNLOCK(vhp) mtx_unlock(&((vhp)->mtx)) + +static uint64_t +vhpet_capabilities(void) +{ + uint64_t cap = 0; + + cap |= 0x8086 << 16; /* vendor id */ + cap |= HPET_CAP_LEG_RT; /* legacy routing capable */ + cap |= (VHPET_NUM_TIMERS - 1) << 8; /* number of timers */ + cap |= 1; /* revision */ + cap &= ~HPET_CAP_COUNT_SIZE; /* 32-bit timer */ + + cap &= 0xffffffff; + cap |= (FS_PER_S / HPET_FREQ) << 32; /* tick period in fs */ + + return (cap); +} + +static __inline bool +vhpet_counter_enabled(struct vhpet *vhpet) +{ + + return ((vhpet->config & HPET_CNF_ENABLE) ? true : false); +} + +static __inline bool +vhpet_timer_msi_enabled(struct vhpet *vhpet, int n) +{ + const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN; + + /* + * LegacyReplacement Route configuration takes precedence over MSI + * for timers 0 and 1. + */ + if (n == 0 || n == 1) { + if (vhpet->config & HPET_CNF_LEG_RT) + return (false); + } + + if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable) + return (true); + else + return (false); +} + +static __inline int +vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n) +{ + /* + * If the timer is configured to use MSI then treat it as if the + * timer is not connected to the ioapic. 
+ */ + if (vhpet_timer_msi_enabled(vhpet, n)) + return (0); + + if (vhpet->config & HPET_CNF_LEG_RT) { + /* + * In "legacy routing" timers 0 and 1 are connected to + * ioapic pins 2 and 8 respectively. + */ + switch (n) { + case 0: + return (2); + case 1: + return (8); + } + } + + return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9); +} + +static uint32_t +vhpet_counter(struct vhpet *vhpet, bool latch) +{ + uint32_t val; + sbintime_t cur_sbt, delta_sbt; + + val = vhpet->counter; + if (vhpet_counter_enabled(vhpet)) { + cur_sbt = sbinuptime(); + delta_sbt = cur_sbt - vhpet->counter_sbt; + KASSERT(delta_sbt >= 0, + ("vhpet counter went backwards: %#lx to %#lx", + vhpet->counter_sbt, cur_sbt)); + val += delta_sbt / vhpet->freq_sbt; + + /* + * Keep track of the last value of the main counter that + * was read by the guest. + */ + if (latch) { + vhpet->counter = val; + vhpet->counter_sbt = cur_sbt; + } + } + + return (val); +} + +static void +vhpet_timer_clear_isr(struct vhpet *vhpet, int n) +{ + int pin; + + if (vhpet->isr & (1 << n)) { + pin = vhpet_timer_ioapic_pin(vhpet, n); + KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n)); + vioapic_deassert_irq(vhpet->vm, pin); + vhpet->isr &= ~(1 << n); + } +} + +static __inline bool +vhpet_periodic_timer(struct vhpet *vhpet, int n) +{ + + return ((vhpet->timer[n].cap_config & HPET_TCNF_TYPE) != 0); +} + +static __inline bool +vhpet_timer_interrupt_enabled(struct vhpet *vhpet, int n) +{ + + return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ENB) != 0); +} + +static __inline bool +vhpet_timer_edge_trig(struct vhpet *vhpet, int n) +{ + + KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: " + "timer %d is using MSI", n)); + + /* The legacy replacement interrupts are always edge triggered */ + if (vhpet->config & HPET_CNF_LEG_RT) { + if (n == 0 || n == 1) + return (true); + } + + if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0) + return (true); + else + return (false); +} + 
+static void +vhpet_timer_interrupt(struct vhpet *vhpet, int n) +{ + int apicid, vector, vcpuid, pin; + cpuset_t dmask; + + /* If interrupts are not enabled for this timer then just return. */ + if (!vhpet_timer_interrupt_enabled(vhpet, n)) + return; + + /* + * If a level triggered interrupt is already asserted then just return. + */ + if ((vhpet->isr & (1 << n)) != 0) { + VM_CTR1(vhpet->vm, "hpet t%d intr is already asserted", n); + return; + } + + if (vhpet_timer_msi_enabled(vhpet, n)) { + /* + * XXX should have an API 'vlapic_deliver_msi(vm, addr, data)' + * - assuming physical delivery mode + * - no need to interpret contents of 'msireg' here + */ + vector = vhpet->timer[n].msireg & 0xff; + apicid = (vhpet->timer[n].msireg >> (32 + 12)) & 0xff; + if (apicid != 0xff) { + /* unicast */ + vcpuid = vm_apicid2vcpuid(vhpet->vm, apicid); + lapic_intr_edge(vhpet->vm, vcpuid, vector); + } else { + /* broadcast */ + dmask = vm_active_cpus(vhpet->vm); + while ((vcpuid = CPU_FFS(&dmask)) != 0) { + vcpuid--; + CPU_CLR(vcpuid, &dmask); + lapic_intr_edge(vhpet->vm, vcpuid, vector); + } + } + return; + } + + pin = vhpet_timer_ioapic_pin(vhpet, n); + if (pin == 0) { + VM_CTR1(vhpet->vm, "hpet t%d intr is not routed to ioapic", n); + return; + } + + if (vhpet_timer_edge_trig(vhpet, n)) { + vioapic_pulse_irq(vhpet->vm, pin); + } else { + vhpet->isr |= 1 << n; + vioapic_assert_irq(vhpet->vm, pin); + } +} + +static void +vhpet_adjust_compval(struct vhpet *vhpet, int n, uint32_t counter) +{ + uint32_t compval, comprate, compnext; + + KASSERT(vhpet->timer[n].comprate != 0, ("hpet t%d is not periodic", n)); + + compval = vhpet->timer[n].compval; + comprate = vhpet->timer[n].comprate; + + /* + * Calculate the comparator value to be used for the next periodic + * interrupt. + * + * This function is commonly called from the callout handler. + * In this scenario the 'counter' is ahead of 'compval'. 
To find + * the next value to program into the accumulator we divide the + * number space between 'compval' and 'counter' into 'comprate' + * sized units. The 'compval' is rounded up such that is "ahead" + * of 'counter'. + */ + compnext = compval + ((counter - compval) / comprate + 1) * comprate; + + vhpet->timer[n].compval = compnext; +} + +static void +vhpet_handler(void *a) +{ + int n; + uint32_t counter; + sbintime_t sbt; + struct vhpet *vhpet; + struct callout *callout; + struct vhpet_callout_arg *arg; + + arg = a; + vhpet = arg->vhpet; + n = arg->timer_num; + callout = &vhpet->timer[n].callout; + + VM_CTR1(vhpet->vm, "hpet t%d fired", n); + + VHPET_LOCK(vhpet); + + if (callout_pending(callout)) /* callout was reset */ + goto done; + + if (!callout_active(callout)) /* callout was stopped */ + goto done; + + callout_deactivate(callout); + + if (!vhpet_counter_enabled(vhpet)) + panic("vhpet(%p) callout with counter disabled", vhpet); + + counter = vhpet_counter(vhpet, false); + + /* Update the accumulator for periodic timers */ + if (vhpet->timer[n].comprate != 0) + vhpet_adjust_compval(vhpet, n, counter); + + sbt = (vhpet->timer[n].compval - counter) * vhpet->freq_sbt; + callout_reset_sbt(callout, sbt, 0, vhpet_handler, arg, 0); + vhpet_timer_interrupt(vhpet, n); +done: + VHPET_UNLOCK(vhpet); + return; +} + +static void +vhpet_stop_timer(struct vhpet *vhpet, int n) +{ + + callout_stop(&vhpet->timer[n].callout); + vhpet_timer_clear_isr(vhpet, n); +} + +static void +vhpet_start_timer(struct vhpet *vhpet, int n) +{ + uint32_t counter, delta, delta2; + sbintime_t sbt; + + counter = vhpet_counter(vhpet, false); + + if (vhpet->timer[n].comprate != 0) + vhpet_adjust_compval(vhpet, n, counter); + + delta = vhpet->timer[n].compval - counter; + + /* + * In one-shot mode the guest will typically read the main counter + * before programming the comparator. We can use this heuristic to + * figure out whether the expiration time is in the past. 
If this + * is the case we schedule the callout to fire immediately. + */ + if (!vhpet_periodic_timer(vhpet, n)) { + delta2 = vhpet->timer[n].compval - vhpet->counter; + if (delta > delta2) { + VM_CTR3(vhpet->vm, "hpet t%d comparator value is in " + "the past: %u/%u", n, counter, + vhpet->timer[n].compval); + delta = 0; + } + } + + sbt = delta * vhpet->freq_sbt; + callout_reset_sbt(&vhpet->timer[n].callout, sbt, 0, vhpet_handler, + &vhpet->timer[n].arg, 0); +} + +static void +vhpet_start_counting(struct vhpet *vhpet) +{ + int i; + + vhpet->counter_sbt = sbinuptime(); + for (i = 0; i < VHPET_NUM_TIMERS; i++) + vhpet_start_timer(vhpet, i); +} + +static void +vhpet_stop_counting(struct vhpet *vhpet) +{ + int i; + + for (i = 0; i < VHPET_NUM_TIMERS; i++) + vhpet_stop_timer(vhpet, i); +} + +static __inline void +update_register(uint64_t *regptr, uint64_t data, uint64_t mask) +{ + + *regptr &= ~mask; + *regptr |= (data & mask); +} + +static void +vhpet_timer_update_config(struct vhpet *vhpet, int n, uint64_t data, + uint64_t mask) +{ + bool clear_isr; + int old_pin, new_pin; + uint32_t allowed_irqs; + uint64_t oldval, newval; + + if (vhpet_timer_msi_enabled(vhpet, n) || + vhpet_timer_edge_trig(vhpet, n)) { + if (vhpet->isr & (1 << n)) + panic("vhpet timer %d isr should not be asserted", n); + } + old_pin = vhpet_timer_ioapic_pin(vhpet, n); + oldval = vhpet->timer[n].cap_config; + + newval = oldval; + update_register(&newval, data, mask); + newval &= ~(HPET_TCAP_RO_MASK | HPET_TCNF_32MODE); + newval |= oldval & HPET_TCAP_RO_MASK; + + if (newval == oldval) + return; + + vhpet->timer[n].cap_config = newval; + VM_CTR2(vhpet->vm, "hpet t%d cap_config set to 0x%016lx", n, newval); + + /* + * Validate the interrupt routing in the HPET_TCNF_INT_ROUTE field. + * If it does not match the bits set in HPET_TCAP_INT_ROUTE then set + * it to the default value of 0. 
+ */ + allowed_irqs = vhpet->timer[n].cap_config >> 32; + new_pin = vhpet_timer_ioapic_pin(vhpet, n); + if (new_pin != 0 && (allowed_irqs & (1 << new_pin)) == 0) { + VM_CTR3(vhpet->vm, "hpet t%d configured invalid irq %d, " + "allowed_irqs 0x%08x", n, new_pin, allowed_irqs); + new_pin = 0; + vhpet->timer[n].cap_config &= ~HPET_TCNF_INT_ROUTE; + } + + if (!vhpet_periodic_timer(vhpet, n)) + vhpet->timer[n].comprate = 0; + + /* + * If the timer's ISR bit is set then clear it in the following cases: + * - interrupt is disabled + * - interrupt type is changed from level to edge or fsb. + * - interrupt routing is changed + * + * This is to ensure that this timer's level triggered interrupt does + * not remain asserted forever. + */ + if (vhpet->isr & (1 << n)) { + KASSERT(old_pin != 0, ("timer %d isr asserted to ioapic pin %d", + n, old_pin)); + if (!vhpet_timer_interrupt_enabled(vhpet, n)) + clear_isr = true; + else if (vhpet_timer_msi_enabled(vhpet, n)) + clear_isr = true; + else if (vhpet_timer_edge_trig(vhpet, n)) + clear_isr = true; + else if (vhpet_timer_ioapic_pin(vhpet, n) != old_pin) + clear_isr = true; + else + clear_isr = false; + + if (clear_isr) { + VM_CTR1(vhpet->vm, "hpet t%d isr cleared due to " + "configuration change", n); + vioapic_deassert_irq(vhpet->vm, old_pin); + vhpet->isr &= ~(1 << n); + } + } +} + +int +vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size, + void *arg) +{ + struct vhpet *vhpet; + uint64_t data, mask, oldval, val64; + uint32_t isr_clear_mask, old_compval, old_comprate; + int i, offset; + + vhpet = vm_hpet(vm); + offset = gpa - VHPET_BASE; + + VHPET_LOCK(vhpet); + + /* Accesses to the HPET should be 4 or 8 bytes wide */ + switch (size) { + case 8: + mask = 0xffffffffffffffff; + data = val; + break; + case 4: + mask = 0xffffffff; + data = val; + if ((offset & 0x4) != 0) { + mask <<= 32; + data <<= 32; + } + break; + default: + VM_CTR2(vhpet->vm, "hpet invalid mmio write: " + "offset 0x%08x, size %d", offset, 
size); + goto done; + } + + /* Access to the HPET should be naturally aligned to its width */ + if (offset & (size - 1)) { + VM_CTR2(vhpet->vm, "hpet invalid mmio write: " + "offset 0x%08x, size %d", offset, size); + goto done; + } + + if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) { + oldval = vhpet->config; + update_register(&vhpet->config, data, mask); + if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) { + if (vhpet_counter_enabled(vhpet)) { + vhpet_start_counting(vhpet); + VM_CTR0(vhpet->vm, "hpet enabled"); + } else { + vhpet_stop_counting(vhpet); + VM_CTR0(vhpet->vm, "hpet disabled"); + } + } + goto done; + } + + if (offset == HPET_ISR || offset == HPET_ISR + 4) { + isr_clear_mask = vhpet->isr & data; + for (i = 0; i < VHPET_NUM_TIMERS; i++) { + if ((isr_clear_mask & (1 << i)) != 0) { + VM_CTR1(vhpet->vm, "hpet t%d isr cleared", i); + vhpet_timer_clear_isr(vhpet, i); + } + } + goto done; + } + + if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) { + /* Zero-extend the counter to 64-bits before updating it */ + val64 = vhpet->counter; + update_register(&val64, data, mask); + vhpet->counter = val64; + if (vhpet_counter_enabled(vhpet)) + vhpet_start_counting(vhpet); + goto done; + } + + for (i = 0; i < VHPET_NUM_TIMERS; i++) { + if (offset == HPET_TIMER_CAP_CNF(i) || + offset == HPET_TIMER_CAP_CNF(i) + 4) { + vhpet_timer_update_config(vhpet, i, data, mask); + break; + } + + if (offset == HPET_TIMER_COMPARATOR(i) || + offset == HPET_TIMER_COMPARATOR(i) + 4) { + old_compval = vhpet->timer[i].compval; + old_comprate = vhpet->timer[i].comprate; + if (vhpet_periodic_timer(vhpet, i)) { + /* + * In periodic mode writes to the comparator + * change the 'compval' register only if the + * HPET_TCNF_VAL_SET bit is set in the config + * register. 
+ */ + val64 = vhpet->timer[i].comprate; + update_register(&val64, data, mask); + vhpet->timer[i].comprate = val64; + if ((vhpet->timer[i].cap_config & + HPET_TCNF_VAL_SET) != 0) { + vhpet->timer[i].compval = val64; + } + } else { + KASSERT(vhpet->timer[i].comprate == 0, + ("vhpet one-shot timer %d has invalid " + "rate %u", i, vhpet->timer[i].comprate)); + val64 = vhpet->timer[i].compval; + update_register(&val64, data, mask); + vhpet->timer[i].compval = val64; + } + vhpet->timer[i].cap_config &= ~HPET_TCNF_VAL_SET; + + if (vhpet->timer[i].compval != old_compval || + vhpet->timer[i].comprate != old_comprate) { + if (vhpet_counter_enabled(vhpet)) + vhpet_start_timer(vhpet, i); + } + break; + } + + if (offset == HPET_TIMER_FSB_VAL(i) || + offset == HPET_TIMER_FSB_ADDR(i)) { + update_register(&vhpet->timer[i].msireg, data, mask); + break; + } + } +done: + VHPET_UNLOCK(vhpet); + return (0); +} + +int +vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size, + void *arg) +{ + int i, offset; + struct vhpet *vhpet; + uint64_t data; + + vhpet = vm_hpet(vm); + offset = gpa - VHPET_BASE; + + VHPET_LOCK(vhpet); + + /* Accesses to the HPET should be 4 or 8 bytes wide */ + if (size != 4 && size != 8) { + VM_CTR2(vhpet->vm, "hpet invalid mmio read: " + "offset 0x%08x, size %d", offset, size); + data = 0; + goto done; + } + + /* Access to the HPET should be naturally aligned to its width */ + if (offset & (size - 1)) { + VM_CTR2(vhpet->vm, "hpet invalid mmio read: " + "offset 0x%08x, size %d", offset, size); + data = 0; + goto done; + } + + if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) { + data = vhpet_capabilities(); + goto done; + } + + if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) { + data = vhpet->config; + goto done; + } + + if (offset == HPET_ISR || offset == HPET_ISR + 4) { + data = vhpet->isr; + goto done; + } + + if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) { + data = vhpet_counter(vhpet, true); 
+ goto done; + } + + for (i = 0; i < VHPET_NUM_TIMERS; i++) { + if (offset == HPET_TIMER_CAP_CNF(i) || + offset == HPET_TIMER_CAP_CNF(i) + 4) { + data = vhpet->timer[i].cap_config; + break; + } + + if (offset == HPET_TIMER_COMPARATOR(i) || + offset == HPET_TIMER_COMPARATOR(i) + 4) { + data = vhpet->timer[i].compval; + break; + } + + if (offset == HPET_TIMER_FSB_VAL(i) || + offset == HPET_TIMER_FSB_ADDR(i)) { + data = vhpet->timer[i].msireg; + break; + } + } + + if (i >= VHPET_NUM_TIMERS) + data = 0; +done: + VHPET_UNLOCK(vhpet); + + if (size == 4) { + if (offset & 0x4) + data >>= 32; + } + *rval = data; + return (0); +} + +struct vhpet * +vhpet_init(struct vm *vm) +{ + int i, pincount; + struct vhpet *vhpet; + uint64_t allowed_irqs; + struct vhpet_callout_arg *arg; + struct bintime bt; + + vhpet = malloc(sizeof(struct vhpet), M_VHPET, M_WAITOK | M_ZERO); + vhpet->vm = vm; + mtx_init(&vhpet->mtx, "vhpet lock", NULL, MTX_DEF); + + FREQ2BT(HPET_FREQ, &bt); + vhpet->freq_sbt = bttosbt(bt); + + pincount = vioapic_pincount(vm); + if (pincount >= 24) + allowed_irqs = 0x00f00000; /* irqs 20, 21, 22 and 23 */ + else + allowed_irqs = 0; + + /* + * Initialize HPET timer hardware state. 
+ */ + for (i = 0; i < VHPET_NUM_TIMERS; i++) { + vhpet->timer[i].cap_config = allowed_irqs << 32; + vhpet->timer[i].cap_config |= HPET_TCAP_PER_INT; + vhpet->timer[i].cap_config |= HPET_TCAP_FSB_INT_DEL; + + vhpet->timer[i].compval = 0xffffffff; + callout_init(&vhpet->timer[i].callout, 1); + + arg = &vhpet->timer[i].arg; + arg->vhpet = vhpet; + arg->timer_num = i; + } + + return (vhpet); +} + +void +vhpet_cleanup(struct vhpet *vhpet) +{ + int i; + + for (i = 0; i < VHPET_NUM_TIMERS; i++) + callout_drain(&vhpet->timer[i].callout); + + free(vhpet, M_VHPET); +} + +int +vhpet_getcap(struct vm_hpet_cap *cap) +{ + + cap->capabilities = vhpet_capabilities(); + return (0); +} diff --git a/sys/amd64/vmm/io/vhpet.h b/sys/amd64/vmm/io/vhpet.h new file mode 100644 index 0000000..330e017 --- /dev/null +++ b/sys/amd64/vmm/io/vhpet.h @@ -0,0 +1,44 @@ +/*- + * Copyright (c) 2013 Tycho Nightingale + * Copyright (c) 2013 Neel Natu + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VHPET_H_ +#define _VHPET_H_ + +#define VHPET_BASE 0xfed00000 +#define VHPET_SIZE 1024 + +struct vhpet *vhpet_init(struct vm *vm); +void vhpet_cleanup(struct vhpet *vhpet); +int vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, + int size, void *arg); +int vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *val, + int size, void *arg); +int vhpet_getcap(struct vm_hpet_cap *cap); + +#endif /* _VHPET_H_ */ diff --git a/sys/amd64/vmm/io/vioapic.c b/sys/amd64/vmm/io/vioapic.c new file mode 100644 index 0000000..167e8ab --- /dev/null +++ b/sys/amd64/vmm/io/vioapic.c @@ -0,0 +1,475 @@ +/*- + * Copyright (c) 2013 Tycho Nightingale + * Copyright (c) 2013 Neel Natu + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "vmm_ktr.h" +#include "vmm_lapic.h" +#include "vioapic.h" + +#define IOREGSEL 0x00 +#define IOWIN 0x10 + +#define REDIR_ENTRIES 24 +#define RTBL_RO_BITS ((uint64_t)(IOART_REM_IRR | IOART_DELIVS)) + +struct vioapic { + struct vm *vm; + struct mtx mtx; + uint32_t id; + uint32_t ioregsel; + struct { + uint64_t reg; + int acnt; /* sum of pin asserts (+1) and deasserts (-1) */ + } rtbl[REDIR_ENTRIES]; +}; + +#define VIOAPIC_LOCK(vioapic) mtx_lock(&((vioapic)->mtx)) +#define VIOAPIC_UNLOCK(vioapic) mtx_unlock(&((vioapic)->mtx)) +#define VIOAPIC_LOCKED(vioapic) mtx_owned(&((vioapic)->mtx)) + +static MALLOC_DEFINE(M_VIOAPIC, "vioapic", "bhyve virtual ioapic"); + +#define VIOAPIC_CTR1(vioapic, fmt, a1) \ + VM_CTR1((vioapic)->vm, fmt, a1) + +#define VIOAPIC_CTR2(vioapic, fmt, a1, a2) \ + VM_CTR2((vioapic)->vm, fmt, a1, a2) + +#define VIOAPIC_CTR3(vioapic, fmt, a1, a2, a3) \ + VM_CTR3((vioapic)->vm, fmt, a1, a2, a3) + +#define VIOAPIC_CTR4(vioapic, fmt, a1, a2, a3, a4) \ + VM_CTR4((vioapic)->vm, fmt, a1, a2, a3, a4) + +#ifdef KTR +static const char 
* +pinstate_str(bool asserted) +{ + + if (asserted) + return ("asserted"); + else + return ("deasserted"); +} + +static const char * +trigger_str(bool level) +{ + + if (level) + return ("level"); + else + return ("edge"); +} +#endif + +static void +vioapic_send_intr(struct vioapic *vioapic, int pin) +{ + int vector, apicid, vcpuid; + uint32_t low, high; + cpuset_t dmask; + bool level; + + KASSERT(pin >= 0 && pin < REDIR_ENTRIES, + ("vioapic_set_pinstate: invalid pin number %d", pin)); + + KASSERT(VIOAPIC_LOCKED(vioapic), + ("vioapic_set_pinstate: vioapic is not locked")); + + low = vioapic->rtbl[pin].reg; + high = vioapic->rtbl[pin].reg >> 32; + + /* + * XXX We only deal with: + * - physical destination + * - fixed delivery mode + */ + if ((low & IOART_DESTMOD) != IOART_DESTPHY) { + VIOAPIC_CTR2(vioapic, "ioapic pin%d: unsupported dest mode " + "0x%08x", pin, low); + return; + } + + if ((low & IOART_DELMOD) != IOART_DELFIXED) { + VIOAPIC_CTR2(vioapic, "ioapic pin%d: unsupported delivery mode " + "0x%08x", pin, low); + return; + } + + if ((low & IOART_INTMASK) == IOART_INTMSET) { + VIOAPIC_CTR1(vioapic, "ioapic pin%d: masked", pin); + return; + } + + level = low & IOART_TRGRLVL ? 
true : false; + if (level) + vioapic->rtbl[pin].reg |= IOART_REM_IRR; + + vector = low & IOART_INTVEC; + apicid = high >> APIC_ID_SHIFT; + if (apicid != 0xff) { + /* unicast */ + vcpuid = vm_apicid2vcpuid(vioapic->vm, apicid); + VIOAPIC_CTR4(vioapic, "ioapic pin%d: %s triggered intr " + "vector %d on vcpuid %d", pin, trigger_str(level), + vector, vcpuid); + lapic_set_intr(vioapic->vm, vcpuid, vector, level); + } else { + /* broadcast */ + VIOAPIC_CTR3(vioapic, "ioapic pin%d: %s triggered intr " + "vector %d on all vcpus", pin, trigger_str(level), vector); + dmask = vm_active_cpus(vioapic->vm); + while ((vcpuid = CPU_FFS(&dmask)) != 0) { + vcpuid--; + CPU_CLR(vcpuid, &dmask); + lapic_set_intr(vioapic->vm, vcpuid, vector, level); + } + } +} + +static void +vioapic_set_pinstate(struct vioapic *vioapic, int pin, bool newstate) +{ + int oldcnt, newcnt; + bool needintr; + + KASSERT(pin >= 0 && pin < REDIR_ENTRIES, + ("vioapic_set_pinstate: invalid pin number %d", pin)); + + KASSERT(VIOAPIC_LOCKED(vioapic), + ("vioapic_set_pinstate: vioapic is not locked")); + + oldcnt = vioapic->rtbl[pin].acnt; + if (newstate) + vioapic->rtbl[pin].acnt++; + else + vioapic->rtbl[pin].acnt--; + newcnt = vioapic->rtbl[pin].acnt; + + if (newcnt < 0) { + VIOAPIC_CTR2(vioapic, "ioapic pin%d: bad acnt %d", + pin, newcnt); + } + + needintr = false; + if (oldcnt == 0 && newcnt == 1) { + needintr = true; + VIOAPIC_CTR1(vioapic, "ioapic pin%d: asserted", pin); + } else if (oldcnt == 1 && newcnt == 0) { + VIOAPIC_CTR1(vioapic, "ioapic pin%d: deasserted", pin); + } else { + VIOAPIC_CTR3(vioapic, "ioapic pin%d: %s, ignored, acnt %d", + pin, pinstate_str(newstate), newcnt); + } + + if (needintr) + vioapic_send_intr(vioapic, pin); +} + +enum irqstate { + IRQSTATE_ASSERT, + IRQSTATE_DEASSERT, + IRQSTATE_PULSE +}; + +static int +vioapic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate) +{ + struct vioapic *vioapic; + + if (irq < 0 || irq >= REDIR_ENTRIES) + return (EINVAL); + + vioapic = 
vm_ioapic(vm); + + VIOAPIC_LOCK(vioapic); + switch (irqstate) { + case IRQSTATE_ASSERT: + vioapic_set_pinstate(vioapic, irq, true); + break; + case IRQSTATE_DEASSERT: + vioapic_set_pinstate(vioapic, irq, false); + break; + case IRQSTATE_PULSE: + vioapic_set_pinstate(vioapic, irq, true); + vioapic_set_pinstate(vioapic, irq, false); + break; + default: + panic("vioapic_set_irqstate: invalid irqstate %d", irqstate); + } + VIOAPIC_UNLOCK(vioapic); + + return (0); +} + +int +vioapic_assert_irq(struct vm *vm, int irq) +{ + + return (vioapic_set_irqstate(vm, irq, IRQSTATE_ASSERT)); +} + +int +vioapic_deassert_irq(struct vm *vm, int irq) +{ + + return (vioapic_set_irqstate(vm, irq, IRQSTATE_DEASSERT)); +} + +int +vioapic_pulse_irq(struct vm *vm, int irq) +{ + + return (vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE)); +} + +static uint32_t +vioapic_read(struct vioapic *vioapic, uint32_t addr) +{ + int regnum, pin, rshift; + + regnum = addr & 0xff; + switch (regnum) { + case IOAPIC_ID: + return (vioapic->id); + break; + case IOAPIC_VER: + return (((REDIR_ENTRIES - 1) << MAXREDIRSHIFT) | 0x11); + break; + case IOAPIC_ARB: + return (vioapic->id); + break; + default: + break; + } + + /* redirection table entries */ + if (regnum >= IOAPIC_REDTBL && + regnum < IOAPIC_REDTBL + REDIR_ENTRIES * 2) { + pin = (regnum - IOAPIC_REDTBL) / 2; + if ((regnum - IOAPIC_REDTBL) % 2) + rshift = 32; + else + rshift = 0; + + return (vioapic->rtbl[pin].reg >> rshift); + } + + return (0); +} + +static void +vioapic_write(struct vioapic *vioapic, uint32_t addr, uint32_t data) +{ + uint64_t data64, mask64; + int regnum, pin, lshift; + + regnum = addr & 0xff; + switch (regnum) { + case IOAPIC_ID: + vioapic->id = data & APIC_ID_MASK; + break; + case IOAPIC_VER: + case IOAPIC_ARB: + /* readonly */ + break; + default: + break; + } + + /* redirection table entries */ + if (regnum >= IOAPIC_REDTBL && + regnum < IOAPIC_REDTBL + REDIR_ENTRIES * 2) { + pin = (regnum - IOAPIC_REDTBL) / 2; + if ((regnum - 
IOAPIC_REDTBL) % 2) + lshift = 32; + else + lshift = 0; + + data64 = (uint64_t)data << lshift; + mask64 = (uint64_t)0xffffffff << lshift; + vioapic->rtbl[pin].reg &= ~mask64 | RTBL_RO_BITS; + vioapic->rtbl[pin].reg |= data64 & ~RTBL_RO_BITS; + + VIOAPIC_CTR2(vioapic, "ioapic pin%d: redir table entry %#lx", + pin, vioapic->rtbl[pin].reg); + + /* + * Generate an interrupt if the following conditions are met: + * - pin is not masked + * - previous interrupt has been EOIed + * - pin level is asserted + */ + if ((vioapic->rtbl[pin].reg & IOART_INTMASK) == IOART_INTMCLR && + (vioapic->rtbl[pin].reg & IOART_REM_IRR) == 0 && + (vioapic->rtbl[pin].acnt > 0)) { + VIOAPIC_CTR2(vioapic, "ioapic pin%d: asserted at rtbl " + "write, acnt %d", pin, vioapic->rtbl[pin].acnt); + vioapic_send_intr(vioapic, pin); + } + } +} + +static int +vioapic_mmio_rw(struct vioapic *vioapic, uint64_t gpa, uint64_t *data, + int size, bool doread) +{ + uint64_t offset; + + offset = gpa - VIOAPIC_BASE; + + /* + * The IOAPIC specification allows 32-bit wide accesses to the + * IOREGSEL (offset 0) and IOWIN (offset 16) registers. 
+ */ + if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) { + if (doread) + *data = 0; + return (0); + } + + VIOAPIC_LOCK(vioapic); + if (offset == IOREGSEL) { + if (doread) + *data = vioapic->ioregsel; + else + vioapic->ioregsel = *data; + } else { + if (doread) + *data = vioapic_read(vioapic, vioapic->ioregsel); + else + vioapic_write(vioapic, vioapic->ioregsel, *data); + } + VIOAPIC_UNLOCK(vioapic); + + return (0); +} + +int +vioapic_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, + int size, void *arg) +{ + int error; + struct vioapic *vioapic; + + vioapic = vm_ioapic(vm); + error = vioapic_mmio_rw(vioapic, gpa, rval, size, true); + return (error); +} + +int +vioapic_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t wval, + int size, void *arg) +{ + int error; + struct vioapic *vioapic; + + vioapic = vm_ioapic(vm); + error = vioapic_mmio_rw(vioapic, gpa, &wval, size, false); + return (error); +} + +void +vioapic_process_eoi(struct vm *vm, int vcpuid, int vector) +{ + struct vioapic *vioapic; + int pin; + + KASSERT(vector >= 0 && vector < 256, + ("vioapic_process_eoi: invalid vector %d", vector)); + + vioapic = vm_ioapic(vm); + VIOAPIC_CTR1(vioapic, "ioapic processing eoi for vector %d", vector); + + /* + * XXX keep track of the pins associated with this vector instead + * of iterating on every single pin each time. 
+ */ + VIOAPIC_LOCK(vioapic); + for (pin = 0; pin < REDIR_ENTRIES; pin++) { + if ((vioapic->rtbl[pin].reg & IOART_REM_IRR) == 0) + continue; + if ((vioapic->rtbl[pin].reg & IOART_INTVEC) != vector) + continue; + vioapic->rtbl[pin].reg &= ~IOART_REM_IRR; + if (vioapic->rtbl[pin].acnt > 0) { + VIOAPIC_CTR2(vioapic, "ioapic pin%d: asserted at eoi, " + "acnt %d", pin, vioapic->rtbl[pin].acnt); + vioapic_send_intr(vioapic, pin); + } + } + VIOAPIC_UNLOCK(vioapic); +} + +struct vioapic * +vioapic_init(struct vm *vm) +{ + int i; + struct vioapic *vioapic; + + vioapic = malloc(sizeof(struct vioapic), M_VIOAPIC, M_WAITOK | M_ZERO); + + vioapic->vm = vm; + mtx_init(&vioapic->mtx, "vioapic lock", NULL, MTX_DEF); + + /* Initialize all redirection entries to mask all interrupts */ + for (i = 0; i < REDIR_ENTRIES; i++) + vioapic->rtbl[i].reg = 0x0001000000010000UL; + + return (vioapic); +} + +void +vioapic_cleanup(struct vioapic *vioapic) +{ + + free(vioapic, M_VIOAPIC); +} + +int +vioapic_pincount(struct vm *vm) +{ + + return (REDIR_ENTRIES); +} diff --git a/sys/amd64/vmm/io/vioapic.h b/sys/amd64/vmm/io/vioapic.h new file mode 100644 index 0000000..65176b3 --- /dev/null +++ b/sys/amd64/vmm/io/vioapic.h @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2013 Tycho Nightingale + * Copyright (c) 2013 Neel Natu + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VIOAPIC_H_ +#define _VIOAPIC_H_ + +#define VIOAPIC_BASE 0xFEC00000 +#define VIOAPIC_SIZE 4096 + +struct vioapic *vioapic_init(struct vm *vm); +void vioapic_cleanup(struct vioapic *vioapic); + +int vioapic_assert_irq(struct vm *vm, int irq); +int vioapic_deassert_irq(struct vm *vm, int irq); +int vioapic_pulse_irq(struct vm *vm, int irq); + +int vioapic_mmio_write(void *vm, int vcpuid, uint64_t gpa, + uint64_t wval, int size, void *arg); +int vioapic_mmio_read(void *vm, int vcpuid, uint64_t gpa, + uint64_t *rval, int size, void *arg); + +int vioapic_pincount(struct vm *vm); +void vioapic_process_eoi(struct vm *vm, int vcpuid, int vector); +#endif diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index 6bfd034..6e5b5ea 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -44,14 +44,14 @@ __FBSDID("$FreeBSD$"); #include "vmm_stat.h" #include "vmm_lapic.h" #include "vmm_ktr.h" -#include "vdev.h" #include "vlapic.h" +#include "vioapic.h" #define VLAPIC_CTR0(vlapic, format) \ - VMM_CTR0((vlapic)->vm, (vlapic)->vcpuid, format) + VCPU_CTR0((vlapic)->vm, (vlapic)->vcpuid, format) #define VLAPIC_CTR1(vlapic, format, p1) \ - VMM_CTR1((vlapic)->vm, 
(vlapic)->vcpuid, format, p1) + VCPU_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1) #define VLAPIC_CTR_IRR(vlapic, msg) \ do { \ @@ -100,8 +100,6 @@ struct vlapic { struct vm *vm; int vcpuid; - struct io_region *mmio; - struct vdev_ops *ops; struct LAPIC apic; int esr_update; @@ -195,9 +193,8 @@ vlapic_init_ipi(struct vlapic *vlapic) } static int -vlapic_op_reset(void* dev) +vlapic_reset(struct vlapic *vlapic) { - struct vlapic *vlapic = (struct vlapic*)dev; struct LAPIC *lapic = &vlapic->apic; memset(lapic, 0, sizeof(*lapic)); @@ -214,36 +211,33 @@ vlapic_op_reset(void* dev) } -static int -vlapic_op_init(void* dev) -{ - struct vlapic *vlapic = (struct vlapic*)dev; - vdev_register_region(vlapic->ops, vlapic, vlapic->mmio); - return vlapic_op_reset(dev); -} - -static int -vlapic_op_halt(void* dev) -{ - struct vlapic *vlapic = (struct vlapic*)dev; - vdev_unregister_region(vlapic, vlapic->mmio); - return 0; - -} - void -vlapic_set_intr_ready(struct vlapic *vlapic, int vector) +vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) { struct LAPIC *lapic = &vlapic->apic; - uint32_t *irrptr; + uint32_t *irrptr, *tmrptr, mask; int idx; if (vector < 0 || vector >= 256) panic("vlapic_set_intr_ready: invalid vector %d\n", vector); idx = (vector / 32) * 4; + mask = 1 << (vector % 32); + irrptr = &lapic->irr0; - atomic_set_int(&irrptr[idx], 1 << (vector % 32)); + atomic_set_int(&irrptr[idx], mask); + + /* + * Upon acceptance of an interrupt into the IRR the corresponding + * TMR bit is cleared for edge-triggered interrupts and set for + * level-triggered interrupts. 
+ */ + tmrptr = &lapic->tmr0; + if (level) + atomic_set_int(&tmrptr[idx], mask); + else + atomic_clear_int(&tmrptr[idx], mask); + VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready"); } @@ -371,10 +365,11 @@ static void vlapic_process_eoi(struct vlapic *vlapic) { struct LAPIC *lapic = &vlapic->apic; - uint32_t *isrptr; - int i, idx, bitpos; + uint32_t *isrptr, *tmrptr; + int i, idx, bitpos, vector; isrptr = &lapic->isr0; + tmrptr = &lapic->tmr0; /* * The x86 architecture reserves the the first 32 vectors for use @@ -383,15 +378,20 @@ vlapic_process_eoi(struct vlapic *vlapic) for (i = 7; i > 0; i--) { idx = i * 4; bitpos = fls(isrptr[idx]); - if (bitpos != 0) { + if (bitpos-- != 0) { if (vlapic->isrvec_stk_top <= 0) { panic("invalid vlapic isrvec_stk_top %d", vlapic->isrvec_stk_top); } - isrptr[idx] &= ~(1 << (bitpos - 1)); + isrptr[idx] &= ~(1 << bitpos); VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi"); vlapic->isrvec_stk_top--; vlapic_update_ppr(vlapic); + if ((tmrptr[idx] & (1 << bitpos)) != 0) { + vector = i * 32 + bitpos; + vioapic_process_eoi(vlapic->vm, vlapic->vcpuid, + vector); + } return; } } @@ -426,7 +426,7 @@ vlapic_fire_timer(struct vlapic *vlapic) if (!vlapic_get_lvt_field(lvt, APIC_LVTT_M)) { vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1); vector = vlapic_get_lvt_field(lvt,APIC_LVTT_VECTOR); - vlapic_set_intr_ready(vlapic, vector); + vlapic_set_intr_ready(vlapic, vector, false); } } @@ -472,7 +472,7 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval) i--; CPU_CLR(i, &dmask); if (mode == APIC_DELMODE_FIXED) { - lapic_set_intr(vlapic->vm, i, vec); + lapic_intr_edge(vlapic->vm, i, vec); vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT, i, 1); } else @@ -594,11 +594,9 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector) } int -vlapic_op_mem_read(void* dev, uint64_t gpa, opsize_t size, uint64_t *data) +vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data) { - struct vlapic *vlapic = (struct vlapic*)dev; 
struct LAPIC *lapic = &vlapic->apic; - uint64_t offset = gpa & ~(PAGE_SIZE); uint32_t *reg; int i; @@ -686,11 +684,9 @@ vlapic_op_mem_read(void* dev, uint64_t gpa, opsize_t size, uint64_t *data) } int -vlapic_op_mem_write(void* dev, uint64_t gpa, opsize_t size, uint64_t data) +vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data) { - struct vlapic *vlapic = (struct vlapic*)dev; struct LAPIC *lapic = &vlapic->apic; - uint64_t offset = gpa & ~(PAGE_SIZE); uint32_t *reg; int retval; @@ -832,16 +828,6 @@ restart: return (0); } -struct vdev_ops vlapic_dev_ops = { - .name = "vlapic", - .init = vlapic_op_init, - .reset = vlapic_op_reset, - .halt = vlapic_op_halt, - .memread = vlapic_op_mem_read, - .memwrite = vlapic_op_mem_write, -}; -static struct io_region vlapic_mmio[VM_MAXCPU]; - struct vlapic * vlapic_init(struct vm *vm, int vcpuid) { @@ -856,17 +842,7 @@ vlapic_init(struct vm *vm, int vcpuid) if (vcpuid == 0) vlapic->msr_apicbase |= APICBASE_BSP; - vlapic->ops = &vlapic_dev_ops; - - vlapic->mmio = vlapic_mmio + vcpuid; - vlapic->mmio->base = DEFAULT_APIC_BASE; - vlapic->mmio->len = PAGE_SIZE; - vlapic->mmio->attr = MMIO_READ|MMIO_WRITE; - vlapic->mmio->vcpu = vcpuid; - - vdev_register(&vlapic_dev_ops, vlapic); - - vlapic_op_init(vlapic); + vlapic_reset(vlapic); return (vlapic); } @@ -874,8 +850,7 @@ vlapic_init(struct vm *vm, int vcpuid) void vlapic_cleanup(struct vlapic *vlapic) { - vlapic_op_halt(vlapic); - vdev_unregister(vlapic); + free(vlapic, M_VLAPIC); } diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h index 00de019..8ea65ee 100644 --- a/sys/amd64/vmm/io/vlapic.h +++ b/sys/amd64/vmm/io/vlapic.h @@ -29,10 +29,8 @@ #ifndef _VLAPIC_H_ #define _VLAPIC_H_ -#include "vdev.h" - struct vm; - + /* * Map of APIC Registers: Offset Description Access */ @@ -92,16 +90,11 @@ enum x2apic_state; struct vlapic *vlapic_init(struct vm *vm, int vcpuid); void vlapic_cleanup(struct vlapic *vlapic); - -int vlapic_op_mem_write(void* dev, uint64_t gpa, 
- opsize_t size, uint64_t data); - -int vlapic_op_mem_read(void* dev, uint64_t gpa, - opsize_t size, uint64_t *data); - +int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data); +int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data); int vlapic_pending_intr(struct vlapic *vlapic); void vlapic_intr_accepted(struct vlapic *vlapic, int vector); -void vlapic_set_intr_ready(struct vlapic *vlapic, int vector); +void vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level); int vlapic_timer_tick(struct vlapic *vlapic); uint64_t vlapic_get_apicbase(struct vlapic *vlapic); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 5c2f202..8da66b2 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -60,11 +60,14 @@ __FBSDID("$FreeBSD$"); #include #include +#include + #include "vmm_ktr.h" #include "vmm_host.h" #include "vmm_mem.h" #include "vmm_util.h" -#include +#include "vhpet.h" +#include "vioapic.h" #include "vlapic.h" #include "vmm_msr.h" #include "vmm_ipi.h" @@ -107,6 +110,8 @@ struct mem_seg { struct vm { void *cookie; /* processor-specific data */ void *iommu; /* iommu-specific data */ + struct vhpet *vhpet; /* virtual HPET */ + struct vioapic *vioapic; /* virtual ioapic */ struct vmspace *vmspace; /* guest's address space */ struct vcpu vcpu[VM_MAXCPU]; int num_mem_segs; @@ -301,6 +306,8 @@ vm_create(const char *name, struct vm **retvm) vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); strcpy(vm->name, name); vm->cookie = VMINIT(vm, vmspace_pmap(vmspace)); + vm->vioapic = vioapic_init(vm); + vm->vhpet = vhpet_init(vm); for (i = 0; i < VM_MAXCPU; i++) { vcpu_init(vm, i); @@ -334,6 +341,9 @@ vm_destroy(struct vm *vm) if (vm->iommu != NULL) iommu_destroy_domain(vm->iommu); + vhpet_cleanup(vm->vhpet); + vioapic_cleanup(vm->vioapic); + for (i = 0; i < vm->num_mem_segs; i++) vm_free_mem_seg(vm, &vm->mem_segs[i]); @@ -919,8 +929,8 @@ vm_handle_paging(struct vm *vm, int vcpuid, boolean_t *retu) map = 
&vm->vmspace->vm_map; rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL); - VMM_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, ftype = %d", - rv, vme->u.paging.gpa, ftype); + VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, " + "ftype = %d", rv, vme->u.paging.gpa, ftype); if (rv != KERN_SUCCESS) return (EFAULT); @@ -939,6 +949,8 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu) struct vm_exit *vme; int error, inst_length; uint64_t rip, gla, gpa, cr3; + mem_region_read_t mread; + mem_region_write_t mwrite; vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; @@ -960,14 +972,22 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu) if (vmm_decode_instruction(vm, vcpuid, gla, vie) != 0) return (EFAULT); - /* return to userland unless this is a local apic access */ - if (gpa < DEFAULT_APIC_BASE || gpa >= DEFAULT_APIC_BASE + PAGE_SIZE) { + /* return to userland unless this is an in-kernel emulated device */ + if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { + mread = lapic_mmio_read; + mwrite = lapic_mmio_write; + } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { + mread = vioapic_mmio_read; + mwrite = vioapic_mmio_write; + } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { + mread = vhpet_mmio_read; + mwrite = vhpet_mmio_write; + } else { *retu = TRUE; return (0); } - error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, - lapic_mmio_read, lapic_mmio_write, 0); + error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite, 0); /* return to userland to spin up the AP */ if (error == 0 && vme->exitcode == VM_EXITCODE_SPINUP_AP) @@ -1150,6 +1170,20 @@ vm_lapic(struct vm *vm, int cpu) return (vm->vcpu[cpu].vlapic); } +struct vioapic * +vm_ioapic(struct vm *vm) +{ + + return (vm->vioapic); +} + +struct vhpet * +vm_hpet(struct vm *vm) +{ + + return (vm->vhpet); +} + boolean_t vmm_is_pptdev(int bus, int slot, int func) { @@ -1314,3 +1348,12 @@ 
vm_get_vmspace(struct vm *vm) return (vm->vmspace); } + +int +vm_apicid2vcpuid(struct vm *vm, int apicid) +{ + /* + * XXX apic id is assumed to be numerically identical to vcpu id + */ + return (apicid); +} diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index 7ea45c9..a173de2 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -48,13 +48,15 @@ __FBSDID("$FreeBSD$"); #include #include - #include +#include + #include "vmm_lapic.h" #include "vmm_stat.h" #include "vmm_mem.h" #include "io/ppt.h" -#include +#include "io/vioapic.h" +#include "io/vhpet.h" struct vmmdev_softc { struct vm *vm; /* vm instance cookie */ @@ -147,10 +149,11 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vmmdev_softc *sc; struct vm_memory_segment *seg; struct vm_register *vmreg; - struct vm_seg_desc* vmsegdesc; + struct vm_seg_desc *vmsegdesc; struct vm_run *vmrun; struct vm_event *vmevent; struct vm_lapic_irq *vmirq; + struct vm_ioapic_irq *ioapic_irq; struct vm_capability *vmcap; struct vm_pptdev *pptdev; struct vm_pptdev_mmio *pptmmio; @@ -292,7 +295,19 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, break; case VM_LAPIC_IRQ: vmirq = (struct vm_lapic_irq *)data; - error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector); + error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector); + break; + case VM_IOAPIC_ASSERT_IRQ: + ioapic_irq = (struct vm_ioapic_irq *)data; + error = vioapic_assert_irq(sc->vm, ioapic_irq->irq); + break; + case VM_IOAPIC_DEASSERT_IRQ: + ioapic_irq = (struct vm_ioapic_irq *)data; + error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq); + break; + case VM_IOAPIC_PULSE_IRQ: + ioapic_irq = (struct vm_ioapic_irq *)data; + error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq); break; case VM_MAP_MEMORY: seg = (struct vm_memory_segment *)data; @@ -354,6 +369,9 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, gpapte->gpa, gpapte->pte, &gpapte->ptenum); error = 0; 
break; + case VM_GET_HPET_CAPABILITIES: + error = vhpet_getcap((struct vm_hpet_cap *)data); + break; default: error = ENOTTY; break; diff --git a/sys/amd64/vmm/vmm_ktr.h b/sys/amd64/vmm/vmm_ktr.h index e691c61..9fb46d8 100644 --- a/sys/amd64/vmm/vmm_ktr.h +++ b/sys/amd64/vmm/vmm_ktr.h @@ -32,20 +32,34 @@ #include #include +#ifndef KTR_VMM #define KTR_VMM KTR_GEN +#endif + +#define VCPU_CTR0(vm, vcpuid, format) \ +CTR2(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid)) + +#define VCPU_CTR1(vm, vcpuid, format, p1) \ +CTR3(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1)) + +#define VCPU_CTR2(vm, vcpuid, format, p1, p2) \ +CTR4(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2)) + +#define VCPU_CTR3(vm, vcpuid, format, p1, p2, p3) \ +CTR5(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2), (p3)) + +#define VM_CTR0(vm, format) \ +CTR1(KTR_VMM, "vm %s: " format, vm_name((vm))) -#define VMM_CTR0(vm, vcpuid, format) \ -CTR3(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu) +#define VM_CTR1(vm, format, p1) \ +CTR2(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1)) -#define VMM_CTR1(vm, vcpuid, format, p1) \ -CTR4(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu, \ - (p1)) +#define VM_CTR2(vm, format, p1, p2) \ +CTR3(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2)) -#define VMM_CTR2(vm, vcpuid, format, p1, p2) \ -CTR5(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu, \ - (p1), (p2)) +#define VM_CTR3(vm, format, p1, p2, p3) \ +CTR4(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3)) -#define VMM_CTR3(vm, vcpuid, format, p1, p2, p3) \ -CTR6(KTR_VMM, "vm %s-%d(%d): " format, vm_name((vm)), (vcpuid), curcpu, \ - (p1), (p2), (p3)) +#define VM_CTR4(vm, format, p1, p2, p3, p4) \ +CTR5(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3), (p4)) #endif diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c index d024b71..465ce6c 100644 --- 
a/sys/amd64/vmm/vmm_lapic.c +++ b/sys/amd64/vmm/vmm_lapic.c @@ -62,7 +62,7 @@ lapic_intr_accepted(struct vm *vm, int cpu, int vector) } int -lapic_set_intr(struct vm *vm, int cpu, int vector) +lapic_set_intr(struct vm *vm, int cpu, int vector, bool level) { struct vlapic *vlapic; @@ -73,7 +73,7 @@ lapic_set_intr(struct vm *vm, int cpu, int vector) return (EINVAL); vlapic = vm_lapic(vm, cpu); - vlapic_set_intr_ready(vlapic, vector); + vlapic_set_intr_ready(vlapic, vector, level); vm_interrupt_hostcpu(vm, cpu); @@ -130,7 +130,7 @@ lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval) error = 0; } else { offset = x2apic_msr_to_regoff(msr); - error = vlapic_op_mem_read(vlapic, offset, DWORD, rval); + error = vlapic_read(vlapic, offset, rval); } return (error); @@ -150,7 +150,7 @@ lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val) error = 0; } else { offset = x2apic_msr_to_regoff(msr); - error = vlapic_op_mem_write(vlapic, offset, DWORD, val); + error = vlapic_write(vlapic, offset, val); } return (error); @@ -174,7 +174,7 @@ lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size, return (EINVAL); vlapic = vm_lapic(vm, cpu); - error = vlapic_op_mem_write(vlapic, off, DWORD, wval); + error = vlapic_write(vlapic, off, wval); return (error); } @@ -196,6 +196,6 @@ lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size, return (EINVAL); vlapic = vm_lapic(vm, cpu); - error = vlapic_op_mem_read(vlapic, off, DWORD, rval); + error = vlapic_read(vlapic, off, rval); return (error); } diff --git a/sys/amd64/vmm/vmm_lapic.h b/sys/amd64/vmm/vmm_lapic.h index a79912e..1461185 100644 --- a/sys/amd64/vmm/vmm_lapic.h +++ b/sys/amd64/vmm/vmm_lapic.h @@ -66,6 +66,22 @@ void lapic_intr_accepted(struct vm *vm, int cpu, int vector); * Signals to the LAPIC that an interrupt at 'vector' needs to be generated * to the 'cpu', the state is recorded in IRR. 
*/ -int lapic_set_intr(struct vm *vm, int cpu, int vector); +int lapic_set_intr(struct vm *vm, int cpu, int vector, bool trig); + +#define LAPIC_TRIG_LEVEL true +#define LAPIC_TRIG_EDGE false +static __inline int +lapic_intr_level(struct vm *vm, int cpu, int vector) +{ + + return (lapic_set_intr(vm, cpu, vector, LAPIC_TRIG_LEVEL)); +} + +static __inline int +lapic_intr_edge(struct vm *vm, int cpu, int vector) +{ + + return (lapic_set_intr(vm, cpu, vector, LAPIC_TRIG_EDGE)); +} #endif diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile index 959f128..ea367a4 100644 --- a/sys/modules/vmm/Makefile +++ b/sys/modules/vmm/Makefile @@ -27,7 +27,8 @@ SRCS+= vmm.c \ .PATH: ${.CURDIR}/../../amd64/vmm/io SRCS+= iommu.c \ ppt.c \ - vdev.c \ + vhpet.c \ + vioapic.c \ vlapic.c # intel-specific files -- cgit v1.1