diff options
-rw-r--r-- | sys/amd64/include/vmm.h | 1 | ||||
-rw-r--r-- | sys/amd64/vmm/intel/vmx_msr.c | 26 | ||||
-rw-r--r-- | sys/amd64/vmm/io/vatpic.c | 95 | ||||
-rw-r--r-- | sys/amd64/vmm/io/vatpic.h | 10 | ||||
-rw-r--r-- | sys/amd64/vmm/io/vatpit.c | 4 | ||||
-rw-r--r-- | sys/amd64/vmm/io/vatpit.h | 6 | ||||
-rw-r--r-- | sys/amd64/vmm/io/vpmtmr.c | 104 | ||||
-rw-r--r-- | sys/amd64/vmm/io/vpmtmr.h | 42 | ||||
-rw-r--r-- | sys/amd64/vmm/vmm.c | 13 | ||||
-rw-r--r-- | sys/amd64/vmm/vmm_ioport.c | 2 | ||||
-rw-r--r-- | sys/amd64/vmm/vmm_ioport.h | 2 | ||||
-rw-r--r-- | sys/modules/linux/Makefile | 1 | ||||
-rw-r--r-- | sys/modules/vmm/Makefile | 6 | ||||
-rw-r--r-- | usr.sbin/bhyve/Makefile | 1 | ||||
-rw-r--r-- | usr.sbin/bhyve/block_if.c | 174 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_ahci.c | 8 | ||||
-rw-r--r-- | usr.sbin/bhyve/pmtmr.c | 173 | ||||
-rw-r--r-- | usr.sbin/bhyve/rtc.c | 4 |
18 files changed, 429 insertions, 243 deletions
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 6f769b9..8a8c3f4 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -285,6 +285,7 @@ int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func); int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func); struct vatpic *vm_atpic(struct vm *vm); struct vatpit *vm_atpit(struct vm *vm); +struct vpmtmr *vm_pmtmr(struct vm *vm); /* * Inject exception 'vme' into the guest vcpu. This function returns 0 on diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c index 746ca73..f6bbf2a 100644 --- a/sys/amd64/vmm/intel/vmx_msr.c +++ b/sys/amd64/vmm/intel/vmx_msr.c @@ -376,9 +376,31 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) int vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) { - int error = 0; - + uint64_t changed; + int error; + + error = 0; switch (num) { + case MSR_IA32_MISC_ENABLE: + changed = val ^ misc_enable; + /* + * If the host has disabled the NX feature then the guest + * also cannot use it. However, a Linux guest will try to + * enable the NX feature by writing to the MISC_ENABLE MSR. + * + * This can be safely ignored because the memory management + * code looks at CPUID.80000001H:EDX.NX to check if the + * functionality is actually enabled. + */ + changed &= ~(1UL << 34); + + /* + * Punt to userspace if any other bits are being modified. 
+ */ + if (changed) + error = EINVAL; + + break; default: error = EINVAL; break; diff --git a/sys/amd64/vmm/io/vatpic.c b/sys/amd64/vmm/io/vatpic.c index b710a84..74a7027 100644 --- a/sys/amd64/vmm/io/vatpic.c +++ b/sys/amd64/vmm/io/vatpic.c @@ -75,7 +75,7 @@ struct atpic { uint8_t mask; /* Interrupt Mask Register (IMR) */ int acnt[8]; /* sum of pin asserts and deasserts */ - int priority; /* current pin priority */ + int lowprio; /* lowest priority irq */ bool intr_raised; }; @@ -102,16 +102,33 @@ struct vatpic { #define VATPIC_CTR4(vatpic, fmt, a1, a2, a3, a4) \ VM_CTR4((vatpic)->vm, fmt, a1, a2, a3, a4) +/* + * Loop over all the pins in priority order from highest to lowest. + */ +#define ATPIC_PIN_FOREACH(pinvar, atpic, tmpvar) \ + for (tmpvar = 0, pinvar = (atpic->lowprio + 1) & 0x7; \ + tmpvar < 8; \ + tmpvar++, pinvar = (pinvar + 1) & 0x7) + static void vatpic_set_pinstate(struct vatpic *vatpic, int pin, bool newstate); +static __inline bool +master_atpic(struct vatpic *vatpic, struct atpic *atpic) +{ + + if (atpic == &vatpic->atpic[0]) + return (true); + else + return (false); +} + static __inline int vatpic_get_highest_isrpin(struct atpic *atpic) { int bit, pin; int i; - for (i = 0; i <= 7; i++) { - pin = ((i + 7 - atpic->priority) & 0x7); + ATPIC_PIN_FOREACH(pin, atpic, i) { bit = (1 << pin); if (atpic->service & bit) @@ -125,8 +142,7 @@ static __inline int vatpic_get_highest_irrpin(struct atpic *atpic) { int serviced; - int bit, pin; - int i, j; + int bit, pin, tmp; /* * In 'Special Fully-Nested Mode' when an interrupt request from @@ -137,17 +153,21 @@ vatpic_get_highest_irrpin(struct atpic *atpic) if (atpic->sfn) serviced &= ~(1 << 2); - for (i = 0; i <= 7; i++) { - pin = ((i + 7 - atpic->priority) & 0x7); - bit = (1 << pin); - if (serviced & bit) + ATPIC_PIN_FOREACH(pin, atpic, tmp) { + bit = 1 << pin; + + /* + * If there is already an interrupt in service at the same + * or higher priority then bail. 
+ */ + if ((serviced & bit) != 0) break; - } - for (j = 0; j < i; j++) { - pin = ((j + 7 - atpic->priority) & 0x7); - bit = (1 << pin); - if (atpic->request & bit && (~atpic->mask & bit)) + /* + * If an interrupt is asserted and not masked then return + * the corresponding 'pin' to the caller. + */ + if ((atpic->request & bit) != 0 && (atpic->mask & bit) == 0) return (pin); } @@ -238,8 +258,9 @@ vatpic_icw1(struct vatpic *vatpic, struct atpic *atpic, uint8_t val) atpic->icw_num = 1; atpic->mask = 0; - atpic->priority = 0; + atpic->lowprio = 7; atpic->rd_cmd_reg = 0; + atpic->poll = 0; if ((val & ICW1_SNGL) != 0) { VATPIC_CTR0(vatpic, "vatpic cascade mode required"); @@ -291,6 +312,15 @@ vatpic_icw4(struct vatpic *vatpic, struct atpic *atpic, uint8_t val) if ((val & ICW4_AEOI) != 0) atpic->aeoi = true; + if ((val & ICW4_SFNM) != 0) { + if (master_atpic(vatpic, atpic)) { + atpic->sfn = true; + } else { + VATPIC_CTR1(vatpic, "Ignoring special fully nested " + "mode on slave atpic: %#x", val); + } + } + atpic->icw_num = 0; atpic->ready = true; @@ -329,11 +359,11 @@ vatpic_ocw2(struct vatpic *vatpic, struct atpic *atpic, uint8_t val) atpic->service &= ~(1 << isr_bit); if (atpic->rotate) - atpic->priority = isr_bit; + atpic->lowprio = isr_bit; } } else if ((val & OCW2_SL) != 0 && atpic->rotate == true) { /* specific priority */ - atpic->priority = val & 0x7; + atpic->lowprio = val & 0x7; } return (0); @@ -344,11 +374,17 @@ vatpic_ocw3(struct vatpic *vatpic, struct atpic *atpic, uint8_t val) { VATPIC_CTR1(vatpic, "atpic ocw3 0x%x", val); - atpic->poll = ((val & OCW3_P) != 0); + if (val & OCW3_ESMM) { + VATPIC_CTR0(vatpic, "atpic special mask mode not implemented"); + return (-1); + } if (val & OCW3_RR) { /* read register command */ atpic->rd_cmd_reg = val & OCW3_RIS; + + /* Polling mode */ + atpic->poll = ((val & OCW3_P) != 0); } return (0); @@ -388,6 +424,8 @@ vatpic_set_pinstate(struct vatpic *vatpic, int pin, bool newstate) } else if (oldcnt == 1 && newcnt == 0) { /* 
falling edge */ VATPIC_CTR1(vatpic, "atpic pin%d: deasserted", pin); + if (level) + atpic->request &= ~(1 << (pin & 0x7)); } else { VATPIC_CTR3(vatpic, "atpic pin%d: %s, ignored, acnt %d", pin, newstate ? "asserted" : "deasserted", newcnt); @@ -528,7 +566,7 @@ vatpic_pin_accepted(struct atpic *atpic, int pin) if (atpic->aeoi == true) { if (atpic->rotate == true) - atpic->priority = pin; + atpic->lowprio = pin; } else { atpic->service |= (1 << pin); } @@ -566,12 +604,19 @@ static int vatpic_read(struct vatpic *vatpic, struct atpic *atpic, bool in, int port, int bytes, uint32_t *eax) { + int pin; + VATPIC_LOCK(vatpic); if (atpic->poll) { - VATPIC_CTR0(vatpic, "vatpic polled mode not supported"); - VATPIC_UNLOCK(vatpic); - return (-1); + atpic->poll = 0; + pin = vatpic_get_highest_irrpin(atpic); + if (pin >= 0) { + vatpic_pin_accepted(atpic, pin); + *eax = 0x80 | pin; + } else { + *eax = 0; + } } else { if (port & ICU_IMR_OFFSET) { /* read interrrupt mask register */ @@ -641,7 +686,7 @@ vatpic_write(struct vatpic *vatpic, struct atpic *atpic, bool in, int port, } int -vatpic_master_handler(void *vm, int vcpuid, bool in, int port, int bytes, +vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *eax) { struct vatpic *vatpic; @@ -661,7 +706,7 @@ vatpic_master_handler(void *vm, int vcpuid, bool in, int port, int bytes, } int -vatpic_slave_handler(void *vm, int vcpuid, bool in, int port, int bytes, +vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *eax) { struct vatpic *vatpic; @@ -681,7 +726,7 @@ vatpic_slave_handler(void *vm, int vcpuid, bool in, int port, int bytes, } int -vatpic_elc_handler(void *vm, int vcpuid, bool in, int port, int bytes, +vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *eax) { struct vatpic *vatpic; diff --git a/sys/amd64/vmm/io/vatpic.h b/sys/amd64/vmm/io/vatpic.h index 84d5651..d4a1be1 100644 --- a/sys/amd64/vmm/io/vatpic.h +++ 
b/sys/amd64/vmm/io/vatpic.h @@ -39,11 +39,11 @@ struct vatpic *vatpic_init(struct vm *vm); void vatpic_cleanup(struct vatpic *vatpic); -int vatpic_master_handler(void *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *eax); -int vatpic_slave_handler(void *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *eax); -int vatpic_elc_handler(void *vm, int vcpuid, bool in, int port, int bytes, +int vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, + int bytes, uint32_t *eax); +int vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, + int bytes, uint32_t *eax); +int vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *eax); int vatpic_assert_irq(struct vm *vm, int irq); diff --git a/sys/amd64/vmm/io/vatpit.c b/sys/amd64/vmm/io/vatpit.c index 9132bae..842253d 100644 --- a/sys/amd64/vmm/io/vatpit.c +++ b/sys/amd64/vmm/io/vatpit.c @@ -317,7 +317,7 @@ vatpit_update_mode(struct vatpit *vatpit, uint8_t val) } int -vatpit_handler(void *vm, int vcpuid, bool in, int port, int bytes, +vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *eax) { struct vatpit *vatpit; @@ -400,7 +400,7 @@ vatpit_handler(void *vm, int vcpuid, bool in, int port, int bytes, } int -vatpit_nmisc_handler(void *vm, int vcpuid, bool in, int port, int bytes, +vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *eax) { struct vatpit *vatpit; diff --git a/sys/amd64/vmm/io/vatpit.h b/sys/amd64/vmm/io/vatpit.h index 3350455..5719c9c 100644 --- a/sys/amd64/vmm/io/vatpit.h +++ b/sys/amd64/vmm/io/vatpit.h @@ -37,9 +37,9 @@ struct vatpit *vatpit_init(struct vm *vm); void vatpit_cleanup(struct vatpit *vatpit); -int vatpit_handler(void *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *eax); -int vatpit_nmisc_handler(void *vm, int vcpuid, bool in, int port, int bytes, +int vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *eax); +int 
vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port, + int bytes, uint32_t *eax); #endif /* _VATPIT_H_ */ diff --git a/sys/amd64/vmm/io/vpmtmr.c b/sys/amd64/vmm/io/vpmtmr.c new file mode 100644 index 0000000..09f763f --- /dev/null +++ b/sys/amd64/vmm/io/vpmtmr.c @@ -0,0 +1,104 @@ +/*- + * Copyright (c) 2014, Neel Natu (neel@freebsd.org) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/queue.h> +#include <sys/cpuset.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/systm.h> + +#include <machine/vmm.h> + +#include "vpmtmr.h" + +/* + * The ACPI Power Management timer is a free-running 24- or 32-bit + * timer with a frequency of 3.579545MHz + * + * This implementation will be 32-bits + */ + +#define PMTMR_FREQ 3579545 /* 3.579545MHz */ + +struct vpmtmr { + sbintime_t freq_sbt; + sbintime_t baseuptime; + uint32_t baseval; +}; + +static MALLOC_DEFINE(M_VPMTMR, "vpmtmr", "bhyve virtual acpi timer"); + +struct vpmtmr * +vpmtmr_init(struct vm *vm) +{ + struct vpmtmr *vpmtmr; + struct bintime bt; + + vpmtmr = malloc(sizeof(struct vpmtmr), M_VPMTMR, M_WAITOK | M_ZERO); + vpmtmr->baseuptime = sbinuptime(); + vpmtmr->baseval = 0; + + FREQ2BT(PMTMR_FREQ, &bt); + vpmtmr->freq_sbt = bttosbt(bt); + + return (vpmtmr); +} + +void +vpmtmr_cleanup(struct vpmtmr *vpmtmr) +{ + + free(vpmtmr, M_VPMTMR); +} + +int +vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, + uint32_t *val) +{ + struct vpmtmr *vpmtmr; + sbintime_t now, delta; + + if (!in || bytes != 4) + return (-1); + + vpmtmr = vm_pmtmr(vm); + + /* + * No locking needed because 'baseuptime' and 'baseval' are + * written only during initialization. + */ + now = sbinuptime(); + delta = now - vpmtmr->baseuptime; + KASSERT(delta >= 0, ("vpmtmr_handler: uptime went backwards: " + "%#lx to %#lx", vpmtmr->baseuptime, now)); + *val = vpmtmr->baseval + delta / vpmtmr->freq_sbt; + + return (0); +} diff --git a/sys/amd64/vmm/io/vpmtmr.h b/sys/amd64/vmm/io/vpmtmr.h new file mode 100644 index 0000000..039a281 --- /dev/null +++ b/sys/amd64/vmm/io/vpmtmr.h @@ -0,0 +1,42 @@ +/*- + * Copyright (c) 2014 Neel Natu (neel@freebsd.org) + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _VPMTMR_H_ +#define _VPMTMR_H_ + +#define IO_PMTMR 0x408 + +struct vpmtmr; + +struct vpmtmr *vpmtmr_init(struct vm *vm); +void vpmtmr_cleanup(struct vpmtmr *pmtmr); + +int vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, + uint32_t *val); + +#endif diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 8c545f0..4739a86 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -74,6 +74,7 @@ __FBSDID("$FreeBSD$"); #include "vhpet.h" #include "vioapic.h" #include "vlapic.h" +#include "vpmtmr.h" #include "vmm_ipi.h" #include "vmm_stat.h" #include "vmm_lapic.h" @@ -134,6 +135,7 @@ struct vm { struct vioapic *vioapic; /* (i) virtual ioapic */ struct vatpic *vatpic; /* (i) virtual atpic */ struct vatpit *vatpit; /* (i) virtual atpit */ + struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */ volatile cpuset_t active_cpus; /* (i) active vcpus */ int suspend; /* (i) stop VM execution */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ @@ -373,6 +375,7 @@ vm_init(struct vm *vm, bool create) vm->vhpet = vhpet_init(vm); vm->vatpic = vatpic_init(vm); vm->vatpit = vatpit_init(vm); + vm->vpmtmr = vpmtmr_init(vm); CPU_ZERO(&vm->active_cpus); @@ -399,7 +402,7 @@ vm_create(const char *name, struct vm **retvm) if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) return (EINVAL); - vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); + vmspace = VMSPACE_ALLOC(0, VM_MAXUSER_ADDRESS); if (vmspace == NULL) return (ENOMEM); @@ -435,6 +438,7 @@ vm_cleanup(struct vm *vm, bool destroy) if (vm->iommu != NULL) iommu_destroy_domain(vm->iommu); + vpmtmr_cleanup(vm->vpmtmr); vatpit_cleanup(vm->vatpit); vhpet_cleanup(vm->vhpet); vatpic_cleanup(vm->vatpic); @@ -2212,6 +2216,13 @@ vm_atpit(struct vm *vm) return (vm->vatpit); } +struct vpmtmr * +vm_pmtmr(struct vm *vm) +{ + + return (vm->vpmtmr); +} + enum vm_reg_name vm_segment_name(int seg) { diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c 
index 564ca74..e553599 100644 --- a/sys/amd64/vmm/vmm_ioport.c +++ b/sys/amd64/vmm/vmm_ioport.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include "vatpic.h" #include "vatpit.h" +#include "vpmtmr.h" #include "vmm_ioport.h" #include "vmm_ktr.h" @@ -58,6 +59,7 @@ ioport_handler_func_t ioport_handler[MAX_IOPORTS] = { [IO_ICU2 + ICU_IMR_OFFSET] = vatpic_slave_handler, [IO_ELCR1] = vatpic_elc_handler, [IO_ELCR2] = vatpic_elc_handler, + [IO_PMTMR] = vpmtmr_handler, }; #ifdef KTR diff --git a/sys/amd64/vmm/vmm_ioport.h b/sys/amd64/vmm/vmm_ioport.h index 84a4cf1..ba51989 100644 --- a/sys/amd64/vmm/vmm_ioport.h +++ b/sys/amd64/vmm/vmm_ioport.h @@ -29,7 +29,7 @@ #ifndef _VMM_IOPORT_H_ #define _VMM_IOPORT_H_ -typedef int (*ioport_handler_func_t)(void *vm, int vcpuid, +typedef int (*ioport_handler_func_t)(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *val); int vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu); diff --git a/sys/modules/linux/Makefile b/sys/modules/linux/Makefile index 1c122ee..c10cb10 100644 --- a/sys/modules/linux/Makefile +++ b/sys/modules/linux/Makefile @@ -16,6 +16,7 @@ SRCS= linux_fork.c linux${SFX}_dummy.c linux_emul.c linux_file.c \ linux_timer.c \ opt_inet6.h opt_compat.h opt_kdtrace.h opt_posix.h opt_usb.h \ vnode_if.h device_if.h bus_if.h assym.s +DPSRCS= linux${SFX}_genassym.c # XXX: for assym.s SRCS+= opt_kstack_pages.h opt_nfs.h opt_compat.h opt_hwpmc_hooks.h diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile index 45025a8..6aeaf80 100644 --- a/sys/modules/vmm/Makefile +++ b/sys/modules/vmm/Makefile @@ -4,6 +4,7 @@ KMOD= vmm SRCS= opt_acpi.h opt_ddb.h device_if.h bus_if.h pci_if.h SRCS+= vmx_assym.h svm_assym.h +DPSRCS= vmx_genassym.c svm_genassym.c CFLAGS+= -DVMM_KEEP_STATS -DSMP CFLAGS+= -I${.CURDIR}/../../amd64/vmm @@ -33,7 +34,8 @@ SRCS+= iommu.c \ vatpit.c \ vhpet.c \ vioapic.c \ - vlapic.c + vlapic.c \ + vpmtmr.c # intel-specific files .PATH: ${.CURDIR}/../../amd64/vmm/intel @@ -52,7 
+54,7 @@ SRCS+= vmcb.c \ npt.c \ amdv.c \ svm_msr.c - + CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o vmx_assym.h: vmx_genassym.o diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index 1c95f77..377a2e6 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -31,7 +31,6 @@ SRCS= \ pci_virtio_rnd.c \ pci_uart.c \ pm.c \ - pmtmr.c \ post.c \ rtc.c \ smbiostbl.c \ diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c index cbe5ac3..8687e9a 100644 --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -43,14 +43,18 @@ __FBSDID("$FreeBSD$"); #include <string.h> #include <pthread.h> #include <pthread_np.h> +#include <signal.h> #include <unistd.h> +#include <machine/atomic.h> + #include "bhyverun.h" +#include "mevent.h" #include "block_if.h" #define BLOCKIF_SIG 0xb109b109 -#define BLOCKIF_MAXREQ 32 +#define BLOCKIF_MAXREQ 33 enum blockop { BOP_READ, @@ -60,7 +64,9 @@ enum blockop { enum blockstat { BST_FREE, - BST_INUSE + BST_PEND, + BST_BUSY, + BST_DONE }; struct blockif_elem { @@ -68,6 +74,7 @@ struct blockif_elem { struct blockif_req *be_req; enum blockop be_op; enum blockstat be_status; + pthread_t be_tid; }; struct blockif_ctxt { @@ -81,13 +88,25 @@ struct blockif_ctxt { pthread_cond_t bc_cond; int bc_closing; - /* Request elements and free/inuse queues */ + /* Request elements and free/pending/busy queues */ TAILQ_HEAD(, blockif_elem) bc_freeq; - TAILQ_HEAD(, blockif_elem) bc_inuseq; + TAILQ_HEAD(, blockif_elem) bc_pendq; + TAILQ_HEAD(, blockif_elem) bc_busyq; u_int bc_req_count; struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; }; +static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; + +struct blockif_sig_elem { + pthread_mutex_t bse_mtx; + pthread_cond_t bse_cond; + int bse_pending; + struct blockif_sig_elem *bse_next; +}; + +static struct blockif_sig_elem *blockif_bse_head; + static int blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, enum blockop op) @@ -101,10 +120,10 @@ 
blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, assert(be->be_status == BST_FREE); TAILQ_REMOVE(&bc->bc_freeq, be, be_link); - be->be_status = BST_INUSE; + be->be_status = BST_PEND; be->be_req = breq; be->be_op = op; - TAILQ_INSERT_TAIL(&bc->bc_inuseq, be, be_link); + TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); bc->bc_req_count++; @@ -112,26 +131,38 @@ blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, } static int -blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem *el) +blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem **bep) { struct blockif_elem *be; if (bc->bc_req_count == 0) return (ENOENT); - be = TAILQ_FIRST(&bc->bc_inuseq); + be = TAILQ_FIRST(&bc->bc_pendq); assert(be != NULL); - assert(be->be_status == BST_INUSE); - *el = *be; + assert(be->be_status == BST_PEND); + TAILQ_REMOVE(&bc->bc_pendq, be, be_link); + be->be_status = BST_BUSY; + be->be_tid = bc->bc_btid; + TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); + + *bep = be; + + return (0); +} + +static void +blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) +{ + assert(be->be_status == BST_DONE); - TAILQ_REMOVE(&bc->bc_inuseq, be, be_link); + TAILQ_REMOVE(&bc->bc_busyq, be, be_link); + be->be_tid = 0; be->be_status = BST_FREE; be->be_req = NULL; TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); - - bc->bc_req_count--; - return (0); + bc->bc_req_count--; } static void @@ -163,6 +194,8 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) break; } + be->be_status = BST_DONE; + (*br->br_callback)(br, err); } @@ -170,16 +203,17 @@ static void * blockif_thr(void *arg) { struct blockif_ctxt *bc; - struct blockif_elem req; + struct blockif_elem *be; bc = arg; for (;;) { pthread_mutex_lock(&bc->bc_mtx); - while (!blockif_dequeue(bc, &req)) { + while (!blockif_dequeue(bc, &be)) { pthread_mutex_unlock(&bc->bc_mtx); - blockif_proc(bc, &req); + blockif_proc(bc, be); pthread_mutex_lock(&bc->bc_mtx); + blockif_complete(bc, be); } 
pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); pthread_mutex_unlock(&bc->bc_mtx); @@ -195,6 +229,38 @@ blockif_thr(void *arg) return (NULL); } +static void +blockif_sigcont_handler(int signal, enum ev_type type, void *arg) +{ + struct blockif_sig_elem *bse; + + for (;;) { + /* + * Process the entire list even if not intended for + * this thread. + */ + do { + bse = blockif_bse_head; + if (bse == NULL) + return; + } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, + (uintptr_t)bse, + (uintptr_t)bse->bse_next)); + + pthread_mutex_lock(&bse->bse_mtx); + bse->bse_pending = 0; + pthread_cond_signal(&bse->bse_cond); + pthread_mutex_unlock(&bse->bse_mtx); + } +} + +static void +blockif_init(void) +{ + mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); + (void) signal(SIGCONT, SIG_IGN); +} + struct blockif_ctxt * blockif_open(const char *optstr, const char *ident) { @@ -206,6 +272,8 @@ blockif_open(const char *optstr, const char *ident) int extra, fd, i, sectsz; int nocache, sync, ro; + pthread_once(&blockif_once, blockif_init); + nocache = 0; sync = 0; ro = 0; @@ -280,7 +348,8 @@ blockif_open(const char *optstr, const char *ident) pthread_mutex_init(&bc->bc_mtx, NULL); pthread_cond_init(&bc->bc_cond, NULL); TAILQ_INIT(&bc->bc_freeq); - TAILQ_INIT(&bc->bc_inuseq); + TAILQ_INIT(&bc->bc_pendq); + TAILQ_INIT(&bc->bc_busyq); bc->bc_req_count = 0; for (i = 0; i < BLOCKIF_MAXREQ; i++) { bc->bc_reqs[i].be_status = BST_FREE; @@ -357,23 +426,76 @@ blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) assert(bc->bc_magic == BLOCKIF_SIG); pthread_mutex_lock(&bc->bc_mtx); - TAILQ_FOREACH(be, &bc->bc_inuseq, be_link) { + /* + * Check pending requests. + */ + TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { + if (be->be_req == breq) + break; + } + if (be != NULL) { + /* + * Found it. 
+ */ + TAILQ_REMOVE(&bc->bc_pendq, be, be_link); + be->be_status = BST_FREE; + be->be_req = NULL; + TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); + bc->bc_req_count--; + pthread_mutex_unlock(&bc->bc_mtx); + + return (0); + } + + /* + * Check in-flight requests. + */ + TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { if (be->be_req == breq) break; } if (be == NULL) { + /* + * Didn't find it. + */ pthread_mutex_unlock(&bc->bc_mtx); return (EINVAL); } - TAILQ_REMOVE(&bc->bc_inuseq, be, be_link); - be->be_status = BST_FREE; - be->be_req = NULL; - TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); - bc->bc_req_count--; + /* + * Interrupt the processing thread to force it to return + * prematurely via its normal callback path. + */ + while (be->be_status == BST_BUSY) { + struct blockif_sig_elem bse, *old_head; + + pthread_mutex_init(&bse.bse_mtx, NULL); + pthread_cond_init(&bse.bse_cond, NULL); + + bse.bse_pending = 1; + + do { + old_head = blockif_bse_head; + bse.bse_next = old_head; + } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, + (uintptr_t)old_head, + (uintptr_t)&bse)); + + pthread_kill(be->be_tid, SIGCONT); + + pthread_mutex_lock(&bse.bse_mtx); + while (bse.bse_pending) + pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); + pthread_mutex_unlock(&bse.bse_mtx); + } + pthread_mutex_unlock(&bc->bc_mtx); - return (0); + /* + * The processing thread has been interrupted. Since it's not + * clear if the callback has been invoked yet, return EBUSY. + */ + return (EBUSY); } int @@ -478,7 +600,7 @@ blockif_queuesz(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); - return (BLOCKIF_MAXREQ); + return (BLOCKIF_MAXREQ - 1); } int diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c index 42aa0b3..ab40854 100644 --- a/usr.sbin/bhyve/pci_ahci.c +++ b/usr.sbin/bhyve/pci_ahci.c @@ -367,11 +367,15 @@ ahci_check_stopped(struct ahci_port *p) { /* * If we are no longer processing the command list and nothing - * is in-flight, clear the running bit. 
+ * is in-flight, clear the running bit, the current command + * slot, the command issue and active bits. */ if (!(p->cmd & AHCI_P_CMD_ST)) { - if (p->pending == 0) + if (p->pending == 0) { p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK); + p->ci = 0; + p->sact = 0; + } } } diff --git a/usr.sbin/bhyve/pmtmr.c b/usr.sbin/bhyve/pmtmr.c deleted file mode 100644 index 3a46f9b..0000000 --- a/usr.sbin/bhyve/pmtmr.c +++ /dev/null @@ -1,173 +0,0 @@ -/*- - * Copyright (c) 2012 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> -#include <sys/sysctl.h> -#include <sys/time.h> -#include <machine/cpufunc.h> - -#include <stdio.h> -#include <stdlib.h> -#include <time.h> -#include <assert.h> -#include <pthread.h> - -#include "acpi.h" -#include "inout.h" - -/* - * The ACPI Power Management timer is a free-running 24- or 32-bit - * timer with a frequency of 3.579545MHz - * - * This implementation will be 32-bits - */ - -#define PMTMR_FREQ 3579545 /* 3.579545MHz */ - -static pthread_mutex_t pmtmr_mtx; -static pthread_once_t pmtmr_once = PTHREAD_ONCE_INIT; - -static uint64_t pmtmr_old; - -static uint64_t pmtmr_tscf; -static uint64_t pmtmr_tsc_old; - -static clockid_t clockid = CLOCK_UPTIME_FAST; -static struct timespec pmtmr_uptime_old; - -#define timespecsub(vvp, uvp) \ - do { \ - (vvp)->tv_sec -= (uvp)->tv_sec; \ - (vvp)->tv_nsec -= (uvp)->tv_nsec; \ - if ((vvp)->tv_nsec < 0) { \ - (vvp)->tv_sec--; \ - (vvp)->tv_nsec += 1000000000; \ - } \ - } while (0) - -static uint64_t -timespec_to_pmtmr(const struct timespec *tsnew, const struct timespec *tsold) -{ - struct timespec tsdiff; - int64_t nsecs; - - tsdiff = *tsnew; - timespecsub(&tsdiff, tsold); - nsecs = tsdiff.tv_sec * 1000000000 + tsdiff.tv_nsec; - assert(nsecs >= 0); - - return (nsecs * PMTMR_FREQ / 1000000000 + pmtmr_old); -} - -static uint64_t -tsc_to_pmtmr(uint64_t tsc_new, uint64_t tsc_old) -{ - - return ((tsc_new - tsc_old) * PMTMR_FREQ / pmtmr_tscf + pmtmr_old); -} - -static void -pmtmr_init(void) -{ - size_t len; - int smp_tsc, err; - struct timespec tsnew, tsold = { 0 }; - - len = sizeof(smp_tsc); - err = sysctlbyname("kern.timecounter.smp_tsc", &smp_tsc, &len, NULL, 0); - assert(err == 0); - - if (smp_tsc) { - len = sizeof(pmtmr_tscf); - err = sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len, - NULL, 0); - assert(err == 0); - - pmtmr_tsc_old = rdtsc(); - pmtmr_old = tsc_to_pmtmr(pmtmr_tsc_old, 0); - } else { - if 
(getenv("BHYVE_PMTMR_PRECISE") != NULL) - clockid = CLOCK_UPTIME; - - err = clock_gettime(clockid, &tsnew); - assert(err == 0); - - pmtmr_uptime_old = tsnew; - pmtmr_old = timespec_to_pmtmr(&tsnew, &tsold); - } - pthread_mutex_init(&pmtmr_mtx, NULL); -} - -static uint32_t -pmtmr_val(void) -{ - struct timespec tsnew; - uint64_t pmtmr_tsc_new; - uint64_t pmtmr_new; - int error; - - pthread_once(&pmtmr_once, pmtmr_init); - - pthread_mutex_lock(&pmtmr_mtx); - - if (pmtmr_tscf) { - pmtmr_tsc_new = rdtsc(); - pmtmr_new = tsc_to_pmtmr(pmtmr_tsc_new, pmtmr_tsc_old); - pmtmr_tsc_old = pmtmr_tsc_new; - } else { - error = clock_gettime(clockid, &tsnew); - assert(error == 0); - - pmtmr_new = timespec_to_pmtmr(&tsnew, &pmtmr_uptime_old); - pmtmr_uptime_old = tsnew; - } - pmtmr_old = pmtmr_new; - - pthread_mutex_unlock(&pmtmr_mtx); - - return (pmtmr_new); -} - -static int -pmtmr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - assert(in == 1); - - if (bytes != 4) - return (-1); - - *eax = pmtmr_val(); - - return (0); -} - -INOUT_PORT(pmtmr, IO_PMTMR, IOPORT_F_IN, pmtmr_handler); diff --git a/usr.sbin/bhyve/rtc.c b/usr.sbin/bhyve/rtc.c index b3631fc..459c900 100644 --- a/usr.sbin/bhyve/rtc.c +++ b/usr.sbin/bhyve/rtc.c @@ -375,4 +375,8 @@ rtc_dsdt(void) } LPC_DSDT(rtc_dsdt); +/* + * Reserve the extended RTC I/O ports although they are not emulated at this + * time. + */ SYSRES_IO(0x72, 6); |