Diffstat (limited to 'sys/amd64')
27 files changed, 336 insertions, 1398 deletions
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index c3aac33..4455cab 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -174,6 +174,22 @@ IDTVEC(xen_intr_upcall) jmp doreti #endif +#ifdef HYPERV +/* + * This is the Hyper-V vmbus channel direct callback interrupt. + * Only used when it is running on Hyper-V. + */ + .text + SUPERALIGN_TEXT +IDTVEC(hv_vmbus_callback) + PUSH_FRAME + FAKE_MCOUNT(TF_RIP(%rsp)) + movq %rsp, %rdi + call hv_vector_handler + MEXITCOUNT + jmp doreti +#endif + #ifdef SMP /* * Global address space TLB shootdown. diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index c81495a..83ca548 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -81,28 +81,11 @@ __FBSDID("$FreeBSD$"); #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) -/* lock region used by kernel profiling */ -int mcount_lock; - -int mp_naps; /* # of Applications processors */ -int boot_cpu_id = -1; /* designated BSP */ - -extern struct pcpu __pcpu[]; - -/* AP uses this during bootstrap. Do not staticize. */ -char *bootSTK; -int bootAP; - -/* Free these after use */ -void *bootstacks[MAXCPU]; +extern struct pcpu __pcpu[]; /* Temporary variables for init_secondary() */ char *doublefault_stack; char *nmi_stack; -void *dpcpu; - -struct pcb stoppcbs[MAXCPU]; -struct susppcb **susppcbs; /* Variables needed for SMP tlb shootdown. */ vm_offset_t smp_tlb_addr2; @@ -112,309 +95,16 @@ uint64_t pcid_cr3; pmap_t smp_tlb_pmap; extern int invpcid_works; -#ifdef COUNT_IPIS -/* Interrupt counts. */ -static u_long *ipi_preempt_counts[MAXCPU]; -static u_long *ipi_ast_counts[MAXCPU]; -u_long *ipi_invltlb_counts[MAXCPU]; -u_long *ipi_invlrng_counts[MAXCPU]; -u_long *ipi_invlpg_counts[MAXCPU]; -u_long *ipi_invlcache_counts[MAXCPU]; -u_long *ipi_rendezvous_counts[MAXCPU]; -static u_long *ipi_hardclock_counts[MAXCPU]; -#endif - -/* Default cpu_ops implementation. */ -struct cpu_ops cpu_ops; - extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); -extern int pmap_pcid_enabled; - /* * Local data and functions. */ -static volatile cpuset_t ipi_nmi_pending; - -/* used to hold the AP's until we are ready to release them */ -struct mtx ap_boot_mtx; - -/* Set to 1 once we're ready to let the APs out of the pen. */ -static volatile int aps_ready = 0; - -/* - * Store data from cpu_add() until later in the boot when we actually setup - * the APs. - */ -struct cpu_info { - int cpu_present:1; - int cpu_bsp:1; - int cpu_disabled:1; - int cpu_hyperthread:1; -} static cpu_info[MAX_APIC_ID + 1]; -int cpu_apic_ids[MAXCPU]; -int apic_cpuids[MAX_APIC_ID + 1]; - -/* Holds pending bitmap based IPIs per CPU */ -volatile u_int cpu_ipi_pending[MAXCPU]; - -static u_int boot_address; -static int cpu_logical; /* logical cpus per core */ -static int cpu_cores; /* cores per package */ - -static void assign_cpu_ids(void); -static void set_interrupt_apic_ids(void); static int start_ap(int apic_id); -static void release_aps(void *dummy); -static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ -static int hyperthreading_allowed = 1; static u_int bootMP_size; - -static void -mem_range_AP_init(void) -{ - if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) - mem_range_softc.mr_op->initAP(&mem_range_softc); -} - -static void -topo_probe_amd(void) -{ - int core_id_bits; - int id; - - /* AMD processors do not support HTT. 
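The hv_vmbus_callback stub above only builds a trap frame, points %rdi at it, and jumps to C. A minimal sketch of the C-side handler the stub expects — the body below is hypothetical; the real hv_vector_handler is supplied by the Hyper-V vmbus driver, not by this diff:

void
hv_vector_handler(struct trapframe *frame)
{

	critical_enter();
	/* Dispatch the pending vmbus channel event for this CPU here. */
	lapic_eoi();		/* acknowledge the interrupt at the local APIC */
	critical_exit();
}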
*/ - cpu_logical = 1; - - if ((amd_feature2 & AMDID2_CMP) == 0) { - cpu_cores = 1; - return; - } - - core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >> - AMDID_COREID_SIZE_SHIFT; - if (core_id_bits == 0) { - cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1; - return; - } - - /* Fam 10h and newer should get here. */ - for (id = 0; id <= MAX_APIC_ID; id++) { - /* Check logical CPU availability. */ - if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled) - continue; - /* Check if logical CPU has the same package ID. */ - if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits)) - continue; - cpu_cores++; - } -} - -/* - * Round up to the next power of two, if necessary, and then - * take log2. - * Returns -1 if argument is zero. - */ -static __inline int -mask_width(u_int x) -{ - - return (fls(x << (1 - powerof2(x))) - 1); -} - -static void -topo_probe_0x4(void) -{ - u_int p[4]; - int pkg_id_bits; - int core_id_bits; - int max_cores; - int max_logical; - int id; - - /* Both zero and one here mean one logical processor per package. */ - max_logical = (cpu_feature & CPUID_HTT) != 0 ? - (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1; - if (max_logical <= 1) - return; - - /* - * Because of uniformity assumption we examine only - * those logical processors that belong to the same - * package as BSP. Further, we count number of - * logical processors that belong to the same core - * as BSP thus deducing number of threads per core. - */ - if (cpu_high >= 0x4) { - cpuid_count(0x04, 0, p); - max_cores = ((p[0] >> 26) & 0x3f) + 1; - } else - max_cores = 1; - core_id_bits = mask_width(max_logical/max_cores); - if (core_id_bits < 0) - return; - pkg_id_bits = core_id_bits + mask_width(max_cores); - - for (id = 0; id <= MAX_APIC_ID; id++) { - /* Check logical CPU availability. */ - if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled) - continue; - /* Check if logical CPU has the same package ID. */ - if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits)) - continue; - cpu_cores++; - /* Check if logical CPU has the same package and core IDs. */ - if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits)) - cpu_logical++; - } - - KASSERT(cpu_cores >= 1 && cpu_logical >= 1, - ("topo_probe_0x4 couldn't find BSP")); - - cpu_cores /= cpu_logical; - hyperthreading_cpus = cpu_logical; -} - -static void -topo_probe_0xb(void) -{ - u_int p[4]; - int bits; - int cnt; - int i; - int logical; - int type; - int x; - - /* We only support three levels for now. */ - for (i = 0; i < 3; i++) { - cpuid_count(0x0b, i, p); - - /* Fall back if CPU leaf 11 doesn't really exist. */ - if (i == 0 && p[1] == 0) { - topo_probe_0x4(); - return; - } - - bits = p[0] & 0x1f; - logical = p[1] &= 0xffff; - type = (p[2] >> 8) & 0xff; - if (type == 0 || logical == 0) - break; - /* - * Because of uniformity assumption we examine only - * those logical processors that belong to the same - * package as BSP. - */ - for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) { - if (!cpu_info[x].cpu_present || - cpu_info[x].cpu_disabled) - continue; - if (x >> bits == boot_cpu_id >> bits) - cnt++; - } - if (type == CPUID_TYPE_SMT) - cpu_logical = cnt; - else if (type == CPUID_TYPE_CORE) - cpu_cores = cnt; - } - if (cpu_logical == 0) - cpu_logical = 1; - cpu_cores /= cpu_logical; -} - -/* - * Both topology discovery code and code that consumes topology - * information assume top-down uniformity of the topology. - * That is, all physical packages must be identical and each - * core in a package must have the same number of threads. 
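The mask_width() helper above computes ceil(log2(x)), i.e. the number of APIC-ID bits needed to number x objects; it is the workhorse behind the package/core ID shifts in topo_probe_0x4(). A self-contained usermode check of the same expression (fls() from <strings.h>, powerof2() from <sys/param.h>):

#include <strings.h>
#include <sys/param.h>

static __inline int
mask_width(u_int x)
{

	return (fls(x << (1 - powerof2(x))) - 1);
}

/* mask_width(6) == 3, mask_width(8) == 3, mask_width(1) == 0, mask_width(0) == -1 */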
- * Topology information is queried only on BSP, on which this - * code runs and for which it can query CPUID information. - * Then topology is extrapolated on all packages using the - * uniformity assumption. - */ -static void -topo_probe(void) -{ - static int cpu_topo_probed = 0; - - if (cpu_topo_probed) - return; - - CPU_ZERO(&logical_cpus_mask); - if (mp_ncpus <= 1) - cpu_cores = cpu_logical = 1; - else if (cpu_vendor_id == CPU_VENDOR_AMD) - topo_probe_amd(); - else if (cpu_vendor_id == CPU_VENDOR_INTEL) { - /* - * See Intel(R) 64 Architecture Processor - * Topology Enumeration article for details. - * - * Note that 0x1 <= cpu_high < 4 case should be - * compatible with topo_probe_0x4() logic when - * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1) - * or it should trigger the fallback otherwise. - */ - if (cpu_high >= 0xb) - topo_probe_0xb(); - else if (cpu_high >= 0x1) - topo_probe_0x4(); - } - - /* - * Fallback: assume each logical CPU is in separate - * physical package. That is, no multi-core, no SMT. - */ - if (cpu_cores == 0 || cpu_logical == 0) - cpu_cores = cpu_logical = 1; - cpu_topo_probed = 1; -} - -struct cpu_group * -cpu_topo(void) -{ - int cg_flags; - - /* - * Determine whether any threading flags are - * necessry. - */ - topo_probe(); - if (cpu_logical > 1 && hyperthreading_cpus) - cg_flags = CG_FLAG_HTT; - else if (cpu_logical > 1) - cg_flags = CG_FLAG_SMT; - else - cg_flags = 0; - if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { - printf("WARNING: Non-uniform processors.\n"); - printf("WARNING: Using suboptimal topology.\n"); - return (smp_topo_none()); - } - /* - * No multi-core or hyper-threaded. - */ - if (cpu_logical * cpu_cores == 1) - return (smp_topo_none()); - /* - * Only HTT no multi-core. - */ - if (cpu_logical > 1 && cpu_cores == 1) - return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags)); - /* - * Only multi-core no HTT. - */ - if (cpu_cores > 1 && cpu_logical == 1) - return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags)); - /* - * Both HTT and multi-core. - */ - return (smp_topo_2level(CG_SHARE_L2, cpu_cores, - CG_SHARE_L1, cpu_logical, cg_flags)); -} +static u_int boot_address; /* * Calculate usable address in base memory for AP trampoline code. @@ -433,85 +123,6 @@ mp_bootaddress(u_int basemem) return mptramp_pagetables; } -void -cpu_add(u_int apic_id, char boot_cpu) -{ - - if (apic_id > MAX_APIC_ID) { - panic("SMP: APIC ID %d too high", apic_id); - return; - } - KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", - apic_id)); - cpu_info[apic_id].cpu_present = 1; - if (boot_cpu) { - KASSERT(boot_cpu_id == -1, - ("CPU %d claims to be BSP, but CPU %d already is", apic_id, - boot_cpu_id)); - boot_cpu_id = apic_id; - cpu_info[apic_id].cpu_bsp = 1; - } - if (mp_ncpus < MAXCPU) { - mp_ncpus++; - mp_maxid = mp_ncpus - 1; - } - if (bootverbose) - printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : - "AP"); -} - -void -cpu_mp_setmaxid(void) -{ - - /* - * mp_maxid should be already set by calls to cpu_add(). - * Just sanity check its value here. - */ - if (mp_ncpus == 0) - KASSERT(mp_maxid == 0, - ("%s: mp_ncpus is zero, but mp_maxid is not", __func__)); - else if (mp_ncpus == 1) - mp_maxid = 0; - else - KASSERT(mp_maxid >= mp_ncpus - 1, - ("%s: counters out of sync: max %d, count %d", __func__, - mp_maxid, mp_ncpus)); -} - -int -cpu_mp_probe(void) -{ - - /* - * Always record BSP in CPU map so that the mbuf init code works - * correctly. 
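To make the cpu_topo() decision tree above concrete: on a hypothetical uniform machine with 4 cores per package and 2 SMT threads per core, the probe leaves cpu_cores = 4 and cpu_logical = 2, and the final branch fires. A sketch of the resulting call, assuming the leaf-0xb path was taken (which leaves hyperthreading_cpus unset, so the flag is CG_FLAG_SMT):

/* mp_ncpus % (4 * 2) == 0 and both factors > 1, so: */
return (smp_topo_2level(CG_SHARE_L2, 4 /* cores */,
    CG_SHARE_L1, 2 /* threads */, CG_FLAG_SMT));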
- */ - CPU_SETOF(0, &all_cpus); - if (mp_ncpus == 0) { - /* - * No CPUs were found, so this must be a UP system. Setup - * the variables to represent a system with a single CPU - * with an id of 0. - */ - mp_ncpus = 1; - return (0); - } - - /* At least one CPU was found. */ - if (mp_ncpus == 1) { - /* - * One CPU was found, so this must be a UP system with - * an I/O APIC. - */ - mp_maxid = 0; - return (0); - } - - /* At least two CPUs were found. */ - return (1); -} - /* * Initialize the IPI handlers and start up the AP's. */ @@ -575,47 +186,6 @@ cpu_mp_start(void) /* - * Print various information about the SMP system hardware and setup. - */ -void -cpu_mp_announce(void) -{ - const char *hyperthread; - int i; - - printf("FreeBSD/SMP: %d package(s) x %d core(s)", - mp_ncpus / (cpu_cores * cpu_logical), cpu_cores); - if (hyperthreading_cpus > 1) - printf(" x %d HTT threads", cpu_logical); - else if (cpu_logical > 1) - printf(" x %d SMT threads", cpu_logical); - printf("\n"); - - /* List active CPUs first. */ - printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); - for (i = 1; i < mp_ncpus; i++) { - if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread) - hyperthread = "/HT"; - else - hyperthread = ""; - printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread, - cpu_apic_ids[i]); - } - - /* List disabled CPUs last. */ - for (i = 0; i <= MAX_APIC_ID; i++) { - if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled) - continue; - if (cpu_info[i].cpu_hyperthread) - hyperthread = "/HT"; - else - hyperthread = ""; - printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread, - i); - } -} - -/* * AP CPU's call this to initialize themselves. */ void @@ -624,7 +194,6 @@ init_secondary(void) struct pcpu *pc; struct nmi_pcpu *np; u_int64_t msr, cr0; - u_int cpuid; int cpu, gsel_tss, x; struct region_descriptor ap_gdt; @@ -712,94 +281,7 @@ init_secondary(void) while (!aps_ready) ia32_pause(); - /* - * On real hardware, switch to x2apic mode if possible. Do it - * after aps_ready was signalled, to avoid manipulating the - * mode while BSP might still want to send some IPI to us - * (second startup IPI is ignored on modern hardware etc). - */ - lapic_xapic_mode(); - - /* Initialize the PAT MSR. */ - pmap_init_pat(); - - /* set up CPU registers and state */ - cpu_setregs(); - - /* set up SSE/NX */ - initializecpu(); - - /* set up FPU state on the AP */ - fpuinit(); - - if (cpu_ops.cpu_init) - cpu_ops.cpu_init(); - - /* A quick check from sanity claus */ - cpuid = PCPU_GET(cpuid); - if (PCPU_GET(apic_id) != lapic_id()) { - printf("SMP: cpuid = %d\n", cpuid); - printf("SMP: actual apic_id = %d\n", lapic_id()); - printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); - panic("cpuid mismatch! boom!!"); - } - - /* Initialize curthread. */ - KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); - PCPU_SET(curthread, PCPU_GET(idlethread)); - - mca_init(); - - mtx_lock_spin(&ap_boot_mtx); - - /* Init local apic for irq's */ - lapic_setup(1); - - /* Set memory range attributes for this CPU to match the BSP */ - mem_range_AP_init(); - - smp_cpus++; - - CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid); - printf("SMP: AP CPU #%d Launched!\n", cpuid); - - /* Determine if we are a logical CPU. */ - /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 
2 */ - if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) - CPU_SET(cpuid, &logical_cpus_mask); - - if (bootverbose) - lapic_dump("AP"); - - if (smp_cpus == mp_ncpus) { - /* enable IPI's, tlb shootdown, freezes etc */ - atomic_store_rel_int(&smp_started, 1); - } - - /* - * Enable global pages TLB extension - * This also implicitly flushes the TLB - */ - - load_cr4(rcr4() | CR4_PGE); - if (pmap_pcid_enabled) - load_cr4(rcr4() | CR4_PCIDE); - load_ds(_udatasel); - load_es(_udatasel); - load_fs(_ufssel); - mtx_unlock_spin(&ap_boot_mtx); - - /* Wait until all the AP's are up. */ - while (smp_started == 0) - ia32_pause(); - - /* Start per-CPU event timers. */ - cpu_initclocks_ap(); - - sched_throw(NULL); - - panic("scheduler returned us to %s", __func__); - /* NOTREACHED */ + init_secondary_tail(); } /******************************************************************* @@ -807,108 +289,6 @@ init_secondary(void) */ /* - * We tell the I/O APIC code about all the CPUs we want to receive - * interrupts. If we don't want certain CPUs to receive IRQs we - * can simply not tell the I/O APIC code about them in this function. - * We also do not tell it about the BSP since it tells itself about - * the BSP internally to work with UP kernels and on UP machines. - */ -static void -set_interrupt_apic_ids(void) -{ - u_int i, apic_id; - - for (i = 0; i < MAXCPU; i++) { - apic_id = cpu_apic_ids[i]; - if (apic_id == -1) - continue; - if (cpu_info[apic_id].cpu_bsp) - continue; - if (cpu_info[apic_id].cpu_disabled) - continue; - - /* Don't let hyperthreads service interrupts. */ - if (cpu_logical > 1 && - apic_id % cpu_logical != 0) - continue; - - intr_add_cpu(i); - } -} - -/* - * Assign logical CPU IDs to local APICs. - */ -static void -assign_cpu_ids(void) -{ - u_int i; - - TUNABLE_INT_FETCH("machdep.hyperthreading_allowed", - &hyperthreading_allowed); - - /* Check for explicitly disabled CPUs. */ - for (i = 0; i <= MAX_APIC_ID; i++) { - if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) - continue; - - if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) { - cpu_info[i].cpu_hyperthread = 1; - - /* - * Don't use HT CPU if it has been disabled by a - * tunable. - */ - if (hyperthreading_allowed == 0) { - cpu_info[i].cpu_disabled = 1; - continue; - } - } - - /* Don't use this CPU if it has been disabled by a tunable. */ - if (resource_disabled("lapic", i)) { - cpu_info[i].cpu_disabled = 1; - continue; - } - } - - if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) { - hyperthreading_cpus = 0; - cpu_logical = 1; - } - - /* - * Assign CPU IDs to local APIC IDs and disable any CPUs - * beyond MAXCPU. CPU 0 is always assigned to the BSP. - * - * To minimize confusion for userland, we attempt to number - * CPUs such that all threads and cores in a package are - * grouped together. For now we assume that the BSP is always - * the first thread in a package and just start adding APs - * starting with the BSP's APIC ID. - */ - mp_ncpus = 1; - cpu_apic_ids[0] = boot_cpu_id; - apic_cpuids[boot_cpu_id] = 0; - for (i = boot_cpu_id + 1; i != boot_cpu_id; - i == MAX_APIC_ID ? 
i = 0 : i++) { - if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || - cpu_info[i].cpu_disabled) - continue; - - if (mp_ncpus < MAXCPU) { - cpu_apic_ids[mp_ncpus] = i; - apic_cpuids[i] = mp_ncpus; - mp_ncpus++; - } else - cpu_info[i].cpu_disabled = 1; - } - KASSERT(mp_maxid >= mp_ncpus - 1, - ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, - mp_ncpus)); -} - -/* * start each AP in our list */ int @@ -1026,129 +406,6 @@ start_ap(int apic_id) return 0; /* return FAILURE */ } -#ifdef COUNT_XINVLTLB_HITS -u_int xhits_gbl[MAXCPU]; -u_int xhits_pg[MAXCPU]; -u_int xhits_rng[MAXCPU]; -static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, - sizeof(xhits_gbl), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, - sizeof(xhits_pg), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, - sizeof(xhits_rng), "IU", ""); - -u_int ipi_global; -u_int ipi_page; -u_int ipi_range; -u_int ipi_range_size; -SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); -SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); -SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); -SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, - &ipi_range_size, 0, ""); - -u_int ipi_masked_global; -u_int ipi_masked_page; -u_int ipi_masked_range; -u_int ipi_masked_range_size; -SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, - &ipi_masked_global, 0, ""); -SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, - &ipi_masked_page, 0, ""); -SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, - &ipi_masked_range, 0, ""); -SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, - &ipi_masked_range_size, 0, ""); -#endif /* COUNT_XINVLTLB_HITS */ - -/* - * Init and startup IPI. - */ -void -ipi_startup(int apic_id, int vector) -{ - - /* - * This attempts to follow the algorithm described in the - * Intel Multiprocessor Specification v1.4 in section B.4. - * For each IPI, we allow the local APIC ~20us to deliver the - * IPI. If that times out, we panic. - */ - - /* - * first we do an INIT IPI: this INIT IPI might be run, resetting - * and running the target CPU. OR this INIT IPI might be latched (P5 - * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be - * ignored. - */ - lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL | - APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); - lapic_ipi_wait(100); - - /* Explicitly deassert the INIT IPI. */ - lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL | - APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, - apic_id); - - DELAY(10000); /* wait ~10mS */ - - /* - * next we do a STARTUP IPI: the previous INIT IPI might still be - * latched, (P5 bug) this 1st STARTUP would then terminate - * immediately, and the previously started INIT IPI would continue. OR - * the previous INIT IPI has already run. and this STARTUP IPI will - * run. OR the previous INIT IPI was ignored. and this STARTUP IPI - * will run. 
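The vector passed through this sequence is the physical page number of the AP trampoline, which is why mp_bootaddress() earlier in this diff places the trampoline page-aligned below 1 MB. A sketch of the derivation, mirroring what start_ap() hands to ipi_startup():

/* STARTUP IPI vector = trampoline page number; must fit in 8 bits. */
u_int vector = (boot_address >> PAGE_SHIFT) & 0xff;

ipi_startup(apic_id, vector);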
- */ - lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | - APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | - vector, apic_id); - if (!lapic_ipi_wait(100)) - panic("Failed to deliver first STARTUP IPI to APIC %d", - apic_id); - DELAY(200); /* wait ~200uS */ - - /* - * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF - * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR - * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is - * recognized after hardware RESET or INIT IPI. - */ - lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | - APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | - vector, apic_id); - if (!lapic_ipi_wait(100)) - panic("Failed to deliver second STARTUP IPI to APIC %d", - apic_id); - - DELAY(200); /* wait ~200uS */ -} - -/* - * Send an IPI to specified CPU handling the bitmap logic. - */ -static void -ipi_send_cpu(int cpu, u_int ipi) -{ - u_int bitmap, old_pending, new_pending; - - KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (old_pending) - return; - } - lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); -} - /* * Flush the TLB on all other CPU's */ @@ -1228,26 +485,6 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap, } void -smp_cache_flush(void) -{ - - if (smp_started) - smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0); -} - -void -smp_invltlb(pmap_t pmap) -{ - - if (smp_started) { - smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_global++; -#endif - } -} - -void smp_invlpg(pmap_t pmap, vm_offset_t addr) { @@ -1312,210 +549,23 @@ smp_masked_invlpg_range(cpuset_t mask, pmap_t pmap, vm_offset_t addr1, } void -ipi_bitmap_handler(struct trapframe frame) -{ - struct trapframe *oldframe; - struct thread *td; - int cpu = PCPU_GET(cpuid); - u_int ipi_bitmap; - - critical_enter(); - td = curthread; - td->td_intr_nesting_level++; - oldframe = td->td_intr_frame; - td->td_intr_frame = &frame; - ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); - if (ipi_bitmap & (1 << IPI_PREEMPT)) { -#ifdef COUNT_IPIS - (*ipi_preempt_counts[cpu])++; -#endif - sched_preempt(td); - } - if (ipi_bitmap & (1 << IPI_AST)) { -#ifdef COUNT_IPIS - (*ipi_ast_counts[cpu])++; -#endif - /* Nothing to do for AST */ - } - if (ipi_bitmap & (1 << IPI_HARDCLOCK)) { -#ifdef COUNT_IPIS - (*ipi_hardclock_counts[cpu])++; -#endif - hardclockintr(); - } - td->td_intr_frame = oldframe; - td->td_intr_nesting_level--; - critical_exit(); -} - -/* - * send an IPI to a set of cpus. - */ -void -ipi_selected(cpuset_t cpus, u_int ipi) -{ - int cpu; - - /* - * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit - * of help in order to understand what is the source. - * Set the mask of receiving CPUs for this purpose. - */ - if (ipi == IPI_STOP_HARD) - CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - - while ((cpu = CPU_FFS(&cpus)) != 0) { - cpu--; - CPU_CLR(cpu, &cpus); - CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); - ipi_send_cpu(cpu, ipi); - } -} - -/* - * send an IPI to a specific CPU. - */ -void -ipi_cpu(int cpu, u_int ipi) -{ - - /* - * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit - * of help in order to understand what is the source. - * Set the mask of receiving CPUs for this purpose. 
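For callers, the relocated IPI primitives keep their usual shape: build a cpuset_t for a group, or address one CPU by its logical ID. An illustrative fragment — the particular IPIs chosen here are just examples:

cpuset_t set;

CPU_ZERO(&set);
CPU_SET(1, &set);
CPU_SET(2, &set);
ipi_selected(set, IPI_AST);	/* post an AST to CPUs 1 and 2 */
ipi_cpu(3, IPI_PREEMPT);	/* ask CPU 3 to reschedule */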
- */ - if (ipi == IPI_STOP_HARD) - CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); - - CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); - ipi_send_cpu(cpu, ipi); -} - -/* - * send an IPI to all CPUs EXCEPT myself - */ -void -ipi_all_but_self(u_int ipi) +smp_cache_flush(void) { - cpuset_t other_cpus; - - other_cpus = all_cpus; - CPU_CLR(PCPU_GET(cpuid), &other_cpus); - if (IPI_IS_BITMAPED(ipi)) { - ipi_selected(other_cpus, ipi); - return; - } - - /* - * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit - * of help in order to understand what is the source. - * Set the mask of receiving CPUs for this purpose. - */ - if (ipi == IPI_STOP_HARD) - CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); - - CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); + if (smp_started) + smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0); } -int -ipi_nmi_handler() -{ - u_int cpuid; - - /* - * As long as there is not a simple way to know about a NMI's - * source, if the bitmask for the current CPU is present in - * the global pending bitword an IPI_STOP_HARD has been issued - * and should be handled. - */ - cpuid = PCPU_GET(cpuid); - if (!CPU_ISSET(cpuid, &ipi_nmi_pending)) - return (1); - - CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending); - cpustop_handler(); - return (0); -} - -/* - * Handle an IPI_STOP by saving our current context and spinning until we - * are resumed. - */ void -cpustop_handler(void) -{ - u_int cpu; - - cpu = PCPU_GET(cpuid); - - savectx(&stoppcbs[cpu]); - - /* Indicate that we are stopped */ - CPU_SET_ATOMIC(cpu, &stopped_cpus); - - /* Wait for restart */ - while (!CPU_ISSET(cpu, &started_cpus)) - ia32_pause(); - - CPU_CLR_ATOMIC(cpu, &started_cpus); - CPU_CLR_ATOMIC(cpu, &stopped_cpus); +smp_invltlb(pmap_t pmap) +{ -#ifdef DDB - amd64_db_resume_dbreg(); + if (smp_started) { + smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_global++; #endif - - if (cpu == 0 && cpustop_restartfunc != NULL) { - cpustop_restartfunc(); - cpustop_restartfunc = NULL; - } -} - -/* - * Handle an IPI_SUSPEND by saving our current context and spinning until we - * are resumed. - */ -void -cpususpend_handler(void) -{ - u_int cpu; - - mtx_assert(&smp_ipi_mtx, MA_NOTOWNED); - - cpu = PCPU_GET(cpuid); - if (savectx(&susppcbs[cpu]->sp_pcb)) { - fpususpend(susppcbs[cpu]->sp_fpususpend); - wbinvd(); - CPU_SET_ATOMIC(cpu, &suspended_cpus); - } else { - fpuresume(susppcbs[cpu]->sp_fpususpend); - pmap_init_pat(); - initializecpu(); - PCPU_SET(switchtime, 0); - PCPU_SET(switchticks, ticks); - - /* Indicate that we are resumed */ - CPU_CLR_ATOMIC(cpu, &suspended_cpus); } - - /* Wait for resume */ - while (!CPU_ISSET(cpu, &started_cpus)) - ia32_pause(); - - if (cpu_ops.cpu_resume) - cpu_ops.cpu_resume(); - if (vmm_resume_p) - vmm_resume_p(); - - /* Resume MCA and local APIC */ - lapic_xapic_mode(); - mca_resume(); - lapic_setup(0); - - CPU_CLR_ATOMIC(cpu, &started_cpus); - /* Indicate that we are resumed */ - CPU_CLR_ATOMIC(cpu, &suspended_cpus); } /* @@ -1678,63 +728,3 @@ invlrng_handler(void) atomic_add_int(&smp_tlb_wait, 1); } - -void -invlcache_handler(void) -{ -#ifdef COUNT_IPIS - (*ipi_invlcache_counts[PCPU_GET(cpuid)])++; -#endif /* COUNT_IPIS */ - - wbinvd(); - atomic_add_int(&smp_tlb_wait, 1); -} - -/* - * This is called once the rest of the system is up and running and we're - * ready to let the AP's out of the pen. 
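The pen described here is a plain release/acquire gate between the BSP and the APs; condensed, the two sides (both shown in full elsewhere in this diff) pair up like this:

/* BSP side (release_aps): publish the flag with release semantics. */
atomic_store_rel_int(&aps_ready, 1);

/* AP side (init_secondary): spin politely until released. */
while (!aps_ready)
	ia32_pause();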
- */ -static void -release_aps(void *dummy __unused) -{ - - if (mp_ncpus == 1) - return; - atomic_store_rel_int(&aps_ready, 1); - while (smp_started == 0) - ia32_pause(); -} -SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); - -#ifdef COUNT_IPIS -/* - * Setup interrupt counters for IPI handlers. - */ -static void -mp_ipi_intrcnt(void *dummy) -{ - char buf[64]; - int i; - - CPU_FOREACH(i) { - snprintf(buf, sizeof(buf), "cpu%d:invltlb", i); - intrcnt_add(buf, &ipi_invltlb_counts[i]); - snprintf(buf, sizeof(buf), "cpu%d:invlrng", i); - intrcnt_add(buf, &ipi_invlrng_counts[i]); - snprintf(buf, sizeof(buf), "cpu%d:invlpg", i); - intrcnt_add(buf, &ipi_invlpg_counts[i]); - snprintf(buf, sizeof(buf), "cpu%d:invlcache", i); - intrcnt_add(buf, &ipi_invlcache_counts[i]); - snprintf(buf, sizeof(buf), "cpu%d:preempt", i); - intrcnt_add(buf, &ipi_preempt_counts[i]); - snprintf(buf, sizeof(buf), "cpu%d:ast", i); - intrcnt_add(buf, &ipi_ast_counts[i]); - snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i); - intrcnt_add(buf, &ipi_rendezvous_counts[i]); - snprintf(buf, sizeof(buf), "cpu%d:hardclock", i); - intrcnt_add(buf, &ipi_hardclock_counts[i]); - } -} -SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL); -#endif - diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index bdaca33..c24dd5a 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -340,7 +340,9 @@ device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device -# HyperV drivers +# HyperV drivers and enhancement support +# NOTE: HYPERV depends on hyperv. They must be added or removed together. +options HYPERV # Hyper-V kernel infrastructure device hyperv # HyperV drivers # Xen HVM Guest Optimizations diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES index 9b697f0..e0fe465 100644 --- a/sys/amd64/conf/NOTES +++ b/sys/amd64/conf/NOTES @@ -494,6 +494,8 @@ device virtio_balloon # VirtIO Memory Balloon device device virtio_random # VirtIO Entropy device device virtio_console # VirtIO Console device +# Microsoft Hyper-V enhancement support +options HYPERV # Hyper-V kernel infrastructure device hyperv # HyperV drivers # Xen HVM Guest Optimizations diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index 3a4b6b3..034a693 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -35,6 +35,39 @@ extern int mp_naps; extern int boot_cpu_id; extern struct pcb stoppcbs[]; extern int cpu_apic_ids[]; +extern int bootAP; +extern void *dpcpu; +extern char *bootSTK; +extern int bootAP; +extern void *bootstacks[]; +extern volatile u_int cpu_ipi_pending[]; +extern volatile int aps_ready; +extern struct mtx ap_boot_mtx; +extern int cpu_logical; +extern int cpu_cores; +extern int pmap_pcid_enabled; +extern u_int xhits_gbl[]; +extern u_int xhits_pg[]; +extern u_int xhits_rng[]; +extern u_int ipi_global; +extern u_int ipi_page; +extern u_int ipi_range; +extern u_int ipi_range_size; +extern u_int ipi_masked_global; +extern u_int ipi_masked_page; +extern u_int ipi_masked_range; +extern u_int ipi_masked_range_size; + +extern volatile int smp_tlb_wait; + +struct cpu_info { + int cpu_present:1; + int cpu_bsp:1; + int cpu_disabled:1; + int cpu_hyperthread:1; +}; +extern struct cpu_info cpu_info[]; + #ifdef COUNT_IPIS extern u_long *ipi_invltlb_counts[MAXCPU]; extern u_long *ipi_invlrng_counts[MAXCPU]; @@ -60,9 +93,11 @@ inthand_t struct pmap; /* functions in mp_machdep.c */ +void assign_cpu_ids(void); void 
cpu_add(u_int apic_id, char boot_cpu); void cpustop_handler(void); void cpususpend_handler(void); +void init_secondary_tail(void); void invltlb_handler(void); void invltlb_pcid_handler(void); void invlpg_handler(void); @@ -77,6 +112,7 @@ void ipi_cpu(int cpu, u_int ipi); int ipi_nmi_handler(void); void ipi_selected(cpuset_t cpus, u_int ipi); u_int mp_bootaddress(u_int); +void set_interrupt_apic_ids(void); void smp_cache_flush(void); void smp_invlpg(struct pmap *pmap, vm_offset_t addr); void smp_masked_invlpg(cpuset_t mask, struct pmap *pmap, vm_offset_t addr); @@ -87,6 +123,9 @@ void smp_masked_invlpg_range(cpuset_t mask, struct pmap *pmap, void smp_invltlb(struct pmap *pmap); void smp_masked_invltlb(cpuset_t mask, struct pmap *pmap); int native_start_all_aps(void); +void mem_range_AP_init(void); +void topo_probe(void); +void ipi_send_cpu(int cpu, u_int ipi); #endif /* !LOCORE */ #endif /* SMP */ diff --git a/sys/amd64/include/vm.h b/sys/amd64/include/vm.h index 6573e37..22d2eca 100644 --- a/sys/amd64/include/vm.h +++ b/sys/amd64/include/vm.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009 Advanced Computing Technologies LLC + * Copyright (c) 2009 Hudson River Trading LLC * Written by: John H. Baldwin <jhb@FreeBSD.org> * All rights reserved. * diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 52294bd..7c617be 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -204,13 +204,12 @@ int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state); int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state); int vm_apicid2vcpuid(struct vm *vm, int apicid); int vm_activate_cpu(struct vm *vm, int vcpu); -cpuset_t vm_active_cpus(struct vm *vm); -cpuset_t vm_suspended_cpus(struct vm *vm); struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip); +#ifdef _SYS__CPUSET_H_ /* * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'. 
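A usage sketch for the relocated rendezvous prototypes — flush_cb and its body are hypothetical, and the declarations are now visible only when cpuset_t is already defined, which is exactly what the new #ifdef _SYS__CPUSET_H_ guard enforces:

static void
flush_cb(struct vm *vm, int vcpuid, void *arg)
{

	/* Per-vcpu work; must not block (see the restriction below). */
}

/* ... later, from a vcpu thread: */
cpuset_t dest = vm_active_cpus(vm);

vm_smp_rendezvous(vm, vcpuid, dest, flush_cb, NULL);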
* The rendezvous 'func(arg)' is not allowed to do anything that will @@ -228,6 +227,9 @@ void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip); typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg); void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, vm_rendezvous_func_t func, void *arg); +cpuset_t vm_active_cpus(struct vm *vm); +cpuset_t vm_suspended_cpus(struct vm *vm); +#endif /* _SYS__CPUSET_H_ */ static __inline int vcpu_rendezvous_pending(void *rendezvous_cookie) diff --git a/sys/amd64/include/xen/xenfunc.h b/sys/amd64/include/xen/xenfunc.h index d03d4f6..d8a6b5c 100644 --- a/sys/amd64/include/xen/xenfunc.h +++ b/sys/amd64/include/xen/xenfunc.h @@ -29,12 +29,7 @@ #ifndef _XEN_XENFUNC_H_ #define _XEN_XENFUNC_H_ -#ifdef XENHVM #include <machine/xen/xenvar.h> -#else -#include <machine/xen/xenpmap.h> -#include <machine/segments.h> -#endif #define BKPT __asm__("int3"); #define XPQ_CALL_DEPTH 5 @@ -64,10 +59,6 @@ void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line); #define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0) #endif -#ifndef XENHVM -void xen_update_descriptor(union descriptor *, union descriptor *); -#endif - extern struct mtx balloon_lock; #if 0 #define balloon_lock(__flags) mtx_lock_irqsave(&balloon_lock, __flags) diff --git a/sys/amd64/include/xen/xenpmap.h b/sys/amd64/include/xen/xenpmap.h deleted file mode 100644 index d768dad..0000000 --- a/sys/amd64/include/xen/xenpmap.h +++ /dev/null @@ -1,227 +0,0 @@ -/* - * - * Copyright (c) 2004 Christian Limpach. - * Copyright (c) 2004,2005 Kip Macy - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#ifndef _XEN_XENPMAP_H_ -#define _XEN_XENPMAP_H_ - -#include <machine/xen/features.h> - -void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int); -void xen_pt_switch(vm_paddr_t); -void xen_set_ldt(vm_paddr_t, unsigned long); -void xen_pgdpt_pin(vm_paddr_t); -void xen_pgd_pin(vm_paddr_t); -void xen_pgd_unpin(vm_paddr_t); -void xen_pt_pin(vm_paddr_t); -void xen_pt_unpin(vm_paddr_t); -void xen_flush_queue(void); -void xen_check_queue(void); -#if 0 -void pmap_ref(pt_entry_t *pte, vm_paddr_t ma); -#endif - -#ifdef INVARIANTS -#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__) -#else -#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0) -#endif - -#ifdef PMAP_DEBUG -#define PMAP_REF pmap_ref -#define PMAP_DEC_REF_PAGE pmap_dec_ref_page -#define PMAP_MARK_PRIV pmap_mark_privileged -#define PMAP_MARK_UNPRIV pmap_mark_unprivileged -#else -#define PMAP_MARK_PRIV(a) -#define PMAP_MARK_UNPRIV(a) -#define PMAP_REF(a, b) -#define PMAP_DEC_REF_PAGE(a) -#endif - -#define ALWAYS_SYNC 0 - -#ifdef PT_DEBUG -#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__) -#else -#define PT_LOG() -#endif - -#define INVALID_P2M_ENTRY (~0UL) - -#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */ - -#define SH_PD_SET_VA 1 -#define SH_PD_SET_VA_MA 2 -#define SH_PD_SET_VA_CLEAR 3 - -struct pmap; -void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type); -#ifdef notyet -static vm_paddr_t -vptetomachpte(vm_paddr_t *pte) -{ - vm_offset_t offset, ppte; - vm_paddr_t pgoffset, retval, *pdir_shadow_ptr; - int pgindex; - - ppte = (vm_offset_t)pte; - pgoffset = (ppte & PAGE_MASK); - offset = ppte - (vm_offset_t)PTmap; - pgindex = ppte >> PDRSHIFT; - - pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow); - retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset; - return (retval); -} -#endif -#define PT_GET(_ptp) \ - (pmap_valid_entry(*(_ptp)) ? 
xpmap_mtop(*(_ptp)) : (0)) - -#ifdef WRITABLE_PAGETABLES - -#define PT_SET_VA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - PT_LOG(); \ - *(_ptp) = xpmap_ptom((_npte)); \ -} while (/*CONSTCOND*/0) -#define PT_SET_VA_MA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - PT_LOG(); \ - *(_ptp) = (_npte); \ -} while (/*CONSTCOND*/0) -#define PT_CLEAR_VA(_ptp, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - PT_LOG(); \ - *(_ptp) = 0; \ -} while (/*CONSTCOND*/0) - -#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -#else /* !WRITABLE_PAGETABLES */ - -#define PT_SET_VA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - xen_queue_pt_update(vtomach(_ptp), \ - xpmap_ptom(_npte)); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PT_SET_VA_MA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - xen_queue_pt_update(vtomach(_ptp), _npte); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PT_CLEAR_VA(_ptp, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - xen_queue_pt_update(vtomach(_ptp), 0); \ - if (sync || ALWAYS_SYNC) \ - xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -#define PD_SET_VA(_pmap, _ptepindex,_npte,sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_SET_VA_MA(_pmap, _ptepindex,_npte,sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -#endif - -#define PT_SET_MA(_va, _ma) \ -do { \ - PANIC_IF(HYPERVISOR_update_va_mapping(((unsigned long)(_va)),\ - (_ma), \ - UVMF_INVLPG| UVMF_ALL) < 0); \ -} while (/*CONSTCOND*/0) - -#define PT_UPDATES_FLUSH() do { \ - xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -static __inline vm_paddr_t -xpmap_mtop(vm_paddr_t mpa) -{ - vm_paddr_t tmp = (mpa & PG_FRAME); - - return machtophys(tmp) | (mpa & ~PG_FRAME); -} - -static __inline vm_paddr_t -xpmap_ptom(vm_paddr_t ppa) -{ - vm_paddr_t tmp = (ppa & PG_FRAME); - - return phystomach(tmp) | (ppa & ~PG_FRAME); -} - -static __inline void -set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ -#ifdef notyet - PANIC_IF(max_mapnr && pfn >= max_mapnr); -#endif - if (xen_feature(XENFEAT_auto_translated_physmap)) { -#ifdef notyet - PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY)); -#endif - return; - } - xen_phys_machine[pfn] = mfn; -} - - - - -#endif /* _XEN_XENPMAP_H_ */ diff --git a/sys/amd64/include/xen/xenvar.h b/sys/amd64/include/xen/xenvar.h index d9dbc5d..110a351 100644 --- a/sys/amd64/include/xen/xenvar.h 
+++ b/sys/amd64/include/xen/xenvar.h @@ -48,68 +48,7 @@ if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__ #define TRACE_DEBUG(argflags, _f, _a...) #endif -#ifdef XENHVM - -static inline vm_paddr_t -phystomach(vm_paddr_t pa) -{ - - return (pa); -} - -static inline vm_paddr_t -machtophys(vm_paddr_t ma) -{ - - return (ma); -} - #define vtomach(va) pmap_kextract((vm_offset_t) (va)) -#define PFNTOMFN(pa) (pa) -#define MFNTOPFN(ma) (ma) - -#define set_phys_to_machine(pfn, mfn) ((void)0) -#define phys_to_machine_mapping_valid(pfn) (TRUE) -#define PT_UPDATES_FLUSH() ((void)0) - -#else - -extern xen_pfn_t *xen_phys_machine; - - -extern xen_pfn_t *xen_machine_phys; -/* Xen starts physical pages after the 4MB ISA hole - - * FreeBSD doesn't - */ - - -#undef ADD_ISA_HOLE /* XXX */ - -#ifdef ADD_ISA_HOLE -#define ISA_INDEX_OFFSET 1024 -#define ISA_PDR_OFFSET 1 -#else -#define ISA_INDEX_OFFSET 0 -#define ISA_PDR_OFFSET 0 -#endif - - -#define PFNTOMFN(i) (xen_phys_machine[(i)]) -#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)]) - -#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE) -#define PTOV(x) (((uintptr_t)(x)) + KERNBASE) - -#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT) -#define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT) - -#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT) -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) - -#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT) -#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT) - -#endif void xpq_init(void); diff --git a/sys/amd64/vmm/amd/amdv.c b/sys/amd64/vmm/amd/amdv.c index acb3a3d..3157e21 100644 --- a/sys/amd64/vmm/amd/amdv.c +++ b/sys/amd64/vmm/amd/amdv.c @@ -32,7 +32,6 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> #include <sys/errno.h> -#include <sys/smp.h> #include <machine/vmm.h> #include "io/iommu.h" diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c index f505ea1..7cc13ca 100644 --- a/sys/amd64/vmm/amd/svm.c +++ b/sys/amd64/vmm/amd/svm.c @@ -802,6 +802,7 @@ svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit) case CPU_MODE_REAL: vmexit->u.inst_emul.cs_base = seg.base; vmexit->u.inst_emul.cs_d = 0; + break; case CPU_MODE_PROTECTED: case CPU_MODE_COMPATIBILITY: vmexit->u.inst_emul.cs_base = seg.base; diff --git a/sys/amd64/vmm/amd/svm_msr.c b/sys/amd64/vmm/amd/svm_msr.c index 100af4b..d3a6fe8 100644 --- a/sys/amd64/vmm/amd/svm_msr.c +++ b/sys/amd64/vmm/amd/svm_msr.c @@ -27,12 +27,17 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include <sys/types.h> +#include <sys/param.h> #include <sys/errno.h> +#include <sys/systm.h> #include <machine/cpufunc.h> #include <machine/specialreg.h> +#include <machine/vmm.h> +#include "svm.h" +#include "vmcb.h" +#include "svm_softc.h" #include "svm_msr.h" #ifndef MSR_AMDK8_IPM @@ -105,6 +110,14 @@ svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t *result, int error = 0; switch (num) { + case MSR_MTRRcap: + case MSR_MTRRdefType: + case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: + case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: + case MSR_MTRR64kBase: + case MSR_SYSCFG: + *result = 0; + break; case MSR_AMDK8_IPM: *result = 0; break; @@ -122,6 +135,15 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu) int error = 0; switch (num) { + case MSR_MTRRcap: + vm_inject_gp(sc->vm, vcpu); + break; + case MSR_MTRRdefType: + case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: + case MSR_MTRR16kBase ... 
MSR_MTRR16kBase + 1: + case MSR_MTRR64kBase: + case MSR_SYSCFG: + break; /* Ignore writes */ case MSR_AMDK8_IPM: /* * Ignore writes to the "Interrupt Pending Message" MSR. diff --git a/sys/amd64/vmm/amd/vmcb.c b/sys/amd64/vmm/amd/vmcb.c index fb4b2c8..d860169 100644 --- a/sys/amd64/vmm/amd/vmcb.c +++ b/sys/amd64/vmm/amd/vmcb.c @@ -29,7 +29,6 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> -#include <sys/cpuset.h> #include <machine/segments.h> #include <machine/specialreg.h> diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c index e517778..526b0d1 100644 --- a/sys/amd64/vmm/intel/vmx_msr.c +++ b/sys/amd64/vmm/intel/vmx_msr.c @@ -31,7 +31,6 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> -#include <sys/cpuset.h> #include <machine/clock.h> #include <machine/cpufunc.h> @@ -396,6 +395,13 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) error = 0; switch (num) { + case MSR_MTRRcap: + case MSR_MTRRdefType: + case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: + case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: + case MSR_MTRR64kBase: + *val = 0; + break; case MSR_IA32_MISC_ENABLE: *val = misc_enable; break; @@ -427,6 +433,14 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) error = 0; switch (num) { + case MSR_MTRRcap: + vm_inject_gp(vmx->vm, vcpuid); + break; + case MSR_MTRRdefType: + case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: + case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: + case MSR_MTRR64kBase: + break; /* Ignore writes */ case MSR_IA32_MISC_ENABLE: changed = val ^ misc_enable; /* diff --git a/sys/amd64/vmm/io/vatpic.c b/sys/amd64/vmm/io/vatpic.c index 0df6e7c..6e94f5b 100644 --- a/sys/amd64/vmm/io/vatpic.c +++ b/sys/amd64/vmm/io/vatpic.c @@ -30,7 +30,6 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/types.h> #include <sys/queue.h> -#include <sys/cpuset.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> diff --git a/sys/amd64/vmm/io/vatpit.c b/sys/amd64/vmm/io/vatpit.c index 842253d..173ef1f 100644 --- a/sys/amd64/vmm/io/vatpit.c +++ b/sys/amd64/vmm/io/vatpit.c @@ -31,7 +31,6 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/types.h> #include <sys/queue.h> -#include <sys/cpuset.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> diff --git a/sys/amd64/vmm/io/vhpet.c b/sys/amd64/vmm/io/vhpet.c index a4c96cd..1db1c51 100644 --- a/sys/amd64/vmm/io/vhpet.c +++ b/sys/amd64/vmm/io/vhpet.c @@ -36,7 +36,6 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/systm.h> -#include <sys/cpuset.h> #include <dev/acpica/acpi_hpet.h> diff --git a/sys/amd64/vmm/io/vioapic.c b/sys/amd64/vmm/io/vioapic.c index 411887d..e6b8b5a 100644 --- a/sys/amd64/vmm/io/vioapic.c +++ b/sys/amd64/vmm/io/vioapic.c @@ -32,7 +32,6 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/queue.h> -#include <sys/cpuset.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/systm.h> diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index 7097248..3451e1e 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -547,6 +547,8 @@ vlapic_update_ppr(struct vlapic *vlapic) VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr); } +static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt"); + static void vlapic_process_eoi(struct vlapic *vlapic) { @@ -557,11 +559,7 @@ vlapic_process_eoi(struct vlapic *vlapic) isrptr = &lapic->isr0; tmrptr = &lapic->tmr0; 
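The hunks that follow change the scan loop bound from i > 0 to i >= 0, so the lowest ISR/IRR word is no longer skipped and in-service vectors below 32 get processed. The index arithmetic is easier to see spelled out — the eight 32-bit words sit 16 bytes (four u_ints) apart in the APIC page, hence idx = i * 4:

/* Sketch of the vector <-> (i, bitpos) mapping used by the loop: */
int i = vector / 32;			/* word index, scanned 7..0 */
int bitpos = vector % 32;
uint32_t *reg = &lapic->isr0 + (i * 4);	/* 16-byte register stride */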
- /* - * The x86 architecture reserves the the first 32 vectors for use - * by the processor. - */ - for (i = 7; i > 0; i--) { + for (i = 7; i >= 0; i--) { idx = i * 4; bitpos = fls(isrptr[idx]); if (bitpos-- != 0) { @@ -570,17 +568,21 @@ vlapic_process_eoi(struct vlapic *vlapic) vlapic->isrvec_stk_top); } isrptr[idx] &= ~(1 << bitpos); + vector = i * 32 + bitpos; + VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d", + vector); VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi"); vlapic->isrvec_stk_top--; vlapic_update_ppr(vlapic); if ((tmrptr[idx] & (1 << bitpos)) != 0) { - vector = i * 32 + bitpos; vioapic_process_eoi(vlapic->vm, vlapic->vcpuid, vector); } return; } } + VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "Gratuitous EOI"); + vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1); } static __inline int @@ -1092,11 +1094,7 @@ vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) irrptr = &lapic->irr0; - /* - * The x86 architecture reserves the the first 32 vectors for use - * by the processor. - */ - for (i = 7; i > 0; i--) { + for (i = 7; i >= 0; i--) { idx = i * 4; val = atomic_load_acq_int(&irrptr[idx]); bitpos = fls(val); diff --git a/sys/amd64/vmm/io/vpmtmr.c b/sys/amd64/vmm/io/vpmtmr.c index 09f763f..1e7bb93 100644 --- a/sys/amd64/vmm/io/vpmtmr.c +++ b/sys/amd64/vmm/io/vpmtmr.c @@ -29,7 +29,6 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/queue.h> -#include <sys/cpuset.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/systm.h> diff --git a/sys/amd64/vmm/io/vrtc.c b/sys/amd64/vmm/io/vrtc.c index ab9cabb..18ebc4b 100644 --- a/sys/amd64/vmm/io/vrtc.c +++ b/sys/amd64/vmm/io/vrtc.c @@ -30,7 +30,6 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> #include <sys/queue.h> -#include <sys/cpuset.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/lock.h> @@ -63,9 +62,12 @@ struct rtcdev { uint8_t reg_b; uint8_t reg_c; uint8_t reg_d; - uint8_t nvram[128 - 14]; + uint8_t nvram[36]; + uint8_t century; + uint8_t nvram2[128 - 51]; } __packed; CTASSERT(sizeof(struct rtcdev) == 128); +CTASSERT(offsetof(struct rtcdev, century) == RTC_CENTURY); struct vrtc { struct vm *vm; @@ -139,20 +141,23 @@ update_enabled(struct vrtc *vrtc) } static time_t -vrtc_curtime(struct vrtc *vrtc) +vrtc_curtime(struct vrtc *vrtc, sbintime_t *basetime) { sbintime_t now, delta; - time_t t; + time_t t, secs; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); t = vrtc->base_rtctime; + *basetime = vrtc->base_uptime; if (update_enabled(vrtc)) { now = sbinuptime(); delta = now - vrtc->base_uptime; KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: " "%#lx to %#lx", vrtc->base_uptime, now)); - t += delta / SBT_1S; + secs = delta / SBT_1S; + t += secs; + *basetime += secs * SBT_1S; } return (t); } @@ -245,6 +250,7 @@ secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update) rtc->day_of_month = rtcset(rtc, ct.day); rtc->month = rtcset(rtc, ct.mon); rtc->year = rtcset(rtc, ct.year % 100); + rtc->century = rtcset(rtc, ct.year / 100); } static int @@ -274,7 +280,7 @@ rtc_to_secs(struct vrtc *vrtc) struct timespec ts; struct rtcdev *rtc; struct vm *vm; - int error, hour, pm, year; + int century, error, hour, pm, year; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); @@ -358,10 +364,14 @@ rtc_to_secs(struct vrtc *vrtc) VM_CTR2(vm, "Invalid RTC year %#x/%d", rtc->year, year); goto fail; } - if (year >= 70) - ct.year = 1900 + year; - else - ct.year = 2000 + year; + + error = rtcget(rtc, rtc->century, ¢ury); + ct.year = 
century * 100 + year; + if (error || ct.year < POSIX_BASE_YEAR) { + VM_CTR2(vm, "Invalid RTC century %#x/%d", rtc->century, + ct.year); + goto fail; + } error = clock_ct_to_ts(&ct, &ts); if (error || ts.tv_sec < 0) { @@ -373,13 +383,19 @@ rtc_to_secs(struct vrtc *vrtc) } return (ts.tv_sec); /* success */ fail: - return (VRTC_BROKEN_TIME); /* failure */ + /* + * Stop updating the RTC if the date/time fields programmed by + * the guest are invalid. + */ + VM_CTR0(vrtc->vm, "Invalid RTC date/time programming detected"); + return (VRTC_BROKEN_TIME); } static int -vrtc_time_update(struct vrtc *vrtc, time_t newtime) +vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase) { struct rtcdev *rtc; + sbintime_t oldbase; time_t oldtime; uint8_t alarm_sec, alarm_min, alarm_hour; @@ -391,16 +407,21 @@ vrtc_time_update(struct vrtc *vrtc, time_t newtime) alarm_hour = rtc->alarm_hour; oldtime = vrtc->base_rtctime; - VM_CTR2(vrtc->vm, "Updating RTC time from %#lx to %#lx", + VM_CTR2(vrtc->vm, "Updating RTC secs from %#lx to %#lx", oldtime, newtime); + oldbase = vrtc->base_uptime; + VM_CTR2(vrtc->vm, "Updating RTC base uptime from %#lx to %#lx", + oldbase, newbase); + vrtc->base_uptime = newbase; + if (newtime == oldtime) return (0); /* * If 'newtime' indicates that RTC updates are disabled then just * record that and return. There is no need to do alarm interrupt - * processing or update 'base_uptime' in this case. + * processing in this case. */ if (newtime == VRTC_BROKEN_TIME) { vrtc->base_rtctime = VRTC_BROKEN_TIME; @@ -446,8 +467,6 @@ vrtc_time_update(struct vrtc *vrtc, time_t newtime) if (uintr_enabled(vrtc)) vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_UPDATE); - vrtc->base_uptime = sbinuptime(); - return (0); } @@ -518,7 +537,7 @@ static void vrtc_callout_handler(void *arg) { struct vrtc *vrtc = arg; - sbintime_t freqsbt; + sbintime_t freqsbt, basetime; time_t rtctime; int error; @@ -540,8 +559,8 @@ vrtc_callout_handler(void *arg) vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c | RTCIR_PERIOD); if (aintr_enabled(vrtc) || uintr_enabled(vrtc)) { - rtctime = vrtc_curtime(vrtc); - error = vrtc_time_update(vrtc, rtctime); + rtctime = vrtc_curtime(vrtc, &basetime); + error = vrtc_time_update(vrtc, rtctime, basetime); KASSERT(error == 0, ("%s: vrtc_time_update error %d", __func__, error)); } @@ -606,7 +625,7 @@ static int vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval) { struct rtcdev *rtc; - sbintime_t oldfreq, newfreq; + sbintime_t oldfreq, newfreq, basetime; time_t curtime, rtctime; int error; uint8_t oldval, changed; @@ -627,19 +646,13 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval) if (changed & RTCSB_HALT) { if ((newval & RTCSB_HALT) == 0) { rtctime = rtc_to_secs(vrtc); + basetime = sbinuptime(); if (rtctime == VRTC_BROKEN_TIME) { - /* - * Stop updating the RTC if the date/time - * programmed by the guest is not correct. 
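The common thread in these vrtc changes is that base_uptime now advances only by the whole seconds actually consumed, so the sub-second remainder is carried forward instead of being discarded at each resynchronization. A worked example of the new vrtc_curtime() arithmetic, assuming delta corresponds to 2.7 seconds:

secs = delta / SBT_1S;		/* 2                                */
t += secs;			/* RTC time advances by 2 s         */
*basetime += secs * SBT_1S;	/* the 0.7 s remainder stays banked */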
- */ - VM_CTR0(vrtc->vm, "Invalid RTC date/time " - "programming detected"); - if (rtc_flag_broken_time) return (-1); } } else { - curtime = vrtc_curtime(vrtc); + curtime = vrtc_curtime(vrtc, &basetime); KASSERT(curtime == vrtc->base_rtctime, ("%s: mismatch " "between vrtc basetime (%#lx) and curtime (%#lx)", __func__, vrtc->base_rtctime, curtime)); @@ -658,7 +671,7 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval) rtctime = VRTC_BROKEN_TIME; rtc->reg_b &= ~RTCSB_UINTR; } - error = vrtc_time_update(vrtc, rtctime); + error = vrtc_time_update(vrtc, rtctime, basetime); KASSERT(error == 0, ("vrtc_time_update error %d", error)); } @@ -738,7 +751,7 @@ vrtc_set_time(struct vm *vm, time_t secs) vrtc = vm_rtc(vm); VRTC_LOCK(vrtc); - error = vrtc_time_update(vrtc, secs); + error = vrtc_time_update(vrtc, secs, sbinuptime()); VRTC_UNLOCK(vrtc); if (error) { @@ -755,11 +768,12 @@ time_t vrtc_get_time(struct vm *vm) { struct vrtc *vrtc; + sbintime_t basetime; time_t t; vrtc = vm_rtc(vm); VRTC_LOCK(vrtc); - t = vrtc_curtime(vrtc); + t = vrtc_curtime(vrtc, &basetime); VRTC_UNLOCK(vrtc); return (t); @@ -777,7 +791,7 @@ vrtc_nvram_write(struct vm *vm, int offset, uint8_t value) * Don't allow writes to RTC control registers or the date/time fields. */ if (offset < offsetof(struct rtcdev, nvram[0]) || - offset >= sizeof(struct rtcdev)) { + offset == RTC_CENTURY || offset >= sizeof(struct rtcdev)) { VM_CTR1(vrtc->vm, "RTC nvram write to invalid offset %d", offset); return (EINVAL); @@ -796,6 +810,7 @@ int vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval) { struct vrtc *vrtc; + sbintime_t basetime; time_t curtime; uint8_t *ptr; @@ -811,8 +826,8 @@ vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval) /* * Update RTC date/time fields if necessary. */ - if (offset < 10) { - curtime = vrtc_curtime(vrtc); + if (offset < 10 || offset == RTC_CENTURY) { + curtime = vrtc_curtime(vrtc, &basetime); secs_to_rtc(curtime, vrtc, 0); } @@ -852,6 +867,7 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, { struct vrtc *vrtc; struct rtcdev *rtc; + sbintime_t basetime; time_t curtime; int error, offset; @@ -869,16 +885,20 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, } error = 0; - curtime = vrtc_curtime(vrtc); - vrtc_time_update(vrtc, curtime); + curtime = vrtc_curtime(vrtc, &basetime); + vrtc_time_update(vrtc, curtime, basetime); - if (in) { - /* - * Update RTC date/time fields if necessary. - */ - if (offset < 10) - secs_to_rtc(curtime, vrtc, 0); + /* + * Update RTC date/time fields if necessary. + * + * This is not just for reads of the RTC. The side-effect of writing + * the century byte requires other RTC date/time fields (e.g. sec) + * to be updated here. + */ + if (offset < 10 || offset == RTC_CENTURY) + secs_to_rtc(curtime, vrtc, 0); + if (in) { if (offset == 12) { /* * XXX @@ -922,6 +942,18 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, *((uint8_t *)rtc + offset) = *val; break; } + + /* + * XXX some guests (e.g. OpenBSD) write the century byte + * outside of RTCSB_HALT so re-calculate the RTC date/time. 
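With the RTC's default BCD data mode, the century/year decode that rtc_to_secs() now performs works out as in this example (bcd2bin() is the stock kernel helper that rtcget() relies on):

/* Century byte 0x20 and year byte 0x15, both BCD-encoded: */
century = bcd2bin(0x20);		/* 20   */
year = bcd2bin(0x15);			/* 15   */
ct.year = century * 100 + year;		/* 2015 */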
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 6bd5bce..bca9b98 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -1293,8 +1293,12 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
 	else if (error != 0)
 		panic("%s: vmm_fetch_instruction error %d", __func__, error);
 
-	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0)
-		return (EFAULT);
+	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) {
+		VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx",
+		    vme->rip + cs_base);
+		*retu = true;	/* dump instruction bytes in userspace */
+		return (0);
+	}
 
 	/*
 	 * If the instruction length was not specified then update it now
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 0b50e92..7172365 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -72,6 +72,8 @@ enum {
 	VIE_OP_TYPE_POP,
 	VIE_OP_TYPE_MOVS,
 	VIE_OP_TYPE_GROUP1,
+	VIE_OP_TYPE_STOS,
+	VIE_OP_TYPE_BITTEST,
 	VIE_OP_TYPE_LAST
 };
 
@@ -91,6 +93,11 @@ static const struct vie_op two_byte_opcodes[256] = {
 		.op_byte = 0xB7,
 		.op_type = VIE_OP_TYPE_MOVZX,
 	},
+	[0xBA] = {
+		.op_byte = 0xBA,
+		.op_type = VIE_OP_TYPE_BITTEST,
+		.op_flags = VIE_OP_F_IMM8,
+	},
 	[0xBE] = {
 		.op_byte = 0xBE,
 		.op_type = VIE_OP_TYPE_MOVSX,
@@ -146,6 +153,16 @@ static const struct vie_op one_byte_opcodes[256] = {
 		.op_type = VIE_OP_TYPE_MOVS,
 		.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
 	},
+	[0xAA] = {
+		.op_byte = 0xAA,
+		.op_type = VIE_OP_TYPE_STOS,
+		.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+	},
+	[0xAB] = {
+		.op_byte = 0xAB,
+		.op_type = VIE_OP_TYPE_STOS,
+		.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+	},
 	[0xC6] = {
 		/* XXX Group 11 extended opcode - not just MOV */
 		.op_byte = 0xC6,
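Note: for orientation, these are the kinds of guest byte sequences that the new table entries route to the handlers below. The encodings are illustrative examples, not taken from the diff:

/*
 *   aa               stosb      -> VIE_OP_TYPE_STOS, 1-byte stores
 *   f3 ab            rep stosd  -> VIE_OP_TYPE_STOS, vie->opsize stores
 *                                  (stosw/stosq with a 66 or REX.W prefix)
 *   0f ba 25 .. 07   btl $7,mem -> VIE_OP_TYPE_BITTEST, imm8 fetched via
 *                                  VIE_OP_F_IMM8
 *
 * Register-only forms of these instructions never touch guest memory and
 * therefore never fault into the MMIO emulation path.
 */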
@@ -803,6 +820,68 @@ done:
 }
 
 static int
+emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *arg)
+{
+	int error, opsize, repeat;
+	uint64_t val;
+	uint64_t rcx, rdi, rflags;
+
+	opsize = (vie->op.op_byte == 0xAA) ? 1 : vie->opsize;
+	repeat = vie->repz_present | vie->repnz_present;
+
+	if (repeat) {
+		error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
+		KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
+
+		/*
+		 * The count register is %rcx, %ecx or %cx depending on the
+		 * address size of the instruction.
+		 */
+		if ((rcx & vie_size2mask(vie->addrsize)) == 0)
+			return (0);
+	}
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
+	KASSERT(!error, ("%s: error %d getting rax", __func__, error));
+
+	error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
+	if (error)
+		return (error);
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
+	KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+	if (rflags & PSL_D)
+		rdi -= opsize;
+	else
+		rdi += opsize;
+
+	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
+	    vie->addrsize);
+	KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
+
+	if (repeat) {
+		rcx = rcx - 1;
+		error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
+		    rcx, vie->addrsize);
+		KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
+
+		/*
+		 * Repeat the instruction if the count register is not zero.
+		 */
+		if ((rcx & vie_size2mask(vie->addrsize)) != 0)
+			vm_restart_instruction(vm, vcpuid);
+	}
+
+	return (0);
+}
+
+static int
 emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
     mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
 {
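Note: emulate_stos() masks %rcx with vie_size2mask(vie->addrsize) because a REP loop must terminate on %cx or %ecx when the guest runs with a 16- or 32-bit address size, not on the full 64-bit register. vie_size2mask() predates this diff and is not shown in it; a sketch of its assumed contract, inferred from the call sites:

static uint64_t
vie_size2mask(int size)
{
	/* 'size' is the operand/address size in bytes: 1, 2, 4 or 8. */
	switch (size) {
	case 1:
		return (0xff);
	case 2:
		return (0xffff);
	case 4:
		return (0xffffffff);
	default:
		return (0xffffffffffffffff);
	}
}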
@@ -1262,6 +1341,48 @@ emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 	return (error);
 }
 
+static int
+emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+    mem_region_read_t memread, mem_region_write_t memwrite, void *memarg)
+{
+	uint64_t val, rflags;
+	int error, bitmask, bitoff;
+
+	/*
+	 * 0F BA is a Group 8 extended opcode.
+	 *
+	 * Currently we only emulate the 'Bit Test' instruction which is
+	 * identified by a ModR/M:reg encoding of 100b.
+	 */
+	if ((vie->reg & 7) != 4)
+		return (EINVAL);
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+	error = memread(vm, vcpuid, gpa, &val, vie->opsize, memarg);
+	if (error)
+		return (error);
+
+	/*
+	 * Intel SDM, Vol 2, Table 3-2:
+	 * "Range of Bit Positions Specified by Bit Offset Operands"
+	 */
+	bitmask = vie->opsize * 8 - 1;
+	bitoff = vie->immediate & bitmask;
+
+	/* Copy the bit into the Carry flag in %rflags */
+	if (val & (1UL << bitoff))
+		rflags |= PSL_C;
+	else
+		rflags &= ~PSL_C;
+
+	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
+	KASSERT(error == 0, ("%s: error %d updating rflags", __func__, error));
+
+	return (0);
+}
+
 int
 vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
     struct vm_guest_paging *paging, mem_region_read_t memread,
@@ -1302,6 +1423,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 		error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread,
 		    memwrite, memarg);
 		break;
+	case VIE_OP_TYPE_STOS:
+		error = emulate_stos(vm, vcpuid, gpa, vie, paging, memread,
+		    memwrite, memarg);
+		break;
 	case VIE_OP_TYPE_AND:
 		error = emulate_and(vm, vcpuid, gpa, vie, memread,
 		    memwrite, memarg);
 		break;
@@ -1314,6 +1439,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 		error = emulate_sub(vm, vcpuid, gpa, vie, memread,
 		    memwrite, memarg);
 		break;
+	case VIE_OP_TYPE_BITTEST:
+		error = emulate_bittest(vm, vcpuid, gpa, vie,
+		    memread, memwrite, memarg);
+		break;
 	default:
 		error = EINVAL;
 		break;
diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c
index fc68a61..63044e8 100644
--- a/sys/amd64/vmm/vmm_ioport.c
+++ b/sys/amd64/vmm/vmm_ioport.c
@@ -28,16 +28,10 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
-#include <sys/types.h>
-#include <sys/queue.h>
-#include <sys/cpuset.h>
 #include <sys/systm.h>
 
-#include <vm/vm.h>
-
 #include <machine/vmm.h>
 #include <machine/vmm_instruction_emul.h>
-#include <x86/psl.h>
 
 #include "vatpic.h"
 #include "vatpit.h"
diff --git a/sys/amd64/vmm/vmm_stat.c b/sys/amd64/vmm/vmm_stat.c
index 9ecf9af..4ae5fb9 100644
--- a/sys/amd64/vmm/vmm_stat.c
+++ b/sys/amd64/vmm/vmm_stat.c
@@ -33,7 +33,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
-#include <sys/smp.h>
 
 #include <machine/vmm.h>
 #include "vmm_util.h"
diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c
index c37d21c..45e08b5 100644
--- a/sys/amd64/vmm/x86.c
+++ b/sys/amd64/vmm/x86.c
@@ -32,7 +32,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/pcpu.h>
 #include <sys/systm.h>
-#include <sys/cpuset.h>
 #include <sys/sysctl.h>
 
 #include <machine/clock.h>
@@ -289,9 +288,8 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 
 		/*
 		 * Machine check handling is done in the host.
-		 * Hide MTRR capability.
 		 */
-		regs[3] &= ~(CPUID_MCA | CPUID_MCE | CPUID_MTRR);
+		regs[3] &= ~(CPUID_MCA | CPUID_MCE);
 
 		/*
 		 * Hide the debug store capability.
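Note: emulate_bittest() only updates PSL_C. That is a legal implementation choice: the SDM defines CF as the value of the selected bit for BT and leaves OF, SF, AF and PF undefined, so the stale values may be preserved. A worked example of the offset math, as a hypothetical helper that mirrors the two lines computing bitmask and bitoff above:

/* Illustrative only: which bit a BT imm8 offset selects. */
static int
bt_selects(int opsize, uint8_t imm8)
{
	int bitmask = opsize * 8 - 1;	/* 7, 15, 31 or 63 */

	/* e.g. opsize 4, imm8 35 -> bit 35 & 31 = 3 of the dword operand */
	return (imm8 & bitmask);
}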