summary | refs | log | tree | commit | diff | stats
path: root/sys/amd64
diff options
context:
space:
mode:
Diffstat (limited to 'sys/amd64')
-rw-r--r--sys/amd64/amd64/apic_vector.S16
-rw-r--r--sys/amd64/amd64/mp_machdep.c1034
-rw-r--r--sys/amd64/conf/GENERIC4
-rw-r--r--sys/amd64/conf/NOTES2
-rw-r--r--sys/amd64/include/smp.h39
-rw-r--r--sys/amd64/include/vm.h2
-rw-r--r--sys/amd64/include/vmm.h6
-rw-r--r--sys/amd64/include/xen/xenfunc.h9
-rw-r--r--sys/amd64/include/xen/xenpmap.h227
-rw-r--r--sys/amd64/include/xen/xenvar.h61
-rw-r--r--sys/amd64/vmm/amd/amdv.c1
-rw-r--r--sys/amd64/vmm/amd/svm.c1
-rw-r--r--sys/amd64/vmm/amd/svm_msr.c24
-rw-r--r--sys/amd64/vmm/amd/vmcb.c1
-rw-r--r--sys/amd64/vmm/intel/vmx_msr.c16
-rw-r--r--sys/amd64/vmm/io/vatpic.c1
-rw-r--r--sys/amd64/vmm/io/vatpit.c1
-rw-r--r--sys/amd64/vmm/io/vhpet.c1
-rw-r--r--sys/amd64/vmm/io/vioapic.c1
-rw-r--r--sys/amd64/vmm/io/vlapic.c20
-rw-r--r--sys/amd64/vmm/io/vpmtmr.c1
-rw-r--r--sys/amd64/vmm/io/vrtc.c118
-rw-r--r--sys/amd64/vmm/vmm.c8
-rw-r--r--sys/amd64/vmm/vmm_instruction_emul.c129
-rw-r--r--sys/amd64/vmm/vmm_ioport.c6
-rw-r--r--sys/amd64/vmm/vmm_stat.c1
-rw-r--r--sys/amd64/vmm/x86.c4
27 files changed, 336 insertions, 1398 deletions
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index c3aac33..4455cab 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -174,6 +174,22 @@ IDTVEC(xen_intr_upcall)
jmp doreti
#endif
+#ifdef HYPERV
+/*
+ * This is the Hyper-V vmbus channel direct callback interrupt.
+ * Only used when the kernel is running on Hyper-V.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(hv_vmbus_callback)
+ PUSH_FRAME
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ movq %rsp, %rdi
+ call hv_vector_handler
+ MEXITCOUNT
+ jmp doreti
+#endif
+
#ifdef SMP
/*
* Global address space TLB shootdown.
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index c81495a..83ca548 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -81,28 +81,11 @@ __FBSDID("$FreeBSD$");
#define BIOS_RESET (0x0f)
#define BIOS_WARM (0x0a)
-/* lock region used by kernel profiling */
-int mcount_lock;
-
-int mp_naps; /* # of Applications processors */
-int boot_cpu_id = -1; /* designated BSP */
-
-extern struct pcpu __pcpu[];
-
-/* AP uses this during bootstrap. Do not staticize. */
-char *bootSTK;
-int bootAP;
-
-/* Free these after use */
-void *bootstacks[MAXCPU];
+extern struct pcpu __pcpu[];
/* Temporary variables for init_secondary() */
char *doublefault_stack;
char *nmi_stack;
-void *dpcpu;
-
-struct pcb stoppcbs[MAXCPU];
-struct susppcb **susppcbs;
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr2;
@@ -112,309 +95,16 @@ uint64_t pcid_cr3;
pmap_t smp_tlb_pmap;
extern int invpcid_works;
-#ifdef COUNT_IPIS
-/* Interrupt counts. */
-static u_long *ipi_preempt_counts[MAXCPU];
-static u_long *ipi_ast_counts[MAXCPU];
-u_long *ipi_invltlb_counts[MAXCPU];
-u_long *ipi_invlrng_counts[MAXCPU];
-u_long *ipi_invlpg_counts[MAXCPU];
-u_long *ipi_invlcache_counts[MAXCPU];
-u_long *ipi_rendezvous_counts[MAXCPU];
-static u_long *ipi_hardclock_counts[MAXCPU];
-#endif
-
-/* Default cpu_ops implementation. */
-struct cpu_ops cpu_ops;
-
extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
-extern int pmap_pcid_enabled;
-
/*
* Local data and functions.
*/
-static volatile cpuset_t ipi_nmi_pending;
-
-/* used to hold the AP's until we are ready to release them */
-struct mtx ap_boot_mtx;
-
-/* Set to 1 once we're ready to let the APs out of the pen. */
-static volatile int aps_ready = 0;
-
-/*
- * Store data from cpu_add() until later in the boot when we actually setup
- * the APs.
- */
-struct cpu_info {
- int cpu_present:1;
- int cpu_bsp:1;
- int cpu_disabled:1;
- int cpu_hyperthread:1;
-} static cpu_info[MAX_APIC_ID + 1];
-int cpu_apic_ids[MAXCPU];
-int apic_cpuids[MAX_APIC_ID + 1];
-
-/* Holds pending bitmap based IPIs per CPU */
-volatile u_int cpu_ipi_pending[MAXCPU];
-
-static u_int boot_address;
-static int cpu_logical; /* logical cpus per core */
-static int cpu_cores; /* cores per package */
-
-static void assign_cpu_ids(void);
-static void set_interrupt_apic_ids(void);
static int start_ap(int apic_id);
-static void release_aps(void *dummy);
-static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */
-static int hyperthreading_allowed = 1;
static u_int bootMP_size;
-
-static void
-mem_range_AP_init(void)
-{
- if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
- mem_range_softc.mr_op->initAP(&mem_range_softc);
-}
-
-static void
-topo_probe_amd(void)
-{
- int core_id_bits;
- int id;
-
- /* AMD processors do not support HTT. */
- cpu_logical = 1;
-
- if ((amd_feature2 & AMDID2_CMP) == 0) {
- cpu_cores = 1;
- return;
- }
-
- core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
- AMDID_COREID_SIZE_SHIFT;
- if (core_id_bits == 0) {
- cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1;
- return;
- }
-
- /* Fam 10h and newer should get here. */
- for (id = 0; id <= MAX_APIC_ID; id++) {
- /* Check logical CPU availability. */
- if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
- continue;
- /* Check if logical CPU has the same package ID. */
- if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits))
- continue;
- cpu_cores++;
- }
-}
-
-/*
- * Round up to the next power of two, if necessary, and then
- * take log2.
- * Returns -1 if argument is zero.
- */
-static __inline int
-mask_width(u_int x)
-{
-
- return (fls(x << (1 - powerof2(x))) - 1);
-}
-
-static void
-topo_probe_0x4(void)
-{
- u_int p[4];
- int pkg_id_bits;
- int core_id_bits;
- int max_cores;
- int max_logical;
- int id;
-
- /* Both zero and one here mean one logical processor per package. */
- max_logical = (cpu_feature & CPUID_HTT) != 0 ?
- (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1;
- if (max_logical <= 1)
- return;
-
- /*
- * Because of uniformity assumption we examine only
- * those logical processors that belong to the same
- * package as BSP. Further, we count number of
- * logical processors that belong to the same core
- * as BSP thus deducing number of threads per core.
- */
- if (cpu_high >= 0x4) {
- cpuid_count(0x04, 0, p);
- max_cores = ((p[0] >> 26) & 0x3f) + 1;
- } else
- max_cores = 1;
- core_id_bits = mask_width(max_logical/max_cores);
- if (core_id_bits < 0)
- return;
- pkg_id_bits = core_id_bits + mask_width(max_cores);
-
- for (id = 0; id <= MAX_APIC_ID; id++) {
- /* Check logical CPU availability. */
- if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
- continue;
- /* Check if logical CPU has the same package ID. */
- if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits))
- continue;
- cpu_cores++;
- /* Check if logical CPU has the same package and core IDs. */
- if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits))
- cpu_logical++;
- }
-
- KASSERT(cpu_cores >= 1 && cpu_logical >= 1,
- ("topo_probe_0x4 couldn't find BSP"));
-
- cpu_cores /= cpu_logical;
- hyperthreading_cpus = cpu_logical;
-}
-
-static void
-topo_probe_0xb(void)
-{
- u_int p[4];
- int bits;
- int cnt;
- int i;
- int logical;
- int type;
- int x;
-
- /* We only support three levels for now. */
- for (i = 0; i < 3; i++) {
- cpuid_count(0x0b, i, p);
-
- /* Fall back if CPU leaf 11 doesn't really exist. */
- if (i == 0 && p[1] == 0) {
- topo_probe_0x4();
- return;
- }
-
- bits = p[0] & 0x1f;
- logical = p[1] &= 0xffff;
- type = (p[2] >> 8) & 0xff;
- if (type == 0 || logical == 0)
- break;
- /*
- * Because of uniformity assumption we examine only
- * those logical processors that belong to the same
- * package as BSP.
- */
- for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) {
- if (!cpu_info[x].cpu_present ||
- cpu_info[x].cpu_disabled)
- continue;
- if (x >> bits == boot_cpu_id >> bits)
- cnt++;
- }
- if (type == CPUID_TYPE_SMT)
- cpu_logical = cnt;
- else if (type == CPUID_TYPE_CORE)
- cpu_cores = cnt;
- }
- if (cpu_logical == 0)
- cpu_logical = 1;
- cpu_cores /= cpu_logical;
-}
-
-/*
- * Both topology discovery code and code that consumes topology
- * information assume top-down uniformity of the topology.
- * That is, all physical packages must be identical and each
- * core in a package must have the same number of threads.
- * Topology information is queried only on BSP, on which this
- * code runs and for which it can query CPUID information.
- * Then topology is extrapolated on all packages using the
- * uniformity assumption.
- */
-static void
-topo_probe(void)
-{
- static int cpu_topo_probed = 0;
-
- if (cpu_topo_probed)
- return;
-
- CPU_ZERO(&logical_cpus_mask);
- if (mp_ncpus <= 1)
- cpu_cores = cpu_logical = 1;
- else if (cpu_vendor_id == CPU_VENDOR_AMD)
- topo_probe_amd();
- else if (cpu_vendor_id == CPU_VENDOR_INTEL) {
- /*
- * See Intel(R) 64 Architecture Processor
- * Topology Enumeration article for details.
- *
- * Note that 0x1 <= cpu_high < 4 case should be
- * compatible with topo_probe_0x4() logic when
- * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1)
- * or it should trigger the fallback otherwise.
- */
- if (cpu_high >= 0xb)
- topo_probe_0xb();
- else if (cpu_high >= 0x1)
- topo_probe_0x4();
- }
-
- /*
- * Fallback: assume each logical CPU is in separate
- * physical package. That is, no multi-core, no SMT.
- */
- if (cpu_cores == 0 || cpu_logical == 0)
- cpu_cores = cpu_logical = 1;
- cpu_topo_probed = 1;
-}
-
-struct cpu_group *
-cpu_topo(void)
-{
- int cg_flags;
-
- /*
- * Determine whether any threading flags are
- * necessry.
- */
- topo_probe();
- if (cpu_logical > 1 && hyperthreading_cpus)
- cg_flags = CG_FLAG_HTT;
- else if (cpu_logical > 1)
- cg_flags = CG_FLAG_SMT;
- else
- cg_flags = 0;
- if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
- printf("WARNING: Non-uniform processors.\n");
- printf("WARNING: Using suboptimal topology.\n");
- return (smp_topo_none());
- }
- /*
- * No multi-core or hyper-threaded.
- */
- if (cpu_logical * cpu_cores == 1)
- return (smp_topo_none());
- /*
- * Only HTT no multi-core.
- */
- if (cpu_logical > 1 && cpu_cores == 1)
- return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags));
- /*
- * Only multi-core no HTT.
- */
- if (cpu_cores > 1 && cpu_logical == 1)
- return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags));
- /*
- * Both HTT and multi-core.
- */
- return (smp_topo_2level(CG_SHARE_L2, cpu_cores,
- CG_SHARE_L1, cpu_logical, cg_flags));
-}
+static u_int boot_address;
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -433,85 +123,6 @@ mp_bootaddress(u_int basemem)
return mptramp_pagetables;
}
-void
-cpu_add(u_int apic_id, char boot_cpu)
-{
-
- if (apic_id > MAX_APIC_ID) {
- panic("SMP: APIC ID %d too high", apic_id);
- return;
- }
- KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
- apic_id));
- cpu_info[apic_id].cpu_present = 1;
- if (boot_cpu) {
- KASSERT(boot_cpu_id == -1,
- ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
- boot_cpu_id));
- boot_cpu_id = apic_id;
- cpu_info[apic_id].cpu_bsp = 1;
- }
- if (mp_ncpus < MAXCPU) {
- mp_ncpus++;
- mp_maxid = mp_ncpus - 1;
- }
- if (bootverbose)
- printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
- "AP");
-}
-
-void
-cpu_mp_setmaxid(void)
-{
-
- /*
- * mp_maxid should be already set by calls to cpu_add().
- * Just sanity check its value here.
- */
- if (mp_ncpus == 0)
- KASSERT(mp_maxid == 0,
- ("%s: mp_ncpus is zero, but mp_maxid is not", __func__));
- else if (mp_ncpus == 1)
- mp_maxid = 0;
- else
- KASSERT(mp_maxid >= mp_ncpus - 1,
- ("%s: counters out of sync: max %d, count %d", __func__,
- mp_maxid, mp_ncpus));
-}
-
-int
-cpu_mp_probe(void)
-{
-
- /*
- * Always record BSP in CPU map so that the mbuf init code works
- * correctly.
- */
- CPU_SETOF(0, &all_cpus);
- if (mp_ncpus == 0) {
- /*
- * No CPUs were found, so this must be a UP system. Setup
- * the variables to represent a system with a single CPU
- * with an id of 0.
- */
- mp_ncpus = 1;
- return (0);
- }
-
- /* At least one CPU was found. */
- if (mp_ncpus == 1) {
- /*
- * One CPU was found, so this must be a UP system with
- * an I/O APIC.
- */
- mp_maxid = 0;
- return (0);
- }
-
- /* At least two CPUs were found. */
- return (1);
-}
-
/*
* Initialize the IPI handlers and start up the AP's.
*/
@@ -575,47 +186,6 @@ cpu_mp_start(void)
/*
- * Print various information about the SMP system hardware and setup.
- */
-void
-cpu_mp_announce(void)
-{
- const char *hyperthread;
- int i;
-
- printf("FreeBSD/SMP: %d package(s) x %d core(s)",
- mp_ncpus / (cpu_cores * cpu_logical), cpu_cores);
- if (hyperthreading_cpus > 1)
- printf(" x %d HTT threads", cpu_logical);
- else if (cpu_logical > 1)
- printf(" x %d SMT threads", cpu_logical);
- printf("\n");
-
- /* List active CPUs first. */
- printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
- for (i = 1; i < mp_ncpus; i++) {
- if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread)
- hyperthread = "/HT";
- else
- hyperthread = "";
- printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread,
- cpu_apic_ids[i]);
- }
-
- /* List disabled CPUs last. */
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled)
- continue;
- if (cpu_info[i].cpu_hyperthread)
- hyperthread = "/HT";
- else
- hyperthread = "";
- printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread,
- i);
- }
-}
-
-/*
* AP CPU's call this to initialize themselves.
*/
void
@@ -624,7 +194,6 @@ init_secondary(void)
struct pcpu *pc;
struct nmi_pcpu *np;
u_int64_t msr, cr0;
- u_int cpuid;
int cpu, gsel_tss, x;
struct region_descriptor ap_gdt;
@@ -712,94 +281,7 @@ init_secondary(void)
while (!aps_ready)
ia32_pause();
- /*
- * On real hardware, switch to x2apic mode if possible. Do it
- * after aps_ready was signalled, to avoid manipulating the
- * mode while BSP might still want to send some IPI to us
- * (second startup IPI is ignored on modern hardware etc).
- */
- lapic_xapic_mode();
-
- /* Initialize the PAT MSR. */
- pmap_init_pat();
-
- /* set up CPU registers and state */
- cpu_setregs();
-
- /* set up SSE/NX */
- initializecpu();
-
- /* set up FPU state on the AP */
- fpuinit();
-
- if (cpu_ops.cpu_init)
- cpu_ops.cpu_init();
-
- /* A quick check from sanity claus */
- cpuid = PCPU_GET(cpuid);
- if (PCPU_GET(apic_id) != lapic_id()) {
- printf("SMP: cpuid = %d\n", cpuid);
- printf("SMP: actual apic_id = %d\n", lapic_id());
- printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
- panic("cpuid mismatch! boom!!");
- }
-
- /* Initialize curthread. */
- KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
- PCPU_SET(curthread, PCPU_GET(idlethread));
-
- mca_init();
-
- mtx_lock_spin(&ap_boot_mtx);
-
- /* Init local apic for irq's */
- lapic_setup(1);
-
- /* Set memory range attributes for this CPU to match the BSP */
- mem_range_AP_init();
-
- smp_cpus++;
-
- CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
- printf("SMP: AP CPU #%d Launched!\n", cpuid);
-
- /* Determine if we are a logical CPU. */
- /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
- if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
- CPU_SET(cpuid, &logical_cpus_mask);
-
- if (bootverbose)
- lapic_dump("AP");
-
- if (smp_cpus == mp_ncpus) {
- /* enable IPI's, tlb shootdown, freezes etc */
- atomic_store_rel_int(&smp_started, 1);
- }
-
- /*
- * Enable global pages TLB extension
- * This also implicitly flushes the TLB
- */
-
- load_cr4(rcr4() | CR4_PGE);
- if (pmap_pcid_enabled)
- load_cr4(rcr4() | CR4_PCIDE);
- load_ds(_udatasel);
- load_es(_udatasel);
- load_fs(_ufssel);
- mtx_unlock_spin(&ap_boot_mtx);
-
- /* Wait until all the AP's are up. */
- while (smp_started == 0)
- ia32_pause();
-
- /* Start per-CPU event timers. */
- cpu_initclocks_ap();
-
- sched_throw(NULL);
-
- panic("scheduler returned us to %s", __func__);
- /* NOTREACHED */
+ init_secondary_tail();
}
/*******************************************************************
@@ -807,108 +289,6 @@ init_secondary(void)
*/
/*
- * We tell the I/O APIC code about all the CPUs we want to receive
- * interrupts. If we don't want certain CPUs to receive IRQs we
- * can simply not tell the I/O APIC code about them in this function.
- * We also do not tell it about the BSP since it tells itself about
- * the BSP internally to work with UP kernels and on UP machines.
- */
-static void
-set_interrupt_apic_ids(void)
-{
- u_int i, apic_id;
-
- for (i = 0; i < MAXCPU; i++) {
- apic_id = cpu_apic_ids[i];
- if (apic_id == -1)
- continue;
- if (cpu_info[apic_id].cpu_bsp)
- continue;
- if (cpu_info[apic_id].cpu_disabled)
- continue;
-
- /* Don't let hyperthreads service interrupts. */
- if (cpu_logical > 1 &&
- apic_id % cpu_logical != 0)
- continue;
-
- intr_add_cpu(i);
- }
-}
-
-/*
- * Assign logical CPU IDs to local APICs.
- */
-static void
-assign_cpu_ids(void)
-{
- u_int i;
-
- TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
- &hyperthreading_allowed);
-
- /* Check for explicitly disabled CPUs. */
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
- continue;
-
- if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
- cpu_info[i].cpu_hyperthread = 1;
-
- /*
- * Don't use HT CPU if it has been disabled by a
- * tunable.
- */
- if (hyperthreading_allowed == 0) {
- cpu_info[i].cpu_disabled = 1;
- continue;
- }
- }
-
- /* Don't use this CPU if it has been disabled by a tunable. */
- if (resource_disabled("lapic", i)) {
- cpu_info[i].cpu_disabled = 1;
- continue;
- }
- }
-
- if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
- hyperthreading_cpus = 0;
- cpu_logical = 1;
- }
-
- /*
- * Assign CPU IDs to local APIC IDs and disable any CPUs
- * beyond MAXCPU. CPU 0 is always assigned to the BSP.
- *
- * To minimize confusion for userland, we attempt to number
- * CPUs such that all threads and cores in a package are
- * grouped together. For now we assume that the BSP is always
- * the first thread in a package and just start adding APs
- * starting with the BSP's APIC ID.
- */
- mp_ncpus = 1;
- cpu_apic_ids[0] = boot_cpu_id;
- apic_cpuids[boot_cpu_id] = 0;
- for (i = boot_cpu_id + 1; i != boot_cpu_id;
- i == MAX_APIC_ID ? i = 0 : i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
- cpu_info[i].cpu_disabled)
- continue;
-
- if (mp_ncpus < MAXCPU) {
- cpu_apic_ids[mp_ncpus] = i;
- apic_cpuids[i] = mp_ncpus;
- mp_ncpus++;
- } else
- cpu_info[i].cpu_disabled = 1;
- }
- KASSERT(mp_maxid >= mp_ncpus - 1,
- ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
- mp_ncpus));
-}
-
-/*
* start each AP in our list
*/
int
@@ -1026,129 +406,6 @@ start_ap(int apic_id)
return 0; /* return FAILURE */
}
-#ifdef COUNT_XINVLTLB_HITS
-u_int xhits_gbl[MAXCPU];
-u_int xhits_pg[MAXCPU];
-u_int xhits_rng[MAXCPU];
-static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
- sizeof(xhits_gbl), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
- sizeof(xhits_pg), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
- sizeof(xhits_rng), "IU", "");
-
-u_int ipi_global;
-u_int ipi_page;
-u_int ipi_range;
-u_int ipi_range_size;
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW,
- &ipi_range_size, 0, "");
-
-u_int ipi_masked_global;
-u_int ipi_masked_page;
-u_int ipi_masked_range;
-u_int ipi_masked_range_size;
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
- &ipi_masked_global, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
- &ipi_masked_page, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
- &ipi_masked_range, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
- &ipi_masked_range_size, 0, "");
-#endif /* COUNT_XINVLTLB_HITS */
-
-/*
- * Init and startup IPI.
- */
-void
-ipi_startup(int apic_id, int vector)
-{
-
- /*
- * This attempts to follow the algorithm described in the
- * Intel Multiprocessor Specification v1.4 in section B.4.
- * For each IPI, we allow the local APIC ~20us to deliver the
- * IPI. If that times out, we panic.
- */
-
- /*
- * first we do an INIT IPI: this INIT IPI might be run, resetting
- * and running the target CPU. OR this INIT IPI might be latched (P5
- * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
- * ignored.
- */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
- APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
- lapic_ipi_wait(100);
-
- /* Explicitly deassert the INIT IPI. */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
- apic_id);
-
- DELAY(10000); /* wait ~10mS */
-
- /*
- * next we do a STARTUP IPI: the previous INIT IPI might still be
- * latched, (P5 bug) this 1st STARTUP would then terminate
- * immediately, and the previously started INIT IPI would continue. OR
- * the previous INIT IPI has already run. and this STARTUP IPI will
- * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
- * will run.
- */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
- vector, apic_id);
- if (!lapic_ipi_wait(100))
- panic("Failed to deliver first STARTUP IPI to APIC %d",
- apic_id);
- DELAY(200); /* wait ~200uS */
-
- /*
- * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
- * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
- * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
- * recognized after hardware RESET or INIT IPI.
- */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
- vector, apic_id);
- if (!lapic_ipi_wait(100))
- panic("Failed to deliver second STARTUP IPI to APIC %d",
- apic_id);
-
- DELAY(200); /* wait ~200uS */
-}
-
-/*
- * Send an IPI to specified CPU handling the bitmap logic.
- */
-static void
-ipi_send_cpu(int cpu, u_int ipi)
-{
- u_int bitmap, old_pending, new_pending;
-
- KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
-
- if (IPI_IS_BITMAPED(ipi)) {
- bitmap = 1 << ipi;
- ipi = IPI_BITMAP_VECTOR;
- do {
- old_pending = cpu_ipi_pending[cpu];
- new_pending = old_pending | bitmap;
- } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
- old_pending, new_pending));
- if (old_pending)
- return;
- }
- lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
-}
-
/*
* Flush the TLB on all other CPU's
*/
@@ -1228,26 +485,6 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
}
void
-smp_cache_flush(void)
-{
-
- if (smp_started)
- smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0);
-}
-
-void
-smp_invltlb(pmap_t pmap)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_global++;
-#endif
- }
-}
-
-void
smp_invlpg(pmap_t pmap, vm_offset_t addr)
{
@@ -1312,210 +549,23 @@ smp_masked_invlpg_range(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
}
void
-ipi_bitmap_handler(struct trapframe frame)
-{
- struct trapframe *oldframe;
- struct thread *td;
- int cpu = PCPU_GET(cpuid);
- u_int ipi_bitmap;
-
- critical_enter();
- td = curthread;
- td->td_intr_nesting_level++;
- oldframe = td->td_intr_frame;
- td->td_intr_frame = &frame;
- ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
- if (ipi_bitmap & (1 << IPI_PREEMPT)) {
-#ifdef COUNT_IPIS
- (*ipi_preempt_counts[cpu])++;
-#endif
- sched_preempt(td);
- }
- if (ipi_bitmap & (1 << IPI_AST)) {
-#ifdef COUNT_IPIS
- (*ipi_ast_counts[cpu])++;
-#endif
- /* Nothing to do for AST */
- }
- if (ipi_bitmap & (1 << IPI_HARDCLOCK)) {
-#ifdef COUNT_IPIS
- (*ipi_hardclock_counts[cpu])++;
-#endif
- hardclockintr();
- }
- td->td_intr_frame = oldframe;
- td->td_intr_nesting_level--;
- critical_exit();
-}
-
-/*
- * send an IPI to a set of cpus.
- */
-void
-ipi_selected(cpuset_t cpus, u_int ipi)
-{
- int cpu;
-
- /*
- * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
- * of help in order to understand what is the source.
- * Set the mask of receiving CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
-
- while ((cpu = CPU_FFS(&cpus)) != 0) {
- cpu--;
- CPU_CLR(cpu, &cpus);
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
- ipi_send_cpu(cpu, ipi);
- }
-}
-
-/*
- * send an IPI to a specific CPU.
- */
-void
-ipi_cpu(int cpu, u_int ipi)
-{
-
- /*
- * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
- * of help in order to understand what is the source.
- * Set the mask of receiving CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
-
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
- ipi_send_cpu(cpu, ipi);
-}
-
-/*
- * send an IPI to all CPUs EXCEPT myself
- */
-void
-ipi_all_but_self(u_int ipi)
+smp_cache_flush(void)
{
- cpuset_t other_cpus;
-
- other_cpus = all_cpus;
- CPU_CLR(PCPU_GET(cpuid), &other_cpus);
- if (IPI_IS_BITMAPED(ipi)) {
- ipi_selected(other_cpus, ipi);
- return;
- }
-
- /*
- * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
- * of help in order to understand what is the source.
- * Set the mask of receiving CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
-
- CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
- lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
+ if (smp_started)
+ smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0);
}
-int
-ipi_nmi_handler()
-{
- u_int cpuid;
-
- /*
- * As long as there is not a simple way to know about a NMI's
- * source, if the bitmask for the current CPU is present in
- * the global pending bitword an IPI_STOP_HARD has been issued
- * and should be handled.
- */
- cpuid = PCPU_GET(cpuid);
- if (!CPU_ISSET(cpuid, &ipi_nmi_pending))
- return (1);
-
- CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending);
- cpustop_handler();
- return (0);
-}
-
-/*
- * Handle an IPI_STOP by saving our current context and spinning until we
- * are resumed.
- */
void
-cpustop_handler(void)
-{
- u_int cpu;
-
- cpu = PCPU_GET(cpuid);
-
- savectx(&stoppcbs[cpu]);
-
- /* Indicate that we are stopped */
- CPU_SET_ATOMIC(cpu, &stopped_cpus);
-
- /* Wait for restart */
- while (!CPU_ISSET(cpu, &started_cpus))
- ia32_pause();
-
- CPU_CLR_ATOMIC(cpu, &started_cpus);
- CPU_CLR_ATOMIC(cpu, &stopped_cpus);
+smp_invltlb(pmap_t pmap)
+{
-#ifdef DDB
- amd64_db_resume_dbreg();
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
#endif
-
- if (cpu == 0 && cpustop_restartfunc != NULL) {
- cpustop_restartfunc();
- cpustop_restartfunc = NULL;
- }
-}
-
-/*
- * Handle an IPI_SUSPEND by saving our current context and spinning until we
- * are resumed.
- */
-void
-cpususpend_handler(void)
-{
- u_int cpu;
-
- mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
-
- cpu = PCPU_GET(cpuid);
- if (savectx(&susppcbs[cpu]->sp_pcb)) {
- fpususpend(susppcbs[cpu]->sp_fpususpend);
- wbinvd();
- CPU_SET_ATOMIC(cpu, &suspended_cpus);
- } else {
- fpuresume(susppcbs[cpu]->sp_fpususpend);
- pmap_init_pat();
- initializecpu();
- PCPU_SET(switchtime, 0);
- PCPU_SET(switchticks, ticks);
-
- /* Indicate that we are resumed */
- CPU_CLR_ATOMIC(cpu, &suspended_cpus);
}
-
- /* Wait for resume */
- while (!CPU_ISSET(cpu, &started_cpus))
- ia32_pause();
-
- if (cpu_ops.cpu_resume)
- cpu_ops.cpu_resume();
- if (vmm_resume_p)
- vmm_resume_p();
-
- /* Resume MCA and local APIC */
- lapic_xapic_mode();
- mca_resume();
- lapic_setup(0);
-
- CPU_CLR_ATOMIC(cpu, &started_cpus);
- /* Indicate that we are resumed */
- CPU_CLR_ATOMIC(cpu, &suspended_cpus);
}
/*
@@ -1678,63 +728,3 @@ invlrng_handler(void)
atomic_add_int(&smp_tlb_wait, 1);
}
-
-void
-invlcache_handler(void)
-{
-#ifdef COUNT_IPIS
- (*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- wbinvd();
- atomic_add_int(&smp_tlb_wait, 1);
-}
-
-/*
- * This is called once the rest of the system is up and running and we're
- * ready to let the AP's out of the pen.
- */
-static void
-release_aps(void *dummy __unused)
-{
-
- if (mp_ncpus == 1)
- return;
- atomic_store_rel_int(&aps_ready, 1);
- while (smp_started == 0)
- ia32_pause();
-}
-SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
-
-#ifdef COUNT_IPIS
-/*
- * Setup interrupt counters for IPI handlers.
- */
-static void
-mp_ipi_intrcnt(void *dummy)
-{
- char buf[64];
- int i;
-
- CPU_FOREACH(i) {
- snprintf(buf, sizeof(buf), "cpu%d:invltlb", i);
- intrcnt_add(buf, &ipi_invltlb_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:invlrng", i);
- intrcnt_add(buf, &ipi_invlrng_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:invlpg", i);
- intrcnt_add(buf, &ipi_invlpg_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:invlcache", i);
- intrcnt_add(buf, &ipi_invlcache_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:preempt", i);
- intrcnt_add(buf, &ipi_preempt_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:ast", i);
- intrcnt_add(buf, &ipi_ast_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i);
- intrcnt_add(buf, &ipi_rendezvous_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
- intrcnt_add(buf, &ipi_hardclock_counts[i]);
- }
-}
-SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
-#endif
-
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index bdaca33..c24dd5a 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -340,7 +340,9 @@ device virtio_blk # VirtIO Block device
device virtio_scsi # VirtIO SCSI device
device virtio_balloon # VirtIO Memory Balloon device
-# HyperV drivers
+# HyperV drivers and enhancement support
+# NOTE: HYPERV depends on hyperv. They must be added or removed together.
+options HYPERV # Hyper-V kernel infrastructure
device hyperv # HyperV drivers
# Xen HVM Guest Optimizations
diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES
index 9b697f0..e0fe465 100644
--- a/sys/amd64/conf/NOTES
+++ b/sys/amd64/conf/NOTES
@@ -494,6 +494,8 @@ device virtio_balloon # VirtIO Memory Balloon device
device virtio_random # VirtIO Entropy device
device virtio_console # VirtIO Console device
+# Microsoft Hyper-V enhancement support
+options HYPERV # Hyper-V kernel infrastructure
device hyperv # HyperV drivers
# Xen HVM Guest Optimizations
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index 3a4b6b3..034a693 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -35,6 +35,39 @@ extern int mp_naps;
extern int boot_cpu_id;
extern struct pcb stoppcbs[];
extern int cpu_apic_ids[];
+extern int bootAP;
+extern void *dpcpu;
+extern char *bootSTK;
+extern int bootAP;
+extern void *bootstacks[];
+extern volatile u_int cpu_ipi_pending[];
+extern volatile int aps_ready;
+extern struct mtx ap_boot_mtx;
+extern int cpu_logical;
+extern int cpu_cores;
+extern int pmap_pcid_enabled;
+extern u_int xhits_gbl[];
+extern u_int xhits_pg[];
+extern u_int xhits_rng[];
+extern u_int ipi_global;
+extern u_int ipi_page;
+extern u_int ipi_range;
+extern u_int ipi_range_size;
+extern u_int ipi_masked_global;
+extern u_int ipi_masked_page;
+extern u_int ipi_masked_range;
+extern u_int ipi_masked_range_size;
+
+extern volatile int smp_tlb_wait;
+
+struct cpu_info {
+ int cpu_present:1;
+ int cpu_bsp:1;
+ int cpu_disabled:1;
+ int cpu_hyperthread:1;
+};
+extern struct cpu_info cpu_info[];
+
#ifdef COUNT_IPIS
extern u_long *ipi_invltlb_counts[MAXCPU];
extern u_long *ipi_invlrng_counts[MAXCPU];
@@ -60,9 +93,11 @@ inthand_t
struct pmap;
/* functions in mp_machdep.c */
+void assign_cpu_ids(void);
void cpu_add(u_int apic_id, char boot_cpu);
void cpustop_handler(void);
void cpususpend_handler(void);
+void init_secondary_tail(void);
void invltlb_handler(void);
void invltlb_pcid_handler(void);
void invlpg_handler(void);
@@ -77,6 +112,7 @@ void ipi_cpu(int cpu, u_int ipi);
int ipi_nmi_handler(void);
void ipi_selected(cpuset_t cpus, u_int ipi);
u_int mp_bootaddress(u_int);
+void set_interrupt_apic_ids(void);
void smp_cache_flush(void);
void smp_invlpg(struct pmap *pmap, vm_offset_t addr);
void smp_masked_invlpg(cpuset_t mask, struct pmap *pmap, vm_offset_t addr);
@@ -87,6 +123,9 @@ void smp_masked_invlpg_range(cpuset_t mask, struct pmap *pmap,
void smp_invltlb(struct pmap *pmap);
void smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
int native_start_all_aps(void);
+void mem_range_AP_init(void);
+void topo_probe(void);
+void ipi_send_cpu(int cpu, u_int ipi);
#endif /* !LOCORE */
#endif /* SMP */
diff --git a/sys/amd64/include/vm.h b/sys/amd64/include/vm.h
index 6573e37..22d2eca 100644
--- a/sys/amd64/include/vm.h
+++ b/sys/amd64/include/vm.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Copyright (c) 2009 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 52294bd..7c617be 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -204,13 +204,12 @@ int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
int vm_apicid2vcpuid(struct vm *vm, int apicid);
int vm_activate_cpu(struct vm *vm, int vcpu);
-cpuset_t vm_active_cpus(struct vm *vm);
-cpuset_t vm_suspended_cpus(struct vm *vm);
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
+#ifdef _SYS__CPUSET_H_
/*
* Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
* The rendezvous 'func(arg)' is not allowed to do anything that will
@@ -228,6 +227,9 @@ void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
vm_rendezvous_func_t func, void *arg);
+cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_suspended_cpus(struct vm *vm);
+#endif /* _SYS__CPUSET_H_ */
static __inline int
vcpu_rendezvous_pending(void *rendezvous_cookie)
diff --git a/sys/amd64/include/xen/xenfunc.h b/sys/amd64/include/xen/xenfunc.h
index d03d4f6..d8a6b5c 100644
--- a/sys/amd64/include/xen/xenfunc.h
+++ b/sys/amd64/include/xen/xenfunc.h
@@ -29,12 +29,7 @@
#ifndef _XEN_XENFUNC_H_
#define _XEN_XENFUNC_H_
-#ifdef XENHVM
#include <machine/xen/xenvar.h>
-#else
-#include <machine/xen/xenpmap.h>
-#include <machine/segments.h>
-#endif
#define BKPT __asm__("int3");
#define XPQ_CALL_DEPTH 5
@@ -64,10 +59,6 @@ void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line);
#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0)
#endif
-#ifndef XENHVM
-void xen_update_descriptor(union descriptor *, union descriptor *);
-#endif
-
extern struct mtx balloon_lock;
#if 0
#define balloon_lock(__flags) mtx_lock_irqsave(&balloon_lock, __flags)
diff --git a/sys/amd64/include/xen/xenpmap.h b/sys/amd64/include/xen/xenpmap.h
deleted file mode 100644
index d768dad..0000000
--- a/sys/amd64/include/xen/xenpmap.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * Copyright (c) 2004,2005 Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _XEN_XENPMAP_H_
-#define _XEN_XENPMAP_H_
-
-#include <machine/xen/features.h>
-
-void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int);
-void xen_pt_switch(vm_paddr_t);
-void xen_set_ldt(vm_paddr_t, unsigned long);
-void xen_pgdpt_pin(vm_paddr_t);
-void xen_pgd_pin(vm_paddr_t);
-void xen_pgd_unpin(vm_paddr_t);
-void xen_pt_pin(vm_paddr_t);
-void xen_pt_unpin(vm_paddr_t);
-void xen_flush_queue(void);
-void xen_check_queue(void);
-#if 0
-void pmap_ref(pt_entry_t *pte, vm_paddr_t ma);
-#endif
-
-#ifdef INVARIANTS
-#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__)
-#else
-#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0)
-#endif
-
-#ifdef PMAP_DEBUG
-#define PMAP_REF pmap_ref
-#define PMAP_DEC_REF_PAGE pmap_dec_ref_page
-#define PMAP_MARK_PRIV pmap_mark_privileged
-#define PMAP_MARK_UNPRIV pmap_mark_unprivileged
-#else
-#define PMAP_MARK_PRIV(a)
-#define PMAP_MARK_UNPRIV(a)
-#define PMAP_REF(a, b)
-#define PMAP_DEC_REF_PAGE(a)
-#endif
-
-#define ALWAYS_SYNC 0
-
-#ifdef PT_DEBUG
-#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__)
-#else
-#define PT_LOG()
-#endif
-
-#define INVALID_P2M_ENTRY (~0UL)
-
-#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */
-
-#define SH_PD_SET_VA 1
-#define SH_PD_SET_VA_MA 2
-#define SH_PD_SET_VA_CLEAR 3
-
-struct pmap;
-void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type);
-#ifdef notyet
-static vm_paddr_t
-vptetomachpte(vm_paddr_t *pte)
-{
- vm_offset_t offset, ppte;
- vm_paddr_t pgoffset, retval, *pdir_shadow_ptr;
- int pgindex;
-
- ppte = (vm_offset_t)pte;
- pgoffset = (ppte & PAGE_MASK);
- offset = ppte - (vm_offset_t)PTmap;
- pgindex = ppte >> PDRSHIFT;
-
- pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow);
- retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset;
- return (retval);
-}
-#endif
-#define PT_GET(_ptp) \
- (pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : (0))
-
-#ifdef WRITABLE_PAGETABLES
-
-#define PT_SET_VA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- PT_LOG(); \
- *(_ptp) = xpmap_ptom((_npte)); \
-} while (/*CONSTCOND*/0)
-#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- PT_LOG(); \
- *(_ptp) = (_npte); \
-} while (/*CONSTCOND*/0)
-#define PT_CLEAR_VA(_ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- PT_LOG(); \
- *(_ptp) = 0; \
-} while (/*CONSTCOND*/0)
-
-#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#else /* !WRITABLE_PAGETABLES */
-
-#define PT_SET_VA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- xen_queue_pt_update(vtomach(_ptp), \
- xpmap_ptom(_npte)); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- xen_queue_pt_update(vtomach(_ptp), _npte); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PT_CLEAR_VA(_ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- xen_queue_pt_update(vtomach(_ptp), 0); \
- if (sync || ALWAYS_SYNC) \
- xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#define PD_SET_VA(_pmap, _ptepindex,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_SET_VA_MA(_pmap, _ptepindex,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#endif
-
-#define PT_SET_MA(_va, _ma) \
-do { \
- PANIC_IF(HYPERVISOR_update_va_mapping(((unsigned long)(_va)),\
- (_ma), \
- UVMF_INVLPG| UVMF_ALL) < 0); \
-} while (/*CONSTCOND*/0)
-
-#define PT_UPDATES_FLUSH() do { \
- xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-static __inline vm_paddr_t
-xpmap_mtop(vm_paddr_t mpa)
-{
- vm_paddr_t tmp = (mpa & PG_FRAME);
-
- return machtophys(tmp) | (mpa & ~PG_FRAME);
-}
-
-static __inline vm_paddr_t
-xpmap_ptom(vm_paddr_t ppa)
-{
- vm_paddr_t tmp = (ppa & PG_FRAME);
-
- return phystomach(tmp) | (ppa & ~PG_FRAME);
-}
-
-static __inline void
-set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-#ifdef notyet
- PANIC_IF(max_mapnr && pfn >= max_mapnr);
-#endif
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
-#ifdef notyet
- PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY));
-#endif
- return;
- }
- xen_phys_machine[pfn] = mfn;
-}
-
-
-
-
-#endif /* _XEN_XENPMAP_H_ */
diff --git a/sys/amd64/include/xen/xenvar.h b/sys/amd64/include/xen/xenvar.h
index d9dbc5d..110a351 100644
--- a/sys/amd64/include/xen/xenvar.h
+++ b/sys/amd64/include/xen/xenvar.h
@@ -48,68 +48,7 @@ if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__
#define TRACE_DEBUG(argflags, _f, _a...)
#endif
-#ifdef XENHVM
-
-static inline vm_paddr_t
-phystomach(vm_paddr_t pa)
-{
-
- return (pa);
-}
-
-static inline vm_paddr_t
-machtophys(vm_paddr_t ma)
-{
-
- return (ma);
-}
-
#define vtomach(va) pmap_kextract((vm_offset_t) (va))
-#define PFNTOMFN(pa) (pa)
-#define MFNTOPFN(ma) (ma)
-
-#define set_phys_to_machine(pfn, mfn) ((void)0)
-#define phys_to_machine_mapping_valid(pfn) (TRUE)
-#define PT_UPDATES_FLUSH() ((void)0)
-
-#else
-
-extern xen_pfn_t *xen_phys_machine;
-
-
-extern xen_pfn_t *xen_machine_phys;
-/* Xen starts physical pages after the 4MB ISA hole -
- * FreeBSD doesn't
- */
-
-
-#undef ADD_ISA_HOLE /* XXX */
-
-#ifdef ADD_ISA_HOLE
-#define ISA_INDEX_OFFSET 1024
-#define ISA_PDR_OFFSET 1
-#else
-#define ISA_INDEX_OFFSET 0
-#define ISA_PDR_OFFSET 0
-#endif
-
-
-#define PFNTOMFN(i) (xen_phys_machine[(i)])
-#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)])
-
-#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE)
-#define PTOV(x) (((uintptr_t)(x)) + KERNBASE)
-
-#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT)
-#define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT)
-
-#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT)
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-
-#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT)
-#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT)
-
-#endif
void xpq_init(void);
diff --git a/sys/amd64/vmm/amd/amdv.c b/sys/amd64/vmm/amd/amdv.c
index acb3a3d..3157e21 100644
--- a/sys/amd64/vmm/amd/amdv.c
+++ b/sys/amd64/vmm/amd/amdv.c
@@ -32,7 +32,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
-#include <sys/smp.h>
#include <machine/vmm.h>
#include "io/iommu.h"
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index f505ea1..7cc13ca 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -802,6 +802,7 @@ svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit)
case CPU_MODE_REAL:
vmexit->u.inst_emul.cs_base = seg.base;
vmexit->u.inst_emul.cs_d = 0;
+ break;
case CPU_MODE_PROTECTED:
case CPU_MODE_COMPATIBILITY:
vmexit->u.inst_emul.cs_base = seg.base;
diff --git a/sys/amd64/vmm/amd/svm_msr.c b/sys/amd64/vmm/amd/svm_msr.c
index 100af4b..d3a6fe8 100644
--- a/sys/amd64/vmm/amd/svm_msr.c
+++ b/sys/amd64/vmm/amd/svm_msr.c
@@ -27,12 +27,17 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/types.h>
+#include <sys/param.h>
#include <sys/errno.h>
+#include <sys/systm.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
+#include <machine/vmm.h>
+#include "svm.h"
+#include "vmcb.h"
+#include "svm_softc.h"
#include "svm_msr.h"
#ifndef MSR_AMDK8_IPM
@@ -105,6 +110,14 @@ svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t *result,
int error = 0;
switch (num) {
+ case MSR_MTRRcap:
+ case MSR_MTRRdefType:
+ case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
+ case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+ case MSR_MTRR64kBase:
+ case MSR_SYSCFG:
+ *result = 0;
+ break;
case MSR_AMDK8_IPM:
*result = 0;
break;
@@ -122,6 +135,15 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu)
int error = 0;
switch (num) {
+ case MSR_MTRRcap:
+ vm_inject_gp(sc->vm, vcpu);
+ break;
+ case MSR_MTRRdefType:
+ case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
+ case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+ case MSR_MTRR64kBase:
+ case MSR_SYSCFG:
+ break; /* Ignore writes */
case MSR_AMDK8_IPM:
/*
* Ignore writes to the "Interrupt Pending Message" MSR.
diff --git a/sys/amd64/vmm/amd/vmcb.c b/sys/amd64/vmm/amd/vmcb.c
index fb4b2c8..d860169 100644
--- a/sys/amd64/vmm/amd/vmcb.c
+++ b/sys/amd64/vmm/amd/vmcb.c
@@ -29,7 +29,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/cpuset.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c
index e517778..526b0d1 100644
--- a/sys/amd64/vmm/intel/vmx_msr.c
+++ b/sys/amd64/vmm/intel/vmx_msr.c
@@ -31,7 +31,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/cpuset.h>
#include <machine/clock.h>
#include <machine/cpufunc.h>
@@ -396,6 +395,13 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
error = 0;
switch (num) {
+ case MSR_MTRRcap:
+ case MSR_MTRRdefType:
+ case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
+ case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+ case MSR_MTRR64kBase:
+ *val = 0;
+ break;
case MSR_IA32_MISC_ENABLE:
*val = misc_enable;
break;
@@ -427,6 +433,14 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
error = 0;
switch (num) {
+ case MSR_MTRRcap:
+ vm_inject_gp(vmx->vm, vcpuid);
+ break;
+ case MSR_MTRRdefType:
+ case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
+ case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+ case MSR_MTRR64kBase:
+ break; /* Ignore writes */
case MSR_IA32_MISC_ENABLE:
changed = val ^ misc_enable;
/*
diff --git a/sys/amd64/vmm/io/vatpic.c b/sys/amd64/vmm/io/vatpic.c
index 0df6e7c..6e94f5b 100644
--- a/sys/amd64/vmm/io/vatpic.c
+++ b/sys/amd64/vmm/io/vatpic.c
@@ -30,7 +30,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/queue.h>
-#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
diff --git a/sys/amd64/vmm/io/vatpit.c b/sys/amd64/vmm/io/vatpit.c
index 842253d..173ef1f 100644
--- a/sys/amd64/vmm/io/vatpit.c
+++ b/sys/amd64/vmm/io/vatpit.c
@@ -31,7 +31,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/queue.h>
-#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
diff --git a/sys/amd64/vmm/io/vhpet.c b/sys/amd64/vmm/io/vhpet.c
index a4c96cd..1db1c51 100644
--- a/sys/amd64/vmm/io/vhpet.c
+++ b/sys/amd64/vmm/io/vhpet.c
@@ -36,7 +36,6 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/systm.h>
-#include <sys/cpuset.h>
#include <dev/acpica/acpi_hpet.h>
diff --git a/sys/amd64/vmm/io/vioapic.c b/sys/amd64/vmm/io/vioapic.c
index 411887d..e6b8b5a 100644
--- a/sys/amd64/vmm/io/vioapic.c
+++ b/sys/amd64/vmm/io/vioapic.c
@@ -32,7 +32,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/queue.h>
-#include <sys/cpuset.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/systm.h>
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index 7097248..3451e1e 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -547,6 +547,8 @@ vlapic_update_ppr(struct vlapic *vlapic)
VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}
+static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");
+
static void
vlapic_process_eoi(struct vlapic *vlapic)
{
@@ -557,11 +559,7 @@ vlapic_process_eoi(struct vlapic *vlapic)
isrptr = &lapic->isr0;
tmrptr = &lapic->tmr0;
- /*
- * The x86 architecture reserves the the first 32 vectors for use
- * by the processor.
- */
- for (i = 7; i > 0; i--) {
+ for (i = 7; i >= 0; i--) {
idx = i * 4;
bitpos = fls(isrptr[idx]);
if (bitpos-- != 0) {
@@ -570,17 +568,21 @@ vlapic_process_eoi(struct vlapic *vlapic)
vlapic->isrvec_stk_top);
}
isrptr[idx] &= ~(1 << bitpos);
+ vector = i * 32 + bitpos;
+ VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d",
+ vector);
VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
vlapic->isrvec_stk_top--;
vlapic_update_ppr(vlapic);
if ((tmrptr[idx] & (1 << bitpos)) != 0) {
- vector = i * 32 + bitpos;
vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
vector);
}
return;
}
}
+ VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "Gratuitous EOI");
+ vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}
static __inline int
@@ -1092,11 +1094,7 @@ vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
irrptr = &lapic->irr0;
- /*
- * The x86 architecture reserves the the first 32 vectors for use
- * by the processor.
- */
- for (i = 7; i > 0; i--) {
+ for (i = 7; i >= 0; i--) {
idx = i * 4;
val = atomic_load_acq_int(&irrptr[idx]);
bitpos = fls(val);
diff --git a/sys/amd64/vmm/io/vpmtmr.c b/sys/amd64/vmm/io/vpmtmr.c
index 09f763f..1e7bb93 100644
--- a/sys/amd64/vmm/io/vpmtmr.c
+++ b/sys/amd64/vmm/io/vpmtmr.c
@@ -29,7 +29,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/queue.h>
-#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/systm.h>
diff --git a/sys/amd64/vmm/io/vrtc.c b/sys/amd64/vmm/io/vrtc.c
index ab9cabb..18ebc4b 100644
--- a/sys/amd64/vmm/io/vrtc.c
+++ b/sys/amd64/vmm/io/vrtc.c
@@ -30,7 +30,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/queue.h>
-#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
@@ -63,9 +62,12 @@ struct rtcdev {
uint8_t reg_b;
uint8_t reg_c;
uint8_t reg_d;
- uint8_t nvram[128 - 14];
+ uint8_t nvram[36];
+ uint8_t century;
+ uint8_t nvram2[128 - 51];
} __packed;
CTASSERT(sizeof(struct rtcdev) == 128);
+CTASSERT(offsetof(struct rtcdev, century) == RTC_CENTURY);
struct vrtc {
struct vm *vm;
@@ -139,20 +141,23 @@ update_enabled(struct vrtc *vrtc)
}
static time_t
-vrtc_curtime(struct vrtc *vrtc)
+vrtc_curtime(struct vrtc *vrtc, sbintime_t *basetime)
{
sbintime_t now, delta;
- time_t t;
+ time_t t, secs;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
t = vrtc->base_rtctime;
+ *basetime = vrtc->base_uptime;
if (update_enabled(vrtc)) {
now = sbinuptime();
delta = now - vrtc->base_uptime;
KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: "
"%#lx to %#lx", vrtc->base_uptime, now));
- t += delta / SBT_1S;
+ secs = delta / SBT_1S;
+ t += secs;
+ *basetime += secs * SBT_1S;
}
return (t);
}
@@ -245,6 +250,7 @@ secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update)
rtc->day_of_month = rtcset(rtc, ct.day);
rtc->month = rtcset(rtc, ct.mon);
rtc->year = rtcset(rtc, ct.year % 100);
+ rtc->century = rtcset(rtc, ct.year / 100);
}
static int
@@ -274,7 +280,7 @@ rtc_to_secs(struct vrtc *vrtc)
struct timespec ts;
struct rtcdev *rtc;
struct vm *vm;
- int error, hour, pm, year;
+ int century, error, hour, pm, year;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
@@ -358,10 +364,14 @@ rtc_to_secs(struct vrtc *vrtc)
VM_CTR2(vm, "Invalid RTC year %#x/%d", rtc->year, year);
goto fail;
}
- if (year >= 70)
- ct.year = 1900 + year;
- else
- ct.year = 2000 + year;
+
+ error = rtcget(rtc, rtc->century, &century);
+ ct.year = century * 100 + year;
+ if (error || ct.year < POSIX_BASE_YEAR) {
+ VM_CTR2(vm, "Invalid RTC century %#x/%d", rtc->century,
+ ct.year);
+ goto fail;
+ }
error = clock_ct_to_ts(&ct, &ts);
if (error || ts.tv_sec < 0) {
@@ -373,13 +383,19 @@ rtc_to_secs(struct vrtc *vrtc)
}
return (ts.tv_sec); /* success */
fail:
- return (VRTC_BROKEN_TIME); /* failure */
+ /*
+ * Stop updating the RTC if the date/time fields programmed by
+ * the guest are invalid.
+ */
+ VM_CTR0(vrtc->vm, "Invalid RTC date/time programming detected");
+ return (VRTC_BROKEN_TIME);
}
static int
-vrtc_time_update(struct vrtc *vrtc, time_t newtime)
+vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase)
{
struct rtcdev *rtc;
+ sbintime_t oldbase;
time_t oldtime;
uint8_t alarm_sec, alarm_min, alarm_hour;
@@ -391,16 +407,21 @@ vrtc_time_update(struct vrtc *vrtc, time_t newtime)
alarm_hour = rtc->alarm_hour;
oldtime = vrtc->base_rtctime;
- VM_CTR2(vrtc->vm, "Updating RTC time from %#lx to %#lx",
+ VM_CTR2(vrtc->vm, "Updating RTC secs from %#lx to %#lx",
oldtime, newtime);
+ oldbase = vrtc->base_uptime;
+ VM_CTR2(vrtc->vm, "Updating RTC base uptime from %#lx to %#lx",
+ oldbase, newbase);
+ vrtc->base_uptime = newbase;
+
if (newtime == oldtime)
return (0);
/*
* If 'newtime' indicates that RTC updates are disabled then just
* record that and return. There is no need to do alarm interrupt
- * processing or update 'base_uptime' in this case.
+ * processing in this case.
*/
if (newtime == VRTC_BROKEN_TIME) {
vrtc->base_rtctime = VRTC_BROKEN_TIME;
@@ -446,8 +467,6 @@ vrtc_time_update(struct vrtc *vrtc, time_t newtime)
if (uintr_enabled(vrtc))
vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_UPDATE);
- vrtc->base_uptime = sbinuptime();
-
return (0);
}
@@ -518,7 +537,7 @@ static void
vrtc_callout_handler(void *arg)
{
struct vrtc *vrtc = arg;
- sbintime_t freqsbt;
+ sbintime_t freqsbt, basetime;
time_t rtctime;
int error;
@@ -540,8 +559,8 @@ vrtc_callout_handler(void *arg)
vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c | RTCIR_PERIOD);
if (aintr_enabled(vrtc) || uintr_enabled(vrtc)) {
- rtctime = vrtc_curtime(vrtc);
- error = vrtc_time_update(vrtc, rtctime);
+ rtctime = vrtc_curtime(vrtc, &basetime);
+ error = vrtc_time_update(vrtc, rtctime, basetime);
KASSERT(error == 0, ("%s: vrtc_time_update error %d",
__func__, error));
}
@@ -606,7 +625,7 @@ static int
vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
{
struct rtcdev *rtc;
- sbintime_t oldfreq, newfreq;
+ sbintime_t oldfreq, newfreq, basetime;
time_t curtime, rtctime;
int error;
uint8_t oldval, changed;
@@ -627,19 +646,13 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
if (changed & RTCSB_HALT) {
if ((newval & RTCSB_HALT) == 0) {
rtctime = rtc_to_secs(vrtc);
+ basetime = sbinuptime();
if (rtctime == VRTC_BROKEN_TIME) {
- /*
- * Stop updating the RTC if the date/time
- * programmed by the guest is not correct.
- */
- VM_CTR0(vrtc->vm, "Invalid RTC date/time "
- "programming detected");
-
if (rtc_flag_broken_time)
return (-1);
}
} else {
- curtime = vrtc_curtime(vrtc);
+ curtime = vrtc_curtime(vrtc, &basetime);
KASSERT(curtime == vrtc->base_rtctime, ("%s: mismatch "
"between vrtc basetime (%#lx) and curtime (%#lx)",
__func__, vrtc->base_rtctime, curtime));
@@ -658,7 +671,7 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
rtctime = VRTC_BROKEN_TIME;
rtc->reg_b &= ~RTCSB_UINTR;
}
- error = vrtc_time_update(vrtc, rtctime);
+ error = vrtc_time_update(vrtc, rtctime, basetime);
KASSERT(error == 0, ("vrtc_time_update error %d", error));
}
@@ -738,7 +751,7 @@ vrtc_set_time(struct vm *vm, time_t secs)
vrtc = vm_rtc(vm);
VRTC_LOCK(vrtc);
- error = vrtc_time_update(vrtc, secs);
+ error = vrtc_time_update(vrtc, secs, sbinuptime());
VRTC_UNLOCK(vrtc);
if (error) {
@@ -755,11 +768,12 @@ time_t
vrtc_get_time(struct vm *vm)
{
struct vrtc *vrtc;
+ sbintime_t basetime;
time_t t;
vrtc = vm_rtc(vm);
VRTC_LOCK(vrtc);
- t = vrtc_curtime(vrtc);
+ t = vrtc_curtime(vrtc, &basetime);
VRTC_UNLOCK(vrtc);
return (t);
@@ -777,7 +791,7 @@ vrtc_nvram_write(struct vm *vm, int offset, uint8_t value)
* Don't allow writes to RTC control registers or the date/time fields.
*/
if (offset < offsetof(struct rtcdev, nvram[0]) ||
- offset >= sizeof(struct rtcdev)) {
+ offset == RTC_CENTURY || offset >= sizeof(struct rtcdev)) {
VM_CTR1(vrtc->vm, "RTC nvram write to invalid offset %d",
offset);
return (EINVAL);
@@ -796,6 +810,7 @@ int
vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
{
struct vrtc *vrtc;
+ sbintime_t basetime;
time_t curtime;
uint8_t *ptr;
@@ -811,8 +826,8 @@ vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
/*
* Update RTC date/time fields if necessary.
*/
- if (offset < 10) {
- curtime = vrtc_curtime(vrtc);
+ if (offset < 10 || offset == RTC_CENTURY) {
+ curtime = vrtc_curtime(vrtc, &basetime);
secs_to_rtc(curtime, vrtc, 0);
}
@@ -852,6 +867,7 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
{
struct vrtc *vrtc;
struct rtcdev *rtc;
+ sbintime_t basetime;
time_t curtime;
int error, offset;
@@ -869,16 +885,20 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
}
error = 0;
- curtime = vrtc_curtime(vrtc);
- vrtc_time_update(vrtc, curtime);
+ curtime = vrtc_curtime(vrtc, &basetime);
+ vrtc_time_update(vrtc, curtime, basetime);
- if (in) {
- /*
- * Update RTC date/time fields if necessary.
- */
- if (offset < 10)
- secs_to_rtc(curtime, vrtc, 0);
+ /*
+ * Update RTC date/time fields if necessary.
+ *
+ * This is not just for reads of the RTC. The side-effect of writing
+ * the century byte requires other RTC date/time fields (e.g. sec)
+ * to be updated here.
+ */
+ if (offset < 10 || offset == RTC_CENTURY)
+ secs_to_rtc(curtime, vrtc, 0);
+ if (in) {
if (offset == 12) {
/*
* XXX
@@ -922,6 +942,18 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
*((uint8_t *)rtc + offset) = *val;
break;
}
+
+ /*
+ * XXX some guests (e.g. OpenBSD) write the century byte
+ * outside of RTCSB_HALT so re-calculate the RTC date/time.
+ */
+ if (offset == RTC_CENTURY && !rtc_halted(vrtc)) {
+ curtime = rtc_to_secs(vrtc);
+ error = vrtc_time_update(vrtc, curtime, sbinuptime());
+ KASSERT(!error, ("vrtc_time_update error %d", error));
+ if (curtime == VRTC_BROKEN_TIME && rtc_flag_broken_time)
+ error = -1;
+ }
}
VRTC_UNLOCK(vrtc);
return (error);
@@ -971,7 +1003,7 @@ vrtc_init(struct vm *vm)
VRTC_LOCK(vrtc);
vrtc->base_rtctime = VRTC_BROKEN_TIME;
- vrtc_time_update(vrtc, curtime);
+ vrtc_time_update(vrtc, curtime, sbinuptime());
secs_to_rtc(curtime, vrtc, 0);
VRTC_UNLOCK(vrtc);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 6bd5bce..bca9b98 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -1293,8 +1293,12 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
else if (error != 0)
panic("%s: vmm_fetch_instruction error %d", __func__, error);
- if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0)
- return (EFAULT);
+ if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) {
+ VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx",
+ vme->rip + cs_base);
+ *retu = true; /* dump instruction bytes in userspace */
+ return (0);
+ }
/*
* If the instruction length was not specified then update it now
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 0b50e92..7172365 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -72,6 +72,8 @@ enum {
VIE_OP_TYPE_POP,
VIE_OP_TYPE_MOVS,
VIE_OP_TYPE_GROUP1,
+ VIE_OP_TYPE_STOS,
+ VIE_OP_TYPE_BITTEST,
VIE_OP_TYPE_LAST
};
@@ -91,6 +93,11 @@ static const struct vie_op two_byte_opcodes[256] = {
.op_byte = 0xB7,
.op_type = VIE_OP_TYPE_MOVZX,
},
+ [0xBA] = {
+ .op_byte = 0xBA,
+ .op_type = VIE_OP_TYPE_BITTEST,
+ .op_flags = VIE_OP_F_IMM8,
+ },
[0xBE] = {
.op_byte = 0xBE,
.op_type = VIE_OP_TYPE_MOVSX,
@@ -146,6 +153,16 @@ static const struct vie_op one_byte_opcodes[256] = {
.op_type = VIE_OP_TYPE_MOVS,
.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
},
+ [0xAA] = {
+ .op_byte = 0xAA,
+ .op_type = VIE_OP_TYPE_STOS,
+ .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+ },
+ [0xAB] = {
+ .op_byte = 0xAB,
+ .op_type = VIE_OP_TYPE_STOS,
+ .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+ },
[0xC6] = {
/* XXX Group 11 extended opcode - not just MOV */
.op_byte = 0xC6,
@@ -803,6 +820,68 @@ done:
}
static int
+emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *arg) /* paging and memread are accepted but never referenced in this body */
+{ /* Emulate one iteration of opcode 0xAA/0xAB: write %rax to gpa, step %rdi, maintain the %rcx repeat count */
+ int error, opsize, repeat;
+ uint64_t val;
+ uint64_t rcx, rdi, rflags;
+
+ opsize = (vie->op.op_byte == 0xAA) ? 1 : vie->opsize; /* opcode 0xAA always uses size 1; 0xAB uses the decoded operand size */
+ repeat = vie->repz_present | vie->repnz_present; /* non-zero when either repeat-prefix flag is set in vie */
+
+ if (repeat) {
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
+ KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
+
+ /*
+ * The count register is %rcx, %ecx or %cx depending on the
+ * address size of the instruction.
+ */
+ if ((rcx & vie_size2mask(vie->addrsize)) == 0)
+ return (0); /* count already zero: no write and no register update */
+ }
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
+ KASSERT(!error, ("%s: error %d getting rax", __func__, error));
+
+ error = memwrite(vm, vcpuid, gpa, val, opsize, arg); /* hand the %rax value to the caller-supplied write callback with size opsize */
+ if (error)
+ return (error); /* callback failure is returned as-is; %rdi and %rcx have not been modified yet */
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
+ KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+ if (rflags & PSL_D) /* PSL_D set: %rdi moves down by opsize, otherwise up */
+ rdi -= opsize;
+ else
+ rdi += opsize;
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
+ vie->addrsize);
+ KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
+
+ if (repeat) {
+ rcx = rcx - 1; /* exactly one element is stored per invocation */
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
+ rcx, vie->addrsize);
+ KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
+
+ /*
+ * Repeat the instruction if the count register is not zero.
+ */
+ if ((rcx & vie_size2mask(vie->addrsize)) != 0)
+ vm_restart_instruction(vm, vcpuid);
+ }
+
+ return (0);
+}
+
+static int
emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
@@ -1262,6 +1341,48 @@ emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (error);
}
+static int
+emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *memarg)
+{
+ uint64_t val, rflags;
+ int error, bitmask, bitoff;
+
+ /*
+ * Emulate a bit test on the memory operand at guest physical address
+ * 'gpa': read the operand through 'memread', pick the bit indexed by
+ * the instruction's immediate and copy it into PSL_C of the guest's
+ * %rflags. 'memwrite' is not used by this function.
+ *
+ * Returns EINVAL for an unsupported encoding, the error from
+ * 'memread' if the read fails, and 0 otherwise.
+ *
+ * 0F BA is a Group 8 extended opcode.
+ *
+ * Currently we only emulate the 'Bit Test' instruction which is
+ * identified by a ModR/M:reg encoding of 100b.
+ *
+ * Any other reg encoding is rejected with EINVAL before guest state
+ * is read.
+ */
+ if ((vie->reg & 7) != 4)
+ return (EINVAL);
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+ /* A failed read returns here, before %rflags is written back. */
+ error = memread(vm, vcpuid, gpa, &val, vie->opsize, memarg);
+ if (error)
+ return (error);
+
+ /*
+ * Intel SDM, Vol 2, Table 3-2:
+ * "Range of Bit Positions Specified by Bit Offset Operands"
+ *
+ * The bit offset is the immediate masked with (opsize * 8 - 1), so
+ * it always falls inside the operand that was just read.
+ */
+ bitmask = vie->opsize * 8 - 1;
+ bitoff = vie->immediate & bitmask;
+
+ /* Copy the bit into the Carry flag in %rflags */
+ if (val & (1UL << bitoff))
+ rflags |= PSL_C;
+ else
+ rflags &= ~PSL_C;
+ /* Only PSL_C was changed; every other bit is written back as it was read. */
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
+ KASSERT(error == 0, ("%s: error %d updating rflags", __func__, error));
+
+ return (0);
+}
+
int
vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
struct vm_guest_paging *paging, mem_region_read_t memread,
@@ -1302,6 +1423,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread,
memwrite, memarg);
break;
+ case VIE_OP_TYPE_STOS:
+ error = emulate_stos(vm, vcpuid, gpa, vie, paging, memread,
+ memwrite, memarg);
+ break;
case VIE_OP_TYPE_AND:
error = emulate_and(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
@@ -1314,6 +1439,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = emulate_sub(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
break;
+ case VIE_OP_TYPE_BITTEST:
+ error = emulate_bittest(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
default:
error = EINVAL;
break;
diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c
index fc68a61..63044e8 100644
--- a/sys/amd64/vmm/vmm_ioport.c
+++ b/sys/amd64/vmm/vmm_ioport.c
@@ -28,16 +28,10 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
-#include <sys/types.h>
-#include <sys/queue.h>
-#include <sys/cpuset.h>
#include <sys/systm.h>
-#include <vm/vm.h>
-
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>
-#include <x86/psl.h>
#include "vatpic.h"
#include "vatpit.h"
diff --git a/sys/amd64/vmm/vmm_stat.c b/sys/amd64/vmm/vmm_stat.c
index 9ecf9af..4ae5fb9 100644
--- a/sys/amd64/vmm/vmm_stat.c
+++ b/sys/amd64/vmm/vmm_stat.c
@@ -33,7 +33,6 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
-#include <sys/smp.h>
#include <machine/vmm.h>
#include "vmm_util.h"
diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c
index c37d21c..45e08b5 100644
--- a/sys/amd64/vmm/x86.c
+++ b/sys/amd64/vmm/x86.c
@@ -32,7 +32,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
-#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include <machine/clock.h>
@@ -289,9 +288,8 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
/*
* Machine check handling is done in the host.
- * Hide MTRR capability.
*/
- regs[3] &= ~(CPUID_MCA | CPUID_MCE | CPUID_MTRR);
+ regs[3] &= ~(CPUID_MCA | CPUID_MCE);
/*
* Hide the debug store capability.
OpenPOWER on IntegriCloud