summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--  sys/amd64/amd64/mca.c         | 36
-rw-r--r--  sys/amd64/amd64/pmap.c        | 131
-rw-r--r--  sys/amd64/include/md_var.h    | 1
-rw-r--r--  sys/amd64/include/specialreg.h | 2
4 files changed, 162 insertions(+), 8 deletions(-)
diff --git a/sys/amd64/amd64/mca.c b/sys/amd64/amd64/mca.c
index b0e842a..50c335d 100644
--- a/sys/amd64/amd64/mca.c
+++ b/sys/amd64/amd64/mca.c
@@ -65,6 +65,15 @@ TUNABLE_INT("hw.mca.enabled", &mca_enabled);
SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
"Administrative toggle for machine check support");
+static int amd10h_L1TP = 1;
+TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP);
+SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
+ "Administrative toggle for logging of level one TLB parity (L1TP) errors");
+
+int workaround_erratum383;
+SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
+ "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
+
static STAILQ_HEAD(, mca_internal) mca_records;
static struct callout mca_timer;
static int mca_ticks = 3600; /* Check hourly by default. */
@@ -481,7 +490,7 @@ void
mca_init(void)
{
uint64_t mcg_cap;
- uint64_t ctl;
+ uint64_t ctl, mask;
int skip;
int i;
@@ -489,6 +498,15 @@ mca_init(void)
if (!mca_enabled || !(cpu_feature & CPUID_MCE))
return;
+ /*
+ * On AMD Family 10h processors, unless logging of level one TLB
+ * parity (L1TP) errors is disabled, enable the recommended workaround
+ * for Erratum 383.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
+ workaround_erratum383 = 1;
+
if (cpu_feature & CPUID_MCA) {
if (PCPU_GET(cpuid) == 0)
mca_setup();
@@ -499,6 +517,22 @@ mca_init(void)
/* Enable MCA features. */
wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
+ /*
+ * Disable logging of level one TLB parity (L1TP) errors by
+ * the data and instruction caches as an alternative
+ * workaround for AMD Family 10h Erratum 383. Unlike the
+ * recommended workaround, there is no performance penalty to
+ * this workaround. However, L1TP errors will go unreported.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) {
+ mask = rdmsr(MSR_MC0_CTL_MASK);
+ if ((mask & (1UL << 5)) == 0)
+ wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
+ mask = rdmsr(MSR_MC1_CTL_MASK);
+ if ((mask & (1UL << 5)) == 0)
+ wrmsr(MSR_MC1_CTL_MASK, mask | (1UL << 5));
+ }
for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
/* By default enable logging of all errors. */
ctl = 0xffffffffffffffffUL;
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 07db5d1..c4e6a3b 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -7,7 +7,7 @@
* All rights reserved.
* Copyright (c) 2003 Peter Wemm
* All rights reserved.
- * Copyright (c) 2005-2008 Alan L. Cox <alc@cs.rice.edu>
+ * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -255,6 +255,9 @@ static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
vm_page_t m);
+static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+ pd_entry_t newpde);
+static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags);
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
@@ -686,13 +689,13 @@ pmap_init(void)
pv_entry_high_water = 9 * (pv_entry_max / 10);
/*
- * Disable large page mappings by default if the kernel is running in
- * a virtual machine on an AMD Family 10h processor. This is a work-
- * around for Erratum 383.
+ * If the kernel is running in a virtual machine on an AMD Family 10h
+ * processor, then it must assume that MCA is enabled by the virtual
+ * machine monitor.
*/
if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
CPUID_TO_FAMILY(cpu_id) == 0x10)
- pg_ps_enabled = 0;
+ workaround_erratum383 = 1;
/*
* Are large page mappings enabled?
@@ -848,6 +851,42 @@ pmap_cache_bits(int mode, boolean_t is_pde)
cache_bits |= PG_NC_PWT;
return (cache_bits);
}
+
+/*
+ * After changing the page size for the specified virtual address in the page
+ * table, flush the corresponding entries from the processor's TLB. Only the
+ * calling processor's TLB is affected.
+ *
+ * The calling thread must be pinned to a processor.
+ */
+static void
+pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
+{
+ u_long cr4;
+
+ if ((newpde & PG_PS) == 0)
+ /* Demotion: flush a specific 2MB page mapping. */
+ invlpg(va);
+ else if ((newpde & PG_G) == 0)
+ /*
+ * Promotion: flush every 4KB page mapping from the TLB
+ * because there are too many to flush individually.
+ */
+ invltlb();
+ else {
+ /*
+ * Promotion: flush every 4KB page mapping from the TLB,
+ * including any global (PG_G) mappings.
+ */
+ cr4 = rcr4();
+ load_cr4(cr4 & ~CR4_PGE);
+ /*
+ * Although preemption at this point could be detrimental to
+ * performance, it would not lead to an error.
+ */
+ load_cr4(cr4);
+ }
+}
#ifdef SMP
/*
* For SMP, these functions have to use the IPI mechanism for coherence.
@@ -944,6 +983,69 @@ pmap_invalidate_cache(void)
smp_cache_flush();
sched_unpin();
}
+
+struct pde_action {
+ cpumask_t store; /* processor that updates the PDE */
+ cpumask_t invalidate; /* processors that invalidate their TLB */
+ vm_offset_t va;
+ pd_entry_t *pde;
+ pd_entry_t newpde;
+};
+
+static void
+pmap_update_pde_action(void *arg)
+{
+ struct pde_action *act = arg;
+
+ if (act->store == PCPU_GET(cpumask))
+ pde_store(act->pde, act->newpde);
+}
+
+static void
+pmap_update_pde_teardown(void *arg)
+{
+ struct pde_action *act = arg;
+
+ if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+ pmap_update_pde_invalidate(act->va, act->newpde);
+}
+
+/*
+ * Change the page size for the specified virtual address in a way that
+ * prevents any possibility of the TLB ever having two entries that map the
+ * same virtual address using different page sizes. This is the recommended
+ * workaround for Erratum 383 on AMD Family 10h processors. It prevents a
+ * machine check exception for a TLB state that is improperly diagnosed as a
+ * hardware error.
+ */
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+ struct pde_action act;
+ cpumask_t active, cpumask;
+
+ sched_pin();
+ cpumask = PCPU_GET(cpumask);
+ if (pmap == kernel_pmap)
+ active = all_cpus;
+ else
+ active = pmap->pm_active;
+ if ((active & PCPU_GET(other_cpus)) != 0) {
+ act.store = cpumask;
+ act.invalidate = active;
+ act.va = va;
+ act.pde = pde;
+ act.newpde = newpde;
+ smp_rendezvous_cpus(cpumask | active,
+ smp_no_rendevous_barrier, pmap_update_pde_action,
+ pmap_update_pde_teardown, &act);
+ } else {
+ pde_store(pde, newpde);
+ if ((active & cpumask) != 0)
+ pmap_update_pde_invalidate(va, newpde);
+ }
+ sched_unpin();
+}
#else /* !SMP */
/*
* Normal, non-SMP, invalidation functions.
@@ -981,6 +1083,15 @@ pmap_invalidate_cache(void)
wbinvd();
}
+
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+
+ pde_store(pde, newpde);
+ if (pmap == kernel_pmap || pmap->pm_active)
+ pmap_update_pde_invalidate(va, newpde);
+}
#endif /* !SMP */
static void
@@ -2361,7 +2472,10 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
* processor changing the setting of PG_A and/or PG_M between
* the read above and the store below.
*/
- pde_store(pde, newpde);
+ if (workaround_erratum383)
+ pmap_update_pde(pmap, va, pde, newpde);
+ else
+ pde_store(pde, newpde);
/*
* Invalidate a stale recursive mapping of the page table page.
@@ -2977,7 +3091,10 @@ setpte:
/*
* Map the superpage.
*/
- pde_store(pde, PG_PS | newpde);
+ if (workaround_erratum383)
+ pmap_update_pde(pmap, va, pde, PG_PS | newpde);
+ else
+ pde_store(pde, PG_PS | newpde);
pmap_pde_promotions++;
CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx"
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 15df851..2b43b37 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -61,6 +61,7 @@ extern char sigcode[];
extern int szsigcode;
extern uint64_t *vm_page_dump;
extern int vm_page_dump_size;
+extern int workaround_erratum383;
extern int _udatasel;
extern int _ucodesel;
extern int _ucode32sel;
diff --git a/sys/amd64/include/specialreg.h b/sys/amd64/include/specialreg.h
index 8cadbcd..b325ed4 100644
--- a/sys/amd64/include/specialreg.h
+++ b/sys/amd64/include/specialreg.h
@@ -494,6 +494,8 @@
#define MSR_TOP_MEM 0xc001001a /* boundary for ram below 4G */
#define MSR_TOP_MEM2 0xc001001d /* boundary for ram above 4G */
#define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */
+#define MSR_MC0_CTL_MASK 0xc0010044
+#define MSR_MC1_CTL_MASK 0xc0010045
/* VIA ACE crypto featureset: for via_feature_rng */
#define VIA_HAS_RNG 1 /* cpu has RNG */
OpenPOWER on IntegriCloud