author     alc <alc@FreeBSD.org>  2010-03-09 03:30:31 +0000
committer  alc <alc@FreeBSD.org>  2010-03-09 03:30:31 +0000
commit     790da94e62d11167d04896bdb935482cd760524e (patch)
tree       baedeaf7bbf9a0b1a57ad2ccaf9a61433f06556c
parent     1fe700c954973387dad8a8bca442eacc4dfda56d (diff)
Implement AMD's recommended workaround for Erratum 383 on Family 10h
processors.  With this workaround, superpage promotion can be re-enabled
under virtualization.  Moreover, machine check exceptions can safely be
enabled when FreeBSD is running natively on Family 10h processors.

Most of the credit should go to Andriy Gapon for diagnosing the error and
working with Borislav Petkov at AMD to document it.  Andriy also reviewed
and tested my patches.

Discussed with:	jhb
MFC after:	3 weeks
-rw-r--r--  sys/amd64/amd64/mca.c            36
-rw-r--r--  sys/amd64/amd64/pmap.c          131
-rw-r--r--  sys/amd64/include/md_var.h        1
-rw-r--r--  sys/amd64/include/specialreg.h    2

4 files changed, 162 insertions(+), 8 deletions(-)
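
Some context before the diff: the change gates both workarounds on FreeBSD's
CPUID_TO_FAMILY() macro matching family 0x10.  Family 10h parts report a base
family of 0xF plus an extended family of 0x01 in CPUID leaf 1 EAX.  Below is a
minimal userland sketch of that decoding; cpuid_to_family() is a stand-in for
the real macro, and the sample value 0x00100f42 is a representative Family 10h
cpu_id chosen for illustration, not taken from this commit.

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * The base family lives in bits 8..11 of CPUID leaf 1 EAX; when it
	 * reads 0xF, the extended family in bits 20..27 is added in.
	 */
	static unsigned
	cpuid_to_family(uint32_t eax)
	{
		unsigned base = (eax >> 8) & 0xf;
		unsigned ext = (eax >> 20) & 0xff;

		return (base == 0xf ? base + ext : base);
	}

	int
	main(void)
	{
		/* Prints "family 0x10" for this Family 10h cpu_id. */
		printf("family 0x%x\n", cpuid_to_family(0x00100f42));
		return (0);
	}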
diff --git a/sys/amd64/amd64/mca.c b/sys/amd64/amd64/mca.c
index b0e842a..50c335d 100644
--- a/sys/amd64/amd64/mca.c
+++ b/sys/amd64/amd64/mca.c
@@ -65,6 +65,15 @@ TUNABLE_INT("hw.mca.enabled", &mca_enabled);
SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
"Administrative toggle for machine check support");
+static int amd10h_L1TP = 1;
+TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP);
+SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
+ "Administrative toggle for logging of level one TLB parity (L1TP) errors");
+
+int workaround_erratum383;
+SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
+ "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
+
static STAILQ_HEAD(, mca_internal) mca_records;
static struct callout mca_timer;
static int mca_ticks = 3600; /* Check hourly by default. */
@@ -481,7 +490,7 @@ void
mca_init(void)
{
uint64_t mcg_cap;
- uint64_t ctl;
+ uint64_t ctl, mask;
int skip;
int i;
@@ -489,6 +498,15 @@ mca_init(void)
if (!mca_enabled || !(cpu_feature & CPUID_MCE))
return;
+ /*
+ * On AMD Family 10h processors, unless logging of level one TLB
+ * parity (L1TP) errors is disabled, enable the recommended workaround
+ * for Erratum 383.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
+ workaround_erratum383 = 1;
+
if (cpu_feature & CPUID_MCA) {
if (PCPU_GET(cpuid) == 0)
mca_setup();
@@ -499,6 +517,22 @@ mca_init(void)
/* Enable MCA features. */
wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
+ /*
+ * Disable logging of level one TLB parity (L1TP) errors by
+ * the data and instruction caches as an alternative
+ * workaround for AMD Family 10h Erratum 383. Unlike the
+ * recommended workaround, there is no performance penalty to
+ * this workaround. However, L1TP errors will go unreported.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) {
+ mask = rdmsr(MSR_MC0_CTL_MASK);
+ if ((mask & (1UL << 5)) == 0)
+ wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
+ mask = rdmsr(MSR_MC1_CTL_MASK);
+ if ((mask & (1UL << 5)) == 0)
+ wrmsr(MSR_MC1_CTL_MASK, mask | (1UL << 5));
+ }
for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
/* By default enable logging of all errors. */
ctl = 0xffffffffffffffffUL;
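
For reference, the alternative workaround in the mca.c hunk above is a plain
read-modify-write of bit 5 in each of the MC0/MC1 control-mask MSRs.  Here is
a standalone sketch of that pattern; rdmsr_stub() and wrmsr_stub() are
placeholders for the kernel's privileged rdmsr()/wrmsr() (real MSR access
requires ring 0), and the backing array exists only so the sketch runs.

	#include <stdint.h>
	#include <stdio.h>

	#define MSR_MC0_CTL_MASK 0xc0010044
	#define MSR_MC1_CTL_MASK 0xc0010045
	#define L1TP_MASK_BIT    (1UL << 5) /* masks L1 TLB parity errors */

	/* Stub MSR store standing in for privileged rdmsr()/wrmsr(). */
	static uint64_t fake_msr[2];

	static uint64_t
	rdmsr_stub(uint32_t msr)
	{
		return (fake_msr[msr - MSR_MC0_CTL_MASK]);
	}

	static void
	wrmsr_stub(uint32_t msr, uint64_t val)
	{
		fake_msr[msr - MSR_MC0_CTL_MASK] = val;
	}

	int
	main(void)
	{
		uint64_t mask;
		uint32_t msr;

		/* Same read-modify-write shape as the mca_init() change. */
		for (msr = MSR_MC0_CTL_MASK; msr <= MSR_MC1_CTL_MASK; msr++) {
			mask = rdmsr_stub(msr);
			if ((mask & L1TP_MASK_BIT) == 0)
				wrmsr_stub(msr, mask | L1TP_MASK_BIT);
			printf("MSR %#x -> %#jx\n", msr,
			    (uintmax_t)rdmsr_stub(msr));
		}
		return (0);
	}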
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 07db5d1..c4e6a3b 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -7,7 +7,7 @@
* All rights reserved.
* Copyright (c) 2003 Peter Wemm
* All rights reserved.
- * Copyright (c) 2005-2008 Alan L. Cox <alc@cs.rice.edu>
+ * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -255,6 +255,9 @@ static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
vm_page_t m);
+static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+ pd_entry_t newpde);
+static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags);
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
@@ -686,13 +689,13 @@ pmap_init(void)
pv_entry_high_water = 9 * (pv_entry_max / 10);
/*
- * Disable large page mappings by default if the kernel is running in
- * a virtual machine on an AMD Family 10h processor. This is a work-
- * around for Erratum 383.
+ * If the kernel is running in a virtual machine on an AMD Family 10h
+ * processor, then it must assume that MCA is enabled by the virtual
+ * machine monitor.
*/
if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
CPUID_TO_FAMILY(cpu_id) == 0x10)
- pg_ps_enabled = 0;
+ workaround_erratum383 = 1;
/*
* Are large page mappings enabled?
@@ -848,6 +851,42 @@ pmap_cache_bits(int mode, boolean_t is_pde)
cache_bits |= PG_NC_PWT;
return (cache_bits);
}
+
+/*
+ * After changing the page size for the specified virtual address in the page
+ * table, flush the corresponding entries from the processor's TLB. Only the
+ * calling processor's TLB is affected.
+ *
+ * The calling thread must be pinned to a processor.
+ */
+static void
+pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
+{
+ u_long cr4;
+
+ if ((newpde & PG_PS) == 0)
+ /* Demotion: flush a specific 2MB page mapping. */
+ invlpg(va);
+ else if ((newpde & PG_G) == 0)
+ /*
+ * Promotion: flush every 4KB page mapping from the TLB
+ * because there are too many to flush individually.
+ */
+ invltlb();
+ else {
+ /*
+ * Promotion: flush every 4KB page mapping from the TLB,
+ * including any global (PG_G) mappings.
+ */
+ cr4 = rcr4();
+ load_cr4(cr4 & ~CR4_PGE);
+ /*
+ * Although preemption at this point could be detrimental to
+ * performance, it would not lead to an error.
+ */
+ load_cr4(cr4);
+ }
+}
#ifdef SMP
/*
* For SMP, these functions have to use the IPI mechanism for coherence.
@@ -944,6 +983,69 @@ pmap_invalidate_cache(void)
smp_cache_flush();
sched_unpin();
}
+
+struct pde_action {
+ cpumask_t store; /* processor that updates the PDE */
+ cpumask_t invalidate; /* processors that invalidate their TLB */
+ vm_offset_t va;
+ pd_entry_t *pde;
+ pd_entry_t newpde;
+};
+
+static void
+pmap_update_pde_action(void *arg)
+{
+ struct pde_action *act = arg;
+
+ if (act->store == PCPU_GET(cpumask))
+ pde_store(act->pde, act->newpde);
+}
+
+static void
+pmap_update_pde_teardown(void *arg)
+{
+ struct pde_action *act = arg;
+
+ if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+ pmap_update_pde_invalidate(act->va, act->newpde);
+}
+
+/*
+ * Change the page size for the specified virtual address in a way that
+ * prevents any possibility of the TLB ever having two entries that map the
+ * same virtual address using different page sizes. This is the recommended
+ * workaround for Erratum 383 on AMD Family 10h processors. It prevents a
+ * machine check exception for a TLB state that is improperly diagnosed as a
+ * hardware error.
+ */
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+ struct pde_action act;
+ cpumask_t active, cpumask;
+
+ sched_pin();
+ cpumask = PCPU_GET(cpumask);
+ if (pmap == kernel_pmap)
+ active = all_cpus;
+ else
+ active = pmap->pm_active;
+ if ((active & PCPU_GET(other_cpus)) != 0) {
+ act.store = cpumask;
+ act.invalidate = active;
+ act.va = va;
+ act.pde = pde;
+ act.newpde = newpde;
+ smp_rendezvous_cpus(cpumask | active,
+ smp_no_rendevous_barrier, pmap_update_pde_action,
+ pmap_update_pde_teardown, &act);
+ } else {
+ pde_store(pde, newpde);
+ if ((active & cpumask) != 0)
+ pmap_update_pde_invalidate(va, newpde);
+ }
+ sched_unpin();
+}
#else /* !SMP */
/*
* Normal, non-SMP, invalidation functions.
@@ -981,6 +1083,15 @@ pmap_invalidate_cache(void)
wbinvd();
}
+
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+
+ pde_store(pde, newpde);
+ if (pmap == kernel_pmap || pmap->pm_active)
+ pmap_update_pde_invalidate(va, newpde);
+}
#endif /* !SMP */
static void
@@ -2361,7 +2472,10 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
* processor changing the setting of PG_A and/or PG_M between
* the read above and the store below.
*/
- pde_store(pde, newpde);
+ if (workaround_erratum383)
+ pmap_update_pde(pmap, va, pde, newpde);
+ else
+ pde_store(pde, newpde);
/*
* Invalidate a stale recursive mapping of the page table page.
@@ -2977,7 +3091,10 @@ setpte:
/*
* Map the superpage.
*/
- pde_store(pde, PG_PS | newpde);
+ if (workaround_erratum383)
+ pmap_update_pde(pmap, va, pde, PG_PS | newpde);
+ else
+ pde_store(pde, PG_PS | newpde);
pmap_pde_promotions++;
CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx"
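
The pmap_update_pde() change above hinges on the ordering provided by
smp_rendezvous_cpus(): every affected CPU enters the rendezvous, exactly one
performs the PDE store in the action phase, and all of them invalidate in the
teardown phase, so no CPU can run with both the old and new page sizes visible
at once.  The following is only a userland analogy of that ordering using
POSIX barriers; threads stand in for CPUs, the "PDE" and "TLB" variables are
illustrative, and none of this is kernel code.

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	#define NTHREADS 4

	static _Atomic unsigned long pde;	/* shared "page table entry" */
	static unsigned long tlb[NTHREADS];	/* per-"CPU" cached copies */
	static pthread_barrier_t bar;

	static void *
	rendezvous(void *arg)
	{
		long id = (long)arg;

		/* Entry: everyone stops before the update (setup phase). */
		pthread_barrier_wait(&bar);

		/* Action: only the initiating thread stores the new PDE. */
		if (id == 0)
			atomic_store(&pde, 0x200083UL);
		pthread_barrier_wait(&bar);

		/* Teardown: every thread refreshes its stale cached copy. */
		tlb[id] = atomic_load(&pde);
		pthread_barrier_wait(&bar);
		return (NULL);
	}

	int
	main(void)
	{
		pthread_t tids[NTHREADS];
		long i;

		pthread_barrier_init(&bar, NULL, NTHREADS);
		for (i = 0; i < NTHREADS; i++)
			pthread_create(&tids[i], NULL, rendezvous, (void *)i);
		for (i = 0; i < NTHREADS; i++)
			pthread_join(tids[i], NULL);
		for (i = 0; i < NTHREADS; i++)
			printf("cpu%ld tlb %#lx\n", i, tlb[i]);
		return (0);
	}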
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 15df851..2b43b37 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -61,6 +61,7 @@ extern char sigcode[];
extern int szsigcode;
extern uint64_t *vm_page_dump;
extern int vm_page_dump_size;
+extern int workaround_erratum383;
extern int _udatasel;
extern int _ucodesel;
extern int _ucode32sel;
diff --git a/sys/amd64/include/specialreg.h b/sys/amd64/include/specialreg.h
index 8cadbcd..b325ed4 100644
--- a/sys/amd64/include/specialreg.h
+++ b/sys/amd64/include/specialreg.h
@@ -494,6 +494,8 @@
#define MSR_TOP_MEM 0xc001001a /* boundary for ram below 4G */
#define MSR_TOP_MEM2 0xc001001d /* boundary for ram above 4G */
#define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */
+#define MSR_MC0_CTL_MASK 0xc0010044
+#define MSR_MC1_CTL_MASK 0xc0010045
/* VIA ACE crypto featureset: for via_feature_rng */
#define VIA_HAS_RNG 1 /* cpu has RNG */
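
Once a kernel with this change is running, the new knobs can be inspected from
userland: hw.mca.erratum383 (read-only) reports whether the recommended
workaround is active, and the hw.mca.amd10h_L1TP tunable can be set to 0 at
boot to select the MSR-masking alternative instead.  A minimal check via
sysctlbyname(3), assuming a FreeBSD kernel that includes this commit:

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		int active;
		size_t len = sizeof(active);

		if (sysctlbyname("hw.mca.erratum383", &active, &len,
		    NULL, 0) == -1) {
			perror("sysctlbyname");
			return (1);
		}
		printf("Erratum 383 workaround %s\n",
		    active ? "enabled" : "disabled");
		return (0);
	}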