diff options
-rw-r--r-- | sys/amd64/amd64/mca.c | 36 | ||||
-rw-r--r-- | sys/amd64/amd64/pmap.c | 131 | ||||
-rw-r--r-- | sys/amd64/include/md_var.h | 1 | ||||
-rw-r--r-- | sys/amd64/include/specialreg.h | 2 |
4 files changed, 162 insertions, 8 deletions
diff --git a/sys/amd64/amd64/mca.c b/sys/amd64/amd64/mca.c index b0e842a..50c335d 100644 --- a/sys/amd64/amd64/mca.c +++ b/sys/amd64/amd64/mca.c @@ -65,6 +65,15 @@ TUNABLE_INT("hw.mca.enabled", &mca_enabled); SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0, "Administrative toggle for machine check support"); +static int amd10h_L1TP = 1; +TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP); +SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0, + "Administrative toggle for logging of level one TLB parity (L1TP) errors"); + +int workaround_erratum383; +SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0, + "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?"); + static STAILQ_HEAD(, mca_internal) mca_records; static struct callout mca_timer; static int mca_ticks = 3600; /* Check hourly by default. */ @@ -481,7 +490,7 @@ void mca_init(void) { uint64_t mcg_cap; - uint64_t ctl; + uint64_t ctl, mask; int skip; int i; @@ -489,6 +498,15 @@ mca_init(void) if (!mca_enabled || !(cpu_feature & CPUID_MCE)) return; + /* + * On AMD Family 10h processors, unless logging of level one TLB + * parity (L1TP) errors is disabled, enable the recommended workaround + * for Erratum 383. + */ + if (cpu_vendor_id == CPU_VENDOR_AMD && + CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP) + workaround_erratum383 = 1; + if (cpu_feature & CPUID_MCA) { if (PCPU_GET(cpuid) == 0) mca_setup(); @@ -499,6 +517,22 @@ mca_init(void) /* Enable MCA features. */ wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE); + /* + * Disable logging of level one TLB parity (L1TP) errors by + * the data and instruction caches as an alternative + * workaround for AMD Family 10h Erratum 383. Unlike the + * recommended workaround, there is no performance penalty to + * this workaround. However, L1TP errors will go unreported. + */ + if (cpu_vendor_id == CPU_VENDOR_AMD && + CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) { + mask = rdmsr(MSR_MC0_CTL_MASK); + if ((mask & (1UL << 5)) == 0) + wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5)); + mask = rdmsr(MSR_MC1_CTL_MASK); + if ((mask & (1UL << 5)) == 0) + wrmsr(MSR_MC1_CTL_MASK, mask | (1UL << 5)); + } for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { /* By default enable logging of all errors. */ ctl = 0xffffffffffffffffUL; diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 07db5d1..c4e6a3b 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -7,7 +7,7 @@ * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. - * Copyright (c) 2005-2008 Alan L. Cox <alc@cs.rice.edu> + * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu> * All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -255,6 +255,9 @@ static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); +static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, + pd_entry_t newpde); +static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); @@ -686,13 +689,13 @@ pmap_init(void) pv_entry_high_water = 9 * (pv_entry_max / 10); /* - * Disable large page mappings by default if the kernel is running in - * a virtual machine on an AMD Family 10h processor. This is a work- - * around for Erratum 383. + * If the kernel is running in a virtual machine on an AMD Family 10h + * processor, then it must assume that MCA is enabled by the virtual + * machine monitor. */ if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) == 0x10) - pg_ps_enabled = 0; + workaround_erratum383 = 1; /* * Are large page mappings enabled? @@ -848,6 +851,42 @@ pmap_cache_bits(int mode, boolean_t is_pde) cache_bits |= PG_NC_PWT; return (cache_bits); } + +/* + * After changing the page size for the specified virtual address in the page + * table, flush the corresponding entries from the processor's TLB. Only the + * calling processor's TLB is affected. + * + * The calling thread must be pinned to a processor. + */ +static void +pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) +{ + u_long cr4; + + if ((newpde & PG_PS) == 0) + /* Demotion: flush a specific 2MB page mapping. */ + invlpg(va); + else if ((newpde & PG_G) == 0) + /* + * Promotion: flush every 4KB page mapping from the TLB + * because there are too many to flush individually. + */ + invltlb(); + else { + /* + * Promotion: flush every 4KB page mapping from the TLB, + * including any global (PG_G) mappings. + */ + cr4 = rcr4(); + load_cr4(cr4 & ~CR4_PGE); + /* + * Although preemption at this point could be detrimental to + * performance, it would not lead to an error. + */ + load_cr4(cr4); + } +} #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. @@ -944,6 +983,69 @@ pmap_invalidate_cache(void) smp_cache_flush(); sched_unpin(); } + +struct pde_action { + cpumask_t store; /* processor that updates the PDE */ + cpumask_t invalidate; /* processors that invalidate their TLB */ + vm_offset_t va; + pd_entry_t *pde; + pd_entry_t newpde; +}; + +static void +pmap_update_pde_action(void *arg) +{ + struct pde_action *act = arg; + + if (act->store == PCPU_GET(cpumask)) + pde_store(act->pde, act->newpde); +} + +static void +pmap_update_pde_teardown(void *arg) +{ + struct pde_action *act = arg; + + if ((act->invalidate & PCPU_GET(cpumask)) != 0) + pmap_update_pde_invalidate(act->va, act->newpde); +} + +/* + * Change the page size for the specified virtual address in a way that + * prevents any possibility of the TLB ever having two entries that map the + * same virtual address using different page sizes. This is the recommended + * workaround for Erratum 383 on AMD Family 10h processors. It prevents a + * machine check exception for a TLB state that is improperly diagnosed as a + * hardware error. + */ +static void +pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) +{ + struct pde_action act; + cpumask_t active, cpumask; + + sched_pin(); + cpumask = PCPU_GET(cpumask); + if (pmap == kernel_pmap) + active = all_cpus; + else + active = pmap->pm_active; + if ((active & PCPU_GET(other_cpus)) != 0) { + act.store = cpumask; + act.invalidate = active; + act.va = va; + act.pde = pde; + act.newpde = newpde; + smp_rendezvous_cpus(cpumask | active, + smp_no_rendevous_barrier, pmap_update_pde_action, + pmap_update_pde_teardown, &act); + } else { + pde_store(pde, newpde); + if ((active & cpumask) != 0) + pmap_update_pde_invalidate(va, newpde); + } + sched_unpin(); +} #else /* !SMP */ /* * Normal, non-SMP, invalidation functions. @@ -981,6 +1083,15 @@ pmap_invalidate_cache(void) wbinvd(); } + +static void +pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) +{ + + pde_store(pde, newpde); + if (pmap == kernel_pmap || pmap->pm_active) + pmap_update_pde_invalidate(va, newpde); +} #endif /* !SMP */ static void @@ -2361,7 +2472,10 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) * processor changing the setting of PG_A and/or PG_M between * the read above and the store below. */ - pde_store(pde, newpde); + if (workaround_erratum383) + pmap_update_pde(pmap, va, pde, newpde); + else + pde_store(pde, newpde); /* * Invalidate a stale recursive mapping of the page table page. @@ -2977,7 +3091,10 @@ setpte: /* * Map the superpage. */ - pde_store(pde, PG_PS | newpde); + if (workaround_erratum383) + pmap_update_pde(pmap, va, pde, PG_PS | newpde); + else + pde_store(pde, PG_PS | newpde); pmap_pde_promotions++; CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx" diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index 15df851..2b43b37 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -61,6 +61,7 @@ extern char sigcode[]; extern int szsigcode; extern uint64_t *vm_page_dump; extern int vm_page_dump_size; +extern int workaround_erratum383; extern int _udatasel; extern int _ucodesel; extern int _ucode32sel; diff --git a/sys/amd64/include/specialreg.h b/sys/amd64/include/specialreg.h index 8cadbcd..b325ed4 100644 --- a/sys/amd64/include/specialreg.h +++ b/sys/amd64/include/specialreg.h @@ -494,6 +494,8 @@ #define MSR_TOP_MEM 0xc001001a /* boundary for ram below 4G */ #define MSR_TOP_MEM2 0xc001001d /* boundary for ram above 4G */ #define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */ +#define MSR_MC0_CTL_MASK 0xc0010044 +#define MSR_MC1_CTL_MASK 0xc0010045 /* VIA ACE crypto featureset: for via_feature_rng */ #define VIA_HAS_RNG 1 /* cpu has RNG */ |