author		kib <kib@FreeBSD.org>	2015-12-03 11:14:14 +0000
committer	kib <kib@FreeBSD.org>	2015-12-03 11:14:14 +0000
commit		f741f698b73f18777a27f138b3cfc8ad44bde038 (patch)
tree		75c40eece99b5dfdb79cdf57b3e74fd84f9976f0 /sys/i386
parent		559961c499e55f090d0cd285103c25e0dfc18d02 (diff)
For amd64 non-PCID machines, and for i386 machines with support for
the PG_G global pte flag, pmap_invalidate_all() fails to flush global
TLB entries [*].  This is because the TLB shootdown handler for such
configurations reloads CR3, and on i386 pmap_invalidate_all() does
the same for the initiating CPU.  Note that the current code does not
issue total invalidation requests for the kernel_pmap.

Rename the amd64 function invltlb_globpcid() to invltlb_glob(); it
has not been specific to PCID for quite some time.  Implement the
same functionality for i386.  Use the function instead of invltlb()
in the shootdown handlers and in i386 pmap_invalidate_all(), but only
for the kernel pmap (which maps pages with the PG_G attribute set);
this takes care of PG_G TLB entries on flush.

To detect the affected pmap in the i386 TLB shootdown handler, the
pmap must be passed to the smp_masked_invltlb() function, which makes
the amd64 and i386 TLB shootdown code almost identical.  Merge the
code under x86/.

Noted by:	jhb [*]
Reviewed by:	cem, jhb, pho
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
Differential revision:	https://reviews.freebsd.org/D4346
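The failure noted at [*] follows from the x86 paging rules: a CR3
write flushes only non-global TLB entries, while entries created from
PTEs with PG_G set survive the reload and must be dropped by toggling
CR4.PGE (or one page at a time with invlpg).  A minimal sketch of
that toggle, mirroring the invltlb_glob() added in the diff below;
the function name here is illustrative, while pgeflag, invltlb(),
CR4_PGE, and the rcr4()/load_cr4() accessors from <machine/cpufunc.h>
are the ones the diff itself uses:

	/*
	 * Sketch: flush the entire TLB, including PG_G (global) entries.
	 * If global pages are not enabled (pgeflag == 0), a plain CR3
	 * reload via invltlb() already flushes everything; otherwise
	 * CR4.PGE must be cleared and restored to drop global entries.
	 */
	static void
	tlb_flush_with_globals(void)
	{
		uint64_t cr4;

		if (pgeflag == 0) {
			invltlb();		/* CR3 reload suffices */
		} else {
			cr4 = rcr4();
			load_cr4(cr4 & ~CR4_PGE); /* flushes all entries */
			load_cr4(cr4 | CR4_PGE);  /* re-enable global pages */
		}
	}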
Diffstat (limited to 'sys/i386')
-rw-r--r--	sys/i386/i386/mp_machdep.c	| 203
-rw-r--r--	sys/i386/i386/pmap.c		|  53
-rw-r--r--	sys/i386/include/pmap.h		|   2
-rw-r--r--	sys/i386/include/smp.h		|   9
4 files changed, 40 insertions(+), 227 deletions(-)
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 9b8b9c9..379eb6d 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -132,11 +132,6 @@ __FBSDID("$FreeBSD$");
extern struct pcpu __pcpu[];
-/* Variables needed for SMP tlb shootdown. */
-vm_offset_t smp_tlb_addr1;
-vm_offset_t smp_tlb_addr2;
-volatile int smp_tlb_wait;
-
/*
* Local data and functions.
*/
@@ -487,201 +482,3 @@ start_ap(int apic_id)
}
return 0; /* return FAILURE */
}
-
-/*
- * Flush the TLB on all other CPU's
- */
-static void
-smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
-{
- u_int ncpu;
-
- ncpu = mp_ncpus - 1; /* does not shootdown self */
- if (ncpu < 1)
- return; /* no other cpus */
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- smp_tlb_addr1 = addr1;
- smp_tlb_addr2 = addr2;
- smp_tlb_wait = 0;
- ipi_all_but_self(vector);
- while (smp_tlb_wait < ncpu)
- ia32_pause();
- mtx_unlock_spin(&smp_ipi_mtx);
-}
-
-static void
-smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
-{
- int cpu, ncpu, othercpus;
-
- othercpus = mp_ncpus - 1;
- if (CPU_ISFULLSET(&mask)) {
- if (othercpus < 1)
- return;
- } else {
- CPU_CLR(PCPU_GET(cpuid), &mask);
- if (CPU_EMPTY(&mask))
- return;
- }
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- smp_tlb_addr1 = addr1;
- smp_tlb_addr2 = addr2;
- atomic_store_rel_int(&smp_tlb_wait, 0);
- if (CPU_ISFULLSET(&mask)) {
- ncpu = othercpus;
- ipi_all_but_self(vector);
- } else {
- ncpu = 0;
- while ((cpu = CPU_FFS(&mask)) != 0) {
- cpu--;
- CPU_CLR(cpu, &mask);
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
- vector);
- ipi_send_cpu(cpu, vector);
- ncpu++;
- }
- }
- while (smp_tlb_wait < ncpu)
- ia32_pause();
- mtx_unlock_spin(&smp_ipi_mtx);
-}
-
-void
-smp_invltlb(void)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_global++;
-#endif
- }
-}
-
-void
-smp_invlpg(vm_offset_t addr)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLPG, addr, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_page++;
-#endif
- }
-}
-
-void
-smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_range++;
- ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
-#endif
- }
-}
-
-void
-smp_masked_invltlb(cpuset_t mask)
-{
-
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_masked_global++;
-#endif
- }
-}
-
-void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
-{
-
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_masked_page++;
-#endif
- }
-}
-
-void
-smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
-{
-
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_masked_range++;
- ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
-#endif
- }
-}
-
-void
-smp_cache_flush(void)
-{
-
- if (smp_started)
- smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
-}
-
-/*
- * Handlers for TLB related IPIs
- */
-void
-invltlb_handler(void)
-{
- uint64_t cr3;
-#ifdef COUNT_XINVLTLB_HITS
- xhits_gbl[PCPU_GET(cpuid)]++;
-#endif /* COUNT_XINVLTLB_HITS */
-#ifdef COUNT_IPIS
- (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- cr3 = rcr3();
- load_cr3(cr3);
- atomic_add_int(&smp_tlb_wait, 1);
-}
-
-void
-invlpg_handler(void)
-{
-#ifdef COUNT_XINVLTLB_HITS
- xhits_pg[PCPU_GET(cpuid)]++;
-#endif /* COUNT_XINVLTLB_HITS */
-#ifdef COUNT_IPIS
- (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- invlpg(smp_tlb_addr1);
-
- atomic_add_int(&smp_tlb_wait, 1);
-}
-
-void
-invlrng_handler(void)
-{
- vm_offset_t addr;
-#ifdef COUNT_XINVLTLB_HITS
- xhits_rng[PCPU_GET(cpuid)]++;
-#endif /* COUNT_XINVLTLB_HITS */
-#ifdef COUNT_IPIS
- (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- addr = smp_tlb_addr1;
- do {
- invlpg(addr);
- addr += PAGE_SIZE;
- } while (addr < smp_tlb_addr2);
-
- atomic_add_int(&smp_tlb_wait, 1);
-}
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 5b69d58..dff9fc5 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -655,7 +655,7 @@ pmap_set_pg(void)
va = KERNBASE + KERNLOAD;
while (va < endva) {
pdir_pde(PTD, va) |= pgeflag;
- invltlb(); /* Play it safe, invltlb() every time */
+ invltlb_glob(); /* Play it safe, invltlb_glob() every time */
va += NBPDR;
}
} else {
@@ -664,7 +664,7 @@ pmap_set_pg(void)
pte = vtopte(va);
if (*pte)
*pte |= pgeflag;
- invltlb(); /* Play it safe, invltlb() every time */
+ invltlb_glob(); /* Play it safe, invltlb_glob() every time */
va += PAGE_SIZE;
}
}
@@ -973,6 +973,22 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
load_cr4(cr4 | CR4_PGE);
}
}
+
+void
+invltlb_glob(void)
+{
+ uint64_t cr4;
+
+ if (pgeflag == 0) {
+ invltlb();
+ } else {
+ cr4 = rcr4();
+ load_cr4(cr4 & ~CR4_PGE);
+ load_cr4(cr4 | CR4_PGE);
+ }
+}
+
+
#ifdef SMP
/*
* For SMP, these functions have to use the IPI mechanism for coherence.
@@ -996,13 +1012,13 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
- cpuset_t other_cpus;
+ cpuset_t *mask, other_cpus;
u_int cpuid;
sched_pin();
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invlpg(va);
- smp_invlpg(va);
+ mask = &all_cpus;
} else {
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
@@ -1010,16 +1026,16 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
if (CPU_ISSET(cpuid, &pmap->pm_active))
invlpg(va);
CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invlpg(other_cpus, va);
+ mask = &other_cpus;
}
+ smp_masked_invlpg(*mask, va);
sched_unpin();
}
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
- cpuset_t other_cpus;
+ cpuset_t *mask, other_cpus;
vm_offset_t addr;
u_int cpuid;
@@ -1027,7 +1043,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
- smp_invlpg_range(sva, eva);
+ mask = &all_cpus;
} else {
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
@@ -1036,22 +1052,25 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invlpg_range(other_cpus, sva, eva);
+ mask = &other_cpus;
}
+ smp_masked_invlpg_range(*mask, sva, eva);
sched_unpin();
}
void
pmap_invalidate_all(pmap_t pmap)
{
- cpuset_t other_cpus;
+ cpuset_t *mask, other_cpus;
u_int cpuid;
sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
+ if (pmap == kernel_pmap) {
+ invltlb_glob();
+ mask = &all_cpus;
+ } else if (!CPU_CMP(&pmap->pm_active, &all_cpus)) {
invltlb();
- smp_invltlb();
+ mask = &all_cpus;
} else {
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
@@ -1059,9 +1078,9 @@ pmap_invalidate_all(pmap_t pmap)
if (CPU_ISSET(cpuid, &pmap->pm_active))
invltlb();
CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invltlb(other_cpus);
+ mask = &other_cpus;
}
+ smp_masked_invltlb(*mask, pmap);
sched_unpin();
}
@@ -1193,7 +1212,9 @@ PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+ if (pmap == kernel_pmap)
+ invltlb_glob();
+ else if (!CPU_EMPTY(&pmap->pm_active))
invltlb();
}
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index 76822b1..f2d8c58 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -394,6 +394,8 @@ void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
boolean_t force);
+void invltlb_glob(void);
+
#endif /* _KERNEL */
#endif /* !LOCORE */
diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h
index 4618419..5948c25 100644
--- a/sys/i386/include/smp.h
+++ b/sys/i386/include/smp.h
@@ -53,10 +53,6 @@ extern u_int ipi_global;
extern u_int ipi_page;
extern u_int ipi_range;
extern u_int ipi_range_size;
-extern u_int ipi_masked_global;
-extern u_int ipi_masked_page;
-extern u_int ipi_masked_range;
-extern u_int ipi_masked_range_size;
struct cpu_info {
int cpu_present:1;
@@ -105,13 +101,10 @@ void ipi_selected(cpuset_t cpus, u_int ipi);
u_int mp_bootaddress(u_int);
void set_interrupt_apic_ids(void);
void smp_cache_flush(void);
-void smp_invlpg(vm_offset_t addr);
void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
-void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
vm_offset_t endva);
-void smp_invltlb(void);
-void smp_masked_invltlb(cpuset_t mask);
+void smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
void mem_range_AP_init(void);
void topo_probe(void);
void ipi_send_cpu(int cpu, u_int ipi);
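
With the pmap now carried in the shootdown request, the merged
handler under x86/ can choose the right flush primitive on each CPU.
A hedged sketch of that dispatch, assuming the request is stored in a
variable named smp_tlb_pmap (the name is illustrative; the
authoritative version lives in the merged x86/ code, which is outside
this sys/i386 diffstat):

	/* Set by the initiator under smp_ipi_mtx before the IPI is sent. */
	static pmap_t smp_tlb_pmap;

	void
	invltlb_handler(void)
	{
		/*
		 * Kernel mappings carry PG_G, so a total invalidation
		 * of the kernel pmap must toggle CR4.PGE; for user
		 * pmaps the plain CR3 reload in invltlb() suffices.
		 */
		if (smp_tlb_pmap == kernel_pmap)
			invltlb_glob();
		else
			invltlb();
		atomic_add_int(&smp_tlb_wait, 1);
	}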