-rw-r--r--  sys/amd64/amd64/apic_vector.S  |  94
-rw-r--r--  sys/amd64/amd64/locore.S       |  23
-rw-r--r--  sys/amd64/amd64/locore.s       |  23
-rw-r--r--  sys/amd64/amd64/mp_machdep.c   | 211
-rw-r--r--  sys/amd64/amd64/mptable.c      | 211
-rw-r--r--  sys/amd64/amd64/pmap.c         | 372
-rw-r--r--  sys/amd64/amd64/support.S      |  36
-rw-r--r--  sys/amd64/amd64/support.s      |  36
-rw-r--r--  sys/amd64/include/cpufunc.h    | 244
-rw-r--r--  sys/amd64/include/mptable.h    | 211
-rw-r--r--  sys/amd64/include/pmap.h       |   2
-rw-r--r--  sys/amd64/include/smp.h        |   3
-rw-r--r--  sys/amd64/isa/intr_machdep.c   |   8
-rw-r--r--  sys/amd64/isa/intr_machdep.h   |  20
-rw-r--r--  sys/amd64/isa/nmi.c            |   8
-rw-r--r--  sys/conf/options.i386          |   1
-rw-r--r--  sys/conf/options.pc98          |   1
-rw-r--r--  sys/i386/i386/apic_vector.s    |  94
-rw-r--r--  sys/i386/i386/locore.s         |  23
-rw-r--r--  sys/i386/i386/mp_machdep.c     | 211
-rw-r--r--  sys/i386/i386/mpapic.c         |   3
-rw-r--r--  sys/i386/i386/mptable.c        | 211
-rw-r--r--  sys/i386/i386/pmap.c           | 372
-rw-r--r--  sys/i386/i386/support.s        |  36
-rw-r--r--  sys/i386/include/cpufunc.h     | 244
-rw-r--r--  sys/i386/include/mptable.h     | 211
-rw-r--r--  sys/i386/include/pmap.h        |   2
-rw-r--r--  sys/i386/include/smp.h         |   3
-rw-r--r--  sys/i386/include/smptests.h    |   7
-rw-r--r--  sys/i386/isa/apic_vector.s     |  94
-rw-r--r--  sys/i386/isa/intr_machdep.c    |   8
-rw-r--r--  sys/i386/isa/intr_machdep.h    |  20
-rw-r--r--  sys/i386/isa/nmi.c             |   8
-rw-r--r--  sys/kern/subr_witness.c        |   3
34 files changed, 2134 insertions, 920 deletions
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index 95c9133..e3a37e1 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -181,30 +181,108 @@ Xspuriousint:
iret
/*
- * Handle TLB shootdowns.
+ * Global address space TLB shootdown.
*/
.text
SUPERALIGN_TEXT
.globl Xinvltlb
Xinvltlb:
pushl %eax
+ pushl %ds
+ movl $KDSEL, %eax /* Kernel data selector */
+ mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
pushl %fs
- movl $KPSEL, %eax
+ movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
- ss
- incl _xhits(,%eax,4)
+ incl xhits_gbl(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl %cr3, %eax /* invalidate the TLB */
movl %eax, %cr3
- ss /* stack segment, avoid %ds load */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
+ lock
+ incl smp_tlb_wait
+
+ popl %ds
+ popl %eax
+ iret
+
+/*
+ * Single page TLB shootdown
+ */
+ .text
+ SUPERALIGN_TEXT
+ .globl Xinvlpg
+Xinvlpg:
+ pushl %eax
+ pushl %ds
+ movl $KDSEL, %eax /* Kernel data selector */
+ mov %ax, %ds
+
+#ifdef COUNT_XINVLTLB_HITS
+ pushl %fs
+ movl $KPSEL, %eax /* Private space selector */
+ mov %ax, %fs
+ movl PCPU(CPUID), %eax
+ popl %fs
+ ss
+ incl xhits_pg(,%eax,4)
+#endif /* COUNT_XINVLTLB_HITS */
+
+ movl smp_tlb_addr1, %eax
+ invlpg (%eax) /* invalidate single page */
+
+ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
+
+ lock
+ incl smp_tlb_wait
+
+ popl %ds
+ popl %eax
+ iret
+
+/*
+ * Page range TLB shootdown.
+ */
+ .text
+ SUPERALIGN_TEXT
+ .globl Xinvlrng
+Xinvlrng:
+ pushl %eax
+ pushl %edx
+ pushl %ds
+ movl $KDSEL, %eax /* Kernel data selector */
+ mov %ax, %ds
+
+#ifdef COUNT_XINVLTLB_HITS
+ pushl %fs
+ movl $KPSEL, %eax /* Private space selector */
+ mov %ax, %fs
+ movl PCPU(CPUID), %eax
+ popl %fs
+ incl xhits_rng(,%eax,4)
+#endif /* COUNT_XINVLTLB_HITS */
+
+ movl smp_tlb_addr1, %edx
+ movl smp_tlb_addr2, %eax
+1: invlpg (%edx) /* invalidate single page */
+ addl $PAGE_SIZE, %edx
+ cmpl %edx, %eax
+ jb 1b
+
+ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
+
+ lock
+ incl smp_tlb_wait
+
+ popl %ds
+ popl %edx
popl %eax
iret
@@ -443,12 +521,6 @@ Xrendezvous:
.data
-#ifdef COUNT_XINVLTLB_HITS
- .globl _xhits
-_xhits:
- .space (NCPU * 4), 0
-#endif /* COUNT_XINVLTLB_HITS */
-
.globl apic_pin_trigger
apic_pin_trigger:
.long 0
diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S
index 4fff220..299bc3e 100644
--- a/sys/amd64/amd64/locore.S
+++ b/sys/amd64/amd64/locore.S
@@ -381,12 +381,6 @@ begin:
movl IdlePTD,%esi
movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
- testl $CPUID_PGE, R(cpu_feature)
- jz 1f
- movl %cr4, %eax
- orl $CR4_PGE, %eax
- movl %eax, %cr4
-1:
pushl physfree /* value of first for init386(first) */
call init386 /* wire 386 chip for unix operation */
@@ -809,14 +803,7 @@ no_kernend:
jne map_read_write
#endif
xorl %edx,%edx
-
-#if !defined(SMP)
- testl $CPUID_PGE, R(cpu_feature)
- jz 2f
- orl $PG_G,%edx
-#endif
-
-2: movl $R(etext),%ecx
+ movl $R(etext),%ecx
addl $PAGE_MASK,%ecx
shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx)
@@ -827,13 +814,7 @@ no_kernend:
andl $~PAGE_MASK, %eax
map_read_write:
movl $PG_RW,%edx
-#if !defined(SMP)
- testl $CPUID_PGE, R(cpu_feature)
- jz 1f
- orl $PG_G,%edx
-#endif
-
-1: movl R(KERNend),%ecx
+ movl R(KERNend),%ecx
subl %eax,%ecx
shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx)
diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s
index 4fff220..299bc3e 100644
--- a/sys/amd64/amd64/locore.s
+++ b/sys/amd64/amd64/locore.s
@@ -381,12 +381,6 @@ begin:
movl IdlePTD,%esi
movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
- testl $CPUID_PGE, R(cpu_feature)
- jz 1f
- movl %cr4, %eax
- orl $CR4_PGE, %eax
- movl %eax, %cr4
-1:
pushl physfree /* value of first for init386(first) */
call init386 /* wire 386 chip for unix operation */
@@ -809,14 +803,7 @@ no_kernend:
jne map_read_write
#endif
xorl %edx,%edx
-
-#if !defined(SMP)
- testl $CPUID_PGE, R(cpu_feature)
- jz 2f
- orl $PG_G,%edx
-#endif
-
-2: movl $R(etext),%ecx
+ movl $R(etext),%ecx
addl $PAGE_MASK,%ecx
shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx)
@@ -827,13 +814,7 @@ no_kernend:
andl $~PAGE_MASK, %eax
map_read_write:
movl $PG_RW,%edx
-#if !defined(SMP)
- testl $CPUID_PGE, R(cpu_feature)
- jz 1f
- orl $PG_G,%edx
-#endif
-
-1: movl R(KERNend),%ecx
+ movl R(KERNend),%ecx
subl %eax,%ecx
shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx)
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 27ee7ae..008dfc5 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU];
+#ifdef APIC_IO
+/* Variables needed for SMP tlb shootdown. */
+u_int smp_tlb_addr1;
+u_int smp_tlb_addr2;
+volatile int smp_tlb_wait;
+static struct mtx smp_tlb_mtx;
+#endif
+
/*
* Local data and functions.
*/
@@ -335,6 +343,9 @@ init_locks(void)
#ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */
+#ifdef APIC_IO
+ mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN);
+#endif
}
/*
@@ -604,6 +615,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(XINVLPG_OFFSET, Xinvlpg,
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(XINVLRNG_OFFSET, Xinvlrng,
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock,
@@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
+#if defined(APIC_IO)
+
+#ifdef COUNT_XINVLTLB_HITS
+u_int xhits_gbl[MAXCPU];
+u_int xhits_pg[MAXCPU];
+u_int xhits_rng[MAXCPU];
+SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
+ sizeof(xhits_gbl), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
+ sizeof(xhits_pg), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
+ sizeof(xhits_rng), "IU", "");
+
+u_int ipi_global;
+u_int ipi_page;
+u_int ipi_range;
+u_int ipi_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
+ 0, "");
+
+u_int ipi_masked_global;
+u_int ipi_masked_page;
+u_int ipi_masked_range;
+u_int ipi_masked_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
+ &ipi_masked_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
+ &ipi_masked_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
+ &ipi_masked_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
+ &ipi_masked_range_size, 0, "");
+#endif
+
/*
* Flush the TLB on all other CPU's
- *
- * XXX: Needs to handshake and wait for completion before proceding.
*/
+static void
+smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2)
+{
+ u_int ncpu;
+ register_t eflags;
+
+ ncpu = mp_ncpus - 1; /* does not shootdown self */
+ if (ncpu < 1)
+ return; /* no other cpus */
+ eflags = read_eflags();
+ if ((eflags & PSL_I) == 0)
+ panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
+ mtx_lock_spin(&smp_tlb_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_wait = 0;
+ ipi_all_but_self(vector);
+ while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
+ /* XXX cpu_pause() */ ;
+ mtx_unlock_spin(&smp_tlb_mtx);
+}
+
+static void
+smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2)
+{
+ u_int m;
+ int i, ncpu, othercpus;
+ register_t eflags;
+
+ othercpus = mp_ncpus - 1;
+ if (mask == (u_int)-1) {
+ ncpu = othercpus;
+ if (ncpu < 1)
+ return;
+ } else {
+ /* XXX there should be a pcpu self mask */
+ mask &= ~(1 << PCPU_GET(cpuid));
+ if (mask == 0)
+ return;
+ /* Count the target cpus */
+ ncpu = 0;
+ m = mask;
+ while ((i = ffs(m)) != 0) {
+ m >>= i;
+ ncpu++;
+ }
+ if (ncpu > othercpus) {
+ /* XXX this should be a panic offence */
+ printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
+ ncpu, othercpus);
+ ncpu = othercpus;
+ }
+ /* XXX should be a panic, implied by mask == 0 above */
+ if (ncpu < 1)
+ return;
+ }
+ eflags = read_eflags();
+ if ((eflags & PSL_I) == 0)
+ panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
+ mtx_lock_spin(&smp_tlb_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_wait = 0;
+ if (mask == (u_int)-1)
+ ipi_all_but_self(vector);
+ else
+ ipi_selected(mask, vector);
+ while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
+ /* XXX cpu_pause() */ ;
+ mtx_unlock_spin(&smp_tlb_mtx);
+}
+#endif
+
void
smp_invltlb(void)
{
#if defined(APIC_IO)
- if (smp_started)
- ipi_all_but_self(IPI_INVLTLB);
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
+#endif
+ }
#endif /* APIC_IO */
}
void
-invlpg(u_int addr)
+smp_invlpg(u_int addr)
{
- __asm __volatile("invlpg (%0)"::"r"(addr):"memory");
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_page++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /* send a message to the other CPUs */
- smp_invltlb();
+void
+smp_invlpg_range(u_int addr1, u_int addr2)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_range++;
+ ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+#endif /* APIC_IO */
}
void
-invltlb(void)
+smp_masked_invltlb(u_int mask)
{
- u_long temp;
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_global++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /*
- * This should be implemented as load_cr3(rcr3()) when load_cr3() is
- * inlined.
- */
- __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
+void
+smp_masked_invlpg(u_int mask, u_int addr)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_page++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /* send a message to the other CPUs */
- smp_invltlb();
+void
+smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_range++;
+ ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+#endif /* APIC_IO */
}
@@ -2280,6 +2451,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ if (bootverbose)
+ apic_dump("ap_init()");
+
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) {
@@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame)
{
mtx_lock_spin(&sched_lock);
- statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
+ statclock_process(curthread->td_kse, TRAPF_PC(&frame),
+ TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock);
}
diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c
index 27ee7ae..008dfc5 100644
--- a/sys/amd64/amd64/mptable.c
+++ b/sys/amd64/amd64/mptable.c
@@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU];
+#ifdef APIC_IO
+/* Variables needed for SMP tlb shootdown. */
+u_int smp_tlb_addr1;
+u_int smp_tlb_addr2;
+volatile int smp_tlb_wait;
+static struct mtx smp_tlb_mtx;
+#endif
+
/*
* Local data and functions.
*/
@@ -335,6 +343,9 @@ init_locks(void)
#ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */
+#ifdef APIC_IO
+ mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN);
+#endif
}
/*
@@ -604,6 +615,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(XINVLPG_OFFSET, Xinvlpg,
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(XINVLRNG_OFFSET, Xinvlrng,
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock,
@@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
+#if defined(APIC_IO)
+
+#ifdef COUNT_XINVLTLB_HITS
+u_int xhits_gbl[MAXCPU];
+u_int xhits_pg[MAXCPU];
+u_int xhits_rng[MAXCPU];
+SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
+ sizeof(xhits_gbl), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
+ sizeof(xhits_pg), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
+ sizeof(xhits_rng), "IU", "");
+
+u_int ipi_global;
+u_int ipi_page;
+u_int ipi_range;
+u_int ipi_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
+ 0, "");
+
+u_int ipi_masked_global;
+u_int ipi_masked_page;
+u_int ipi_masked_range;
+u_int ipi_masked_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
+ &ipi_masked_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
+ &ipi_masked_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
+ &ipi_masked_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
+ &ipi_masked_range_size, 0, "");
+#endif
+
/*
* Flush the TLB on all other CPU's
- *
- * XXX: Needs to handshake and wait for completion before proceding.
*/
+static void
+smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2)
+{
+ u_int ncpu;
+ register_t eflags;
+
+ ncpu = mp_ncpus - 1; /* does not shootdown self */
+ if (ncpu < 1)
+ return; /* no other cpus */
+ eflags = read_eflags();
+ if ((eflags & PSL_I) == 0)
+ panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
+ mtx_lock_spin(&smp_tlb_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_wait = 0;
+ ipi_all_but_self(vector);
+ while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
+ /* XXX cpu_pause() */ ;
+ mtx_unlock_spin(&smp_tlb_mtx);
+}
+
+static void
+smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2)
+{
+ u_int m;
+ int i, ncpu, othercpus;
+ register_t eflags;
+
+ othercpus = mp_ncpus - 1;
+ if (mask == (u_int)-1) {
+ ncpu = othercpus;
+ if (ncpu < 1)
+ return;
+ } else {
+ /* XXX there should be a pcpu self mask */
+ mask &= ~(1 << PCPU_GET(cpuid));
+ if (mask == 0)
+ return;
+ /* Count the target cpus */
+ ncpu = 0;
+ m = mask;
+ while ((i = ffs(m)) != 0) {
+ m >>= i;
+ ncpu++;
+ }
+ if (ncpu > othercpus) {
+ /* XXX this should be a panic offence */
+ printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
+ ncpu, othercpus);
+ ncpu = othercpus;
+ }
+ /* XXX should be a panic, implied by mask == 0 above */
+ if (ncpu < 1)
+ return;
+ }
+ eflags = read_eflags();
+ if ((eflags & PSL_I) == 0)
+ panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
+ mtx_lock_spin(&smp_tlb_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_wait = 0;
+ if (mask == (u_int)-1)
+ ipi_all_but_self(vector);
+ else
+ ipi_selected(mask, vector);
+ while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
+ /* XXX cpu_pause() */ ;
+ mtx_unlock_spin(&smp_tlb_mtx);
+}
+#endif
+
void
smp_invltlb(void)
{
#if defined(APIC_IO)
- if (smp_started)
- ipi_all_but_self(IPI_INVLTLB);
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
+#endif
+ }
#endif /* APIC_IO */
}
void
-invlpg(u_int addr)
+smp_invlpg(u_int addr)
{
- __asm __volatile("invlpg (%0)"::"r"(addr):"memory");
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_page++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /* send a message to the other CPUs */
- smp_invltlb();
+void
+smp_invlpg_range(u_int addr1, u_int addr2)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_range++;
+ ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+#endif /* APIC_IO */
}
void
-invltlb(void)
+smp_masked_invltlb(u_int mask)
{
- u_long temp;
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_global++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /*
- * This should be implemented as load_cr3(rcr3()) when load_cr3() is
- * inlined.
- */
- __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
+void
+smp_masked_invlpg(u_int mask, u_int addr)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_page++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /* send a message to the other CPUs */
- smp_invltlb();
+void
+smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_range++;
+ ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+#endif /* APIC_IO */
}
@@ -2280,6 +2451,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ if (bootverbose)
+ apic_dump("ap_init()");
+
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) {
@@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame)
{
mtx_lock_spin(&sched_lock);
- statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
+ statclock_process(curthread->td_kse, TRAPF_PC(&frame),
+ TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock);
}
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index f12cb0b..ba3ee22 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -85,6 +85,9 @@
#include <sys/user.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>
+#if defined(SMP)
+#include <sys/smp.h>
+#endif
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -101,7 +104,6 @@
#include <machine/md_var.h>
#include <machine/specialreg.h>
#if defined(SMP) || defined(APIC_IO)
-#include <machine/smp.h>
#include <machine/apic.h>
#include <machine/segments.h>
#include <machine/tss.h>
@@ -259,10 +261,10 @@ static vm_offset_t
pmap_kmem_choose(vm_offset_t addr)
{
vm_offset_t newaddr = addr;
+
#ifndef DISABLE_PSE
- if (cpu_feature & CPUID_PSE) {
+ if (cpu_feature & CPUID_PSE)
newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
- }
#endif
return newaddr;
}
@@ -367,10 +369,9 @@ pmap_bootstrap(firstaddr, loadaddr)
PTD[i] = 0;
pgeflag = 0;
-#if !defined(SMP) /* XXX - see also mp_machdep.c */
- if (cpu_feature & CPUID_PGE) {
+#if !defined(SMP) || defined(ENABLE_PG_G)
+ if (cpu_feature & CPUID_PGE)
pgeflag = PG_G;
- }
#endif
/*
@@ -383,7 +384,7 @@ pmap_bootstrap(firstaddr, loadaddr)
*/
pdir4mb = 0;
-#if !defined(DISABLE_PSE)
+#ifndef DISABLE_PSE
if (cpu_feature & CPUID_PSE) {
pd_entry_t ptditmp;
/*
@@ -394,57 +395,64 @@ pmap_bootstrap(firstaddr, loadaddr)
ptditmp &= ~(NBPDR - 1);
ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
pdir4mb = ptditmp;
-
-#if !defined(SMP)
- /*
- * Enable the PSE mode.
- */
- load_cr4(rcr4() | CR4_PSE);
-
- /*
- * We can do the mapping here for the single processor
- * case. We simply ignore the old page table page from
- * now on.
- */
- /*
- * For SMP, we still need 4K pages to bootstrap APs,
- * PSE will be enabled as soon as all APs are up.
- */
- PTD[KPTDI] = (pd_entry_t) ptditmp;
- kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
- invltlb();
-#endif
}
#endif
-
+#ifndef SMP
+ /*
+ * Turn on PGE/PSE. SMP does this later on since the
+ * 4K page tables are required for AP boot (for now).
+ * XXX fixme.
+ */
+ pmap_set_opt();
+#endif
#ifdef SMP
if (cpu_apic_address == 0)
panic("pmap_bootstrap: no local apic! (non-SMP hardware?)");
-
/* local apic is mapped on last page */
SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
(cpu_apic_address & PG_FRAME));
#endif
-
- invltlb();
+ cpu_invltlb();
}
-#ifdef SMP
/*
- * Set 4mb pdir for mp startup
+ * Enable 4MB page mode for MP startup. Turn on PG_G support.
+ * BSP will run this after all the AP's have started up.
*/
void
pmap_set_opt(void)
{
- if (pseflag && (cpu_feature & CPUID_PSE)) {
+ pt_entry_t *pte;
+ vm_offset_t va;
+
+ if (pgeflag && (cpu_feature & CPUID_PGE))
+ load_cr4(rcr4() | CR4_PGE);
+#ifndef DISABLE_PSE
+ if (pseflag && (cpu_feature & CPUID_PSE))
load_cr4(rcr4() | CR4_PSE);
- if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */
+#endif
+ if (PCPU_GET(cpuid) == 0) {
+#ifndef DISABLE_PSE
+ if (pdir4mb)
kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb;
- cpu_invltlb();
+#endif
+ if (pgeflag) {
+ /* XXX see earlier comments about virtual_avail */
+ for (va = KERNBASE; va < virtual_avail; va += PAGE_SIZE)
+ {
+ pte = vtopte(va);
+ if (*pte)
+ *pte |= pgeflag;
+ }
}
- }
+ /*
+ * for SMP, this will cause all cpus to reload again, which
+ * is actually what we want since they now have CR4_PGE on.
+ */
+ invltlb();
+ } else
+ cpu_invltlb();
}
-#endif
/*
* Initialize the pmap module.
@@ -552,27 +560,37 @@ pmap_track_modified(vm_offset_t va)
return 0;
}
-static PMAP_INLINE void
-invltlb_1pg(vm_offset_t va)
-{
-#ifdef I386_CPU
- invltlb();
-#else
- invlpg(va);
-#endif
-}
-
static __inline void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
#if defined(SMP)
- if (pmap->pm_active & PCPU_GET(cpumask))
- cpu_invlpg((void *)va);
- if (pmap->pm_active & PCPU_GET(other_cpus))
- smp_invltlb();
+ u_int cpumask;
+ u_int other_cpus;
+ struct thread *td;
+
+ td = curthread;
+ critical_enter();
+ /*
+ * We need to disable interrupt preemption but MUST NOT have
+ * interrupts disabled here.
+ * XXX we may need to hold schedlock to get a coherent pm_active
+ */
+ if (td->td_critnest == 1)
+ cpu_critical_exit(td->td_savecrit);
+ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
+ invlpg(va); /* global */
+ } else {
+ cpumask = PCPU_GET(cpumask);
+ other_cpus = PCPU_GET(other_cpus);
+ if (pmap->pm_active & cpumask)
+ cpu_invlpg(va);
+ if (pmap->pm_active & other_cpus)
+ smp_masked_invlpg(pmap->pm_active & other_cpus, va);
+ }
+ critical_exit();
#else
if (pmap->pm_active)
- invltlb_1pg(va);
+ cpu_invlpg(va);
#endif
}
@@ -580,10 +598,30 @@ static __inline void
pmap_invalidate_all(pmap_t pmap)
{
#if defined(SMP)
- if (pmap->pm_active & PCPU_GET(cpumask))
- cpu_invltlb();
- if (pmap->pm_active & PCPU_GET(other_cpus))
- smp_invltlb();
+ u_int cpumask;
+ u_int other_cpus;
+ struct thread *td;
+
+ td = curthread;
+ critical_enter();
+ /*
+ * We need to disable interrupt preemption but MUST NOT have
+ * interrupts disabled here.
+ * XXX we may need to hold schedlock to get a coherent pm_active
+ */
+ if (td->td_critnest == 1)
+ cpu_critical_exit(td->td_savecrit);
+ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
+ invltlb(); /* global */
+ } else {
+ cpumask = PCPU_GET(cpumask);
+ other_cpus = PCPU_GET(other_cpus);
+ if (pmap->pm_active & cpumask)
+ cpu_invltlb();
+ if (pmap->pm_active & other_cpus)
+ smp_masked_invltlb(pmap->pm_active & other_cpus);
+ }
+ critical_exit();
#else
if (pmap->pm_active)
invltlb();
@@ -609,12 +647,7 @@ get_ptbase(pmap)
/* otherwise, we are alternate address space */
if (frame != (APTDpde & PG_FRAME)) {
APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
-#if defined(SMP)
- /* The page directory is not shared between CPUs */
- cpu_invltlb();
-#else
invltlb();
-#endif
}
return APTmap;
}
@@ -643,7 +676,7 @@ pmap_pte_quick(pmap, va)
newpf = pde & PG_FRAME;
if (((*PMAP1) & PG_FRAME) != newpf) {
*PMAP1 = newpf | PG_RW | PG_V;
- invltlb_1pg((vm_offset_t) PADDR1);
+ pmap_invalidate_page(pmap, (vm_offset_t) PADDR1);
}
return PADDR1 + (index & (NPTEPG - 1));
}
@@ -689,20 +722,17 @@ pmap_extract(pmap, va)
/*
* add a wired page to the kva
- * note that in order for the mapping to take effect -- you
- * should do a invltlb after doing the pmap_kenter...
*/
PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_offset_t pa)
{
pt_entry_t *pte;
- pt_entry_t npte, opte;
+ pt_entry_t npte;
npte = pa | PG_RW | PG_V | pgeflag;
pte = vtopte(va);
- opte = *pte;
*pte = npte;
- invltlb_1pg(va);
+ invlpg(va);
}
/*
@@ -715,7 +745,7 @@ pmap_kremove(vm_offset_t va)
pte = vtopte(va);
*pte = 0;
- invltlb_1pg(va);
+ invlpg(va);
}
/*
@@ -733,13 +763,17 @@ pmap_kremove(vm_offset_t va)
vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{
- vm_offset_t sva = *virt;
- vm_offset_t va = sva;
+ vm_offset_t va, sva;
+ pt_entry_t *pte;
+
+ va = sva = *virt;
while (start < end) {
- pmap_kenter(va, start);
+ pte = vtopte(va);
+ *pte = start | PG_RW | PG_V | pgeflag;
va += PAGE_SIZE;
start += PAGE_SIZE;
}
+ invlpg_range(sva, end);
*virt = va;
return (sva);
}
@@ -754,28 +788,21 @@ pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
* over. The page *must* be wired.
*/
void
-pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
+pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
{
- vm_offset_t end_va;
+ vm_offset_t va, end_va;
+ pt_entry_t *pte;
+ va = sva;
end_va = va + count * PAGE_SIZE;
-
- while (va < end_va) {
- pt_entry_t *pte;
+ while (va < end_va) {
pte = vtopte(va);
*pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
-#ifdef SMP
- cpu_invlpg((void *)va);
-#else
- invltlb_1pg(va);
-#endif
va += PAGE_SIZE;
m++;
}
-#ifdef SMP
- smp_invltlb();
-#endif
+ invlpg_range(sva, end_va);
}
/*
@@ -783,27 +810,20 @@ pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
* kernel -- it is meant only for temporary mappings.
*/
void
-pmap_qremove(vm_offset_t va, int count)
+pmap_qremove(vm_offset_t sva, int count)
{
- vm_offset_t end_va;
+ pt_entry_t *pte;
+ vm_offset_t va, end_va;
- end_va = va + count*PAGE_SIZE;
+ va = sva;
+ end_va = va + count * PAGE_SIZE;
while (va < end_va) {
- pt_entry_t *pte;
-
pte = vtopte(va);
*pte = 0;
-#ifdef SMP
- cpu_invlpg((void *)va);
-#else
- invltlb_1pg(va);
-#endif
va += PAGE_SIZE;
}
-#ifdef SMP
- smp_invltlb();
-#endif
+ invlpg_range(sva, end_va);
}
static vm_page_t
@@ -824,9 +844,6 @@ retry:
void
pmap_new_proc(struct proc *p)
{
-#ifdef I386_CPU
- int updateneeded = 0;
-#endif
int i;
vm_object_t upobj;
vm_offset_t up;
@@ -870,23 +887,14 @@ pmap_new_proc(struct proc *p)
* Enter the page into the kernel address space.
*/
*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
- if (oldpte) {
-#ifdef I386_CPU
- updateneeded = 1;
-#else
+ if (oldpte)
invlpg(up + i * PAGE_SIZE);
-#endif
- }
vm_page_wakeup(m);
vm_page_flag_clear(m, PG_ZERO);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
m->valid = VM_PAGE_BITS_ALL;
}
-#ifdef I386_CPU
- if (updateneeded)
- invltlb();
-#endif
}
/*
@@ -901,7 +909,7 @@ pmap_dispose_proc(p)
vm_object_t upobj;
vm_offset_t up;
vm_page_t m;
- pt_entry_t *ptek, oldpte;
+ pt_entry_t *ptek;
upobj = p->p_upages_obj;
up = (vm_offset_t)p->p_uarea;
@@ -911,17 +919,11 @@ pmap_dispose_proc(p)
if (m == NULL)
panic("pmap_dispose_proc: upage already missing?");
vm_page_busy(m);
- oldpte = *(ptek + i);
*(ptek + i) = 0;
-#ifndef I386_CPU
invlpg(up + i * PAGE_SIZE);
-#endif
vm_page_unwire(m, 0);
vm_page_free(m);
}
-#ifdef I386_CPU
- invltlb();
-#endif
}
/*
@@ -986,9 +988,6 @@ pmap_swapin_proc(p)
void
pmap_new_thread(struct thread *td)
{
-#ifdef I386_CPU
- int updateneeded = 0;
-#endif
int i;
vm_object_t ksobj;
vm_page_t m;
@@ -1019,13 +1018,8 @@ pmap_new_thread(struct thread *td)
ptek = vtopte(ks - PAGE_SIZE);
oldpte = *ptek;
*ptek = 0;
- if (oldpte) {
-#ifdef I386_CPU
- updateneeded = 1;
-#else
+ if (oldpte)
invlpg(ks - PAGE_SIZE);
-#endif
- }
ptek++;
#else
/* get a kernel virtual address for the kstack for this thread */
@@ -1055,23 +1049,14 @@ pmap_new_thread(struct thread *td)
* Enter the page into the kernel address space.
*/
*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
- if (oldpte) {
-#ifdef I386_CPU
- updateneeded = 1;
-#else
+ if (oldpte)
invlpg(ks + i * PAGE_SIZE);
-#endif
- }
vm_page_wakeup(m);
vm_page_flag_clear(m, PG_ZERO);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
m->valid = VM_PAGE_BITS_ALL;
}
-#ifdef I386_CPU
- if (updateneeded)
- invltlb();
-#endif
}
/*
@@ -1086,7 +1071,7 @@ pmap_dispose_thread(td)
vm_object_t ksobj;
vm_offset_t ks;
vm_page_t m;
- pt_entry_t *ptek, oldpte;
+ pt_entry_t *ptek;
ksobj = td->td_kstack_obj;
ks = td->td_kstack;
@@ -1096,17 +1081,11 @@ pmap_dispose_thread(td)
if (m == NULL)
panic("pmap_dispose_thread: kstack already missing?");
vm_page_busy(m);
- oldpte = *(ptek + i);
*(ptek + i) = 0;
-#ifndef I386_CPU
invlpg(ks + i * PAGE_SIZE);
-#endif
vm_page_unwire(m, 0);
vm_page_free(m);
}
-#ifdef I386_CPU
- invltlb();
-#endif
}
/*
@@ -2207,13 +2186,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
if ((origpte & PG_RW) == 0) {
*pte |= PG_RW;
-#ifdef SMP
- cpu_invlpg((void *)va);
- if (pmap->pm_active & PCPU_GET(other_cpus))
- smp_invltlb();
-#else
- invltlb_1pg(va);
-#endif
+ pmap_invalidate_page(pmap, va);
}
return;
}
@@ -2281,13 +2254,7 @@ validate:
if ((origpte & ~(PG_M|PG_A)) != newpte) {
*pte = newpte | PG_A;
/*if (origpte)*/ {
-#ifdef SMP
- cpu_invlpg((void *)va);
- if (pmap->pm_active & PCPU_GET(other_cpus))
- smp_invltlb();
-#else
- invltlb_1pg(va);
-#endif
+ pmap_invalidate_page(pmap, va);
}
}
}
@@ -2710,7 +2677,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t pdnxt;
pd_entry_t src_frame, dst_frame;
vm_page_t m;
- pd_entry_t saved_pde;
if (dst_addr != src_addr)
return;
@@ -2720,17 +2686,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
return;
dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME;
- if (dst_frame != (APTDpde & PG_FRAME)) {
- APTDpde = dst_frame | PG_RW | PG_V;
-#if defined(SMP)
- /* The page directory is not shared between CPUs */
- cpu_invltlb();
-#else
- invltlb();
-#endif
- }
- saved_pde = APTDpde & (PG_FRAME | PG_RW | PG_V);
- for(addr = src_addr; addr < end_addr; addr = pdnxt) {
+ for (addr = src_addr; addr < end_addr; addr = pdnxt) {
pt_entry_t *src_pte, *dst_pte;
vm_page_t dstmpte, srcmpte;
pd_entry_t srcptepaddr;
@@ -2771,6 +2727,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
if (pdnxt > end_addr)
pdnxt = end_addr;
+ /*
+ * Have to recheck this before every avtopte() call below
+ * in case we have blocked and something else used APTDpde.
+ */
+ if (dst_frame != (APTDpde & PG_FRAME)) {
+ APTDpde = dst_frame | PG_RW | PG_V;
+ invltlb();
+ }
src_pte = vtopte(addr);
dst_pte = avtopte(addr);
while (addr < pdnxt) {
@@ -2786,16 +2750,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
* block.
*/
dstmpte = pmap_allocpte(dst_pmap, addr);
- if ((APTDpde & PG_FRAME) !=
- (saved_pde & PG_FRAME)) {
- APTDpde = saved_pde;
-printf ("IT HAPPENNED!");
-#if defined(SMP)
- cpu_invltlb();
-#else
- invltlb();
-#endif
- }
if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
/*
* Clear the modified and
@@ -2839,12 +2793,15 @@ void
pmap_zero_page(vm_offset_t phys)
{
+#ifdef SMP
+ /* XXX overkill, we only want to disable migration here */
+ /* XXX or maybe not. down the track we have reentrancy issues */
+ critical_enter();
+#endif
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
-
*CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
- invltlb_1pg((vm_offset_t)CADDR2);
-
+ cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR2);
@@ -2852,6 +2809,9 @@ pmap_zero_page(vm_offset_t phys)
#endif
bzero(CADDR2, PAGE_SIZE);
*CMAP2 = 0;
+#ifdef SMP
+ critical_exit();
+#endif
}
/*
@@ -2864,12 +2824,15 @@ void
pmap_zero_page_area(vm_offset_t phys, int off, int size)
{
+#ifdef SMP
+ /* XXX overkill, we only want to disable migration here */
+ /* XXX or maybe not. down the track we have reentrancy issues */
+ critical_enter();
+#endif
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
-
*CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
- invltlb_1pg((vm_offset_t)CADDR2);
-
+ cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
i686_pagezero(CADDR2);
@@ -2877,6 +2840,9 @@ pmap_zero_page_area(vm_offset_t phys, int off, int size)
#endif
bzero((char *)CADDR2 + off, size);
*CMAP2 = 0;
+#ifdef SMP
+ critical_exit();
+#endif
}
/*
@@ -2889,6 +2855,11 @@ void
pmap_copy_page(vm_offset_t src, vm_offset_t dst)
{
+#ifdef SMP
+ /* XXX overkill, we only want to disable migration here */
+ /* XXX or maybe not. down the track we have reentrancy issues */
+ critical_enter();
+#endif
if (*CMAP1)
panic("pmap_copy_page: CMAP1 busy");
if (*CMAP2)
@@ -2896,17 +2867,14 @@ pmap_copy_page(vm_offset_t src, vm_offset_t dst)
*CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
*CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
-#ifdef I386_CPU
- invltlb();
-#else
- invlpg((u_int)CADDR1);
- invlpg((u_int)CADDR2);
-#endif
-
+ cpu_invlpg((u_int)CADDR1); /* SMP: local only */
+ cpu_invlpg((u_int)CADDR2); /* SMP: local only */
bcopy(CADDR1, CADDR2, PAGE_SIZE);
-
*CMAP1 = 0;
*CMAP2 = 0;
+#ifdef SMP
+ critical_exit();
+#endif
}
@@ -3322,14 +3290,13 @@ pmap_mapdev(pa, size)
panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
pa = pa & PG_FRAME;
- for (tmpva = va; size > 0;) {
+ for (tmpva = va; size > 0; ) {
pte = vtopte(tmpva);
*pte = pa | PG_RW | PG_V | pgeflag;
size -= PAGE_SIZE;
tmpva += PAGE_SIZE;
- pa += PAGE_SIZE;
}
- invltlb();
+ invlpg_range(va, tmpva);
return ((void *)(va + offset));
}
@@ -3339,11 +3306,20 @@ pmap_unmapdev(va, size)
vm_offset_t va;
vm_size_t size;
{
- vm_offset_t base, offset;
+ vm_offset_t base, offset, tmpva;
+ pt_entry_t *pte;
base = va & PG_FRAME;
offset = va & PAGE_MASK;
size = roundup(offset + size, PAGE_SIZE);
+
+ for (tmpva = base; size > 0; ) {
+ pte = vtopte(tmpva);
+ *pte = 0;
+ size -= PAGE_SIZE;
+ tmpva += PAGE_SIZE;
+ }
+ invlpg_range(va, tmpva);
kmem_free(kernel_map, base, size);
}
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index bc58672..0649009 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -1591,42 +1591,6 @@ ENTRY(ssdtosd)
popl %ebx
ret
-/* load_cr0(cr0) */
-ENTRY(load_cr0)
- movl 4(%esp),%eax
- movl %eax,%cr0
- ret
-
-/* rcr0() */
-ENTRY(rcr0)
- movl %cr0,%eax
- ret
-
-/* rcr3() */
-ENTRY(rcr3)
- movl %cr3,%eax
- ret
-
-/* void load_cr3(caddr_t cr3) */
-ENTRY(load_cr3)
-#ifdef SWTCH_OPTIM_STATS
- incl tlb_flush_count
-#endif
- movl 4(%esp),%eax
- movl %eax,%cr3
- ret
-
-/* rcr4() */
-ENTRY(rcr4)
- movl %cr4,%eax
- ret
-
-/* void load_cr4(caddr_t cr4) */
-ENTRY(load_cr4)
- movl 4(%esp),%eax
- movl %eax,%cr4
- ret
-
/* void reset_dbregs() */
ENTRY(reset_dbregs)
movl $0,%eax
diff --git a/sys/amd64/amd64/support.s b/sys/amd64/amd64/support.s
index bc58672..0649009 100644
--- a/sys/amd64/amd64/support.s
+++ b/sys/amd64/amd64/support.s
@@ -1591,42 +1591,6 @@ ENTRY(ssdtosd)
popl %ebx
ret
-/* load_cr0(cr0) */
-ENTRY(load_cr0)
- movl 4(%esp),%eax
- movl %eax,%cr0
- ret
-
-/* rcr0() */
-ENTRY(rcr0)
- movl %cr0,%eax
- ret
-
-/* rcr3() */
-ENTRY(rcr3)
- movl %cr3,%eax
- ret
-
-/* void load_cr3(caddr_t cr3) */
-ENTRY(load_cr3)
-#ifdef SWTCH_OPTIM_STATS
- incl tlb_flush_count
-#endif
- movl 4(%esp),%eax
- movl %eax,%cr3
- ret
-
-/* rcr4() */
-ENTRY(rcr4)
- movl %cr4,%eax
- ret
-
-/* void load_cr4(caddr_t cr4) */
-ENTRY(load_cr4)
- movl 4(%esp),%eax
- movl %eax,%cr4
- ret
-
/* void reset_dbregs() */
ENTRY(reset_dbregs)
movl $0,%eax
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index 969541f..94d5c3a 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -227,62 +227,6 @@ invd(void)
__asm __volatile("invd");
}
-#if defined(SMP) && defined(_KERNEL)
-
-/*
- * When using APIC IPI's, invlpg() is not simply the invlpg instruction
- * (this is a bug) and the inlining cost is prohibitive since the call
- * executes into the IPI transmission system.
- */
-void invlpg __P((u_int addr));
-void invltlb __P((void));
-
-static __inline void
-cpu_invlpg(void *addr)
-{
- __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
-}
-
-static __inline void
-cpu_invltlb(void)
-{
- u_int temp;
- /*
- * This should be implemented as load_cr3(rcr3()) when load_cr3()
- * is inlined.
- */
- __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
- : : "memory");
-#if defined(SWTCH_OPTIM_STATS)
- ++tlb_flush_count;
-#endif
-}
-
-#else /* !(SMP && _KERNEL) */
-
-static __inline void
-invlpg(u_int addr)
-{
- __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
-}
-
-static __inline void
-invltlb(void)
-{
- u_int temp;
- /*
- * This should be implemented as load_cr3(rcr3()) when load_cr3()
- * is inlined.
- */
- __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
- : : "memory");
-#ifdef SWTCH_OPTIM_STATS
- ++tlb_flush_count;
-#endif
-}
-
-#endif /* SMP && _KERNEL */
-
static __inline u_short
inw(u_int port)
{
@@ -348,15 +292,6 @@ outw(u_int port, u_short data)
}
static __inline u_int
-rcr2(void)
-{
- u_int data;
-
- __asm __volatile("movl %%cr2,%0" : "=r" (data));
- return (data);
-}
-
-static __inline u_int
read_eflags(void)
{
u_int ef;
@@ -420,6 +355,162 @@ wrmsr(u_int msr, u_int64_t newval)
__asm __volatile("wrmsr" : : "A" (newval), "c" (msr));
}
+static __inline void
+load_cr0(u_int data)
+{
+
+ __asm __volatile("movl %0,%%cr0" : : "r" (data));
+}
+
+static __inline u_int
+rcr0(void)
+{
+ u_int data;
+
+ __asm __volatile("movl %%cr0,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline u_int
+rcr2(void)
+{
+ u_int data;
+
+ __asm __volatile("movl %%cr2,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_cr3(u_int data)
+{
+
+ __asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory");
+#if defined(SWTCH_OPTIM_STATS)
+ ++tlb_flush_count;
+#endif
+}
+
+static __inline u_int
+rcr3(void)
+{
+ u_int data;
+
+ __asm __volatile("movl %%cr3,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_cr4(u_int data)
+{
+ __asm __volatile("movl %0,%%cr4" : : "r" (data));
+}
+
+static __inline u_int
+rcr4(void)
+{
+ u_int data;
+
+ __asm __volatile("movl %%cr4,%0" : "=r" (data));
+ return (data);
+}
+
+/*
+ * Global TLB flush (except for those for pages marked PG_G)
+ */
+static __inline void
+cpu_invltlb(void)
+{
+
+ load_cr3(rcr3());
+}
+
+/*
+ * TLB flush for an individual page (even if it has PG_G).
+ * Only works on 486+ CPUs (i386 does not have PG_G).
+ */
+static __inline void
+cpu_invlpg(u_int addr)
+{
+
+#ifndef I386_CPU
+ __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
+#else
+ cpu_invltlb();
+#endif
+}
+
+#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */
+/*
+ * Same as above but for a range of pages.
+ */
+static __inline void
+cpu_invlpg_range(u_int startva, u_int endva)
+{
+#ifndef I386_CPU
+ u_int addr;
+
+ for (addr = startva; addr < endva; addr += PAGE_SIZE)
+ __asm __volatile("invlpg %0" : : "m" (*(char *)addr));
+ __asm __volatile("" : : : "memory");
+#else
+ cpu_invltlb();
+#endif
+}
+#endif
+
+#ifdef SMP
+extern void smp_invlpg(u_int addr);
+extern void smp_masked_invlpg(u_int mask, u_int addr);
+#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */
+extern void smp_invlpg_range(u_int startva, u_int endva);
+extern void smp_masked_invlpg_range(u_int mask, u_int startva, u_int endva);
+#endif
+extern void smp_invltlb(void);
+extern void smp_masked_invltlb(u_int mask);
+#endif
+
+/*
+ * Generic page TLB flush. Takes care of SMP.
+ */
+static __inline void
+invlpg(u_int addr)
+{
+
+ cpu_invlpg(addr);
+#ifdef SMP
+ smp_invlpg(addr);
+#endif
+}
+
+#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */
+/*
+ * Generic TLB flush for a range of pages. Takes care of SMP.
+ * Saves many IPIs for SMP mode.
+ */
+static __inline void
+invlpg_range(u_int startva, u_int endva)
+{
+
+ cpu_invlpg_range(startva, endva);
+#ifdef SMP
+ smp_invlpg_range(startva, endva);
+#endif
+}
+#endif
+
+/*
+ * Generic global TLB flush (except for those for pages marked PG_G)
+ */
+static __inline void
+invltlb(void)
+{
+
+ cpu_invltlb();
+#ifdef SMP
+ smp_invltlb();
+#endif
+}
+
static __inline u_int
rfs(void)
{
@@ -581,6 +672,8 @@ cpu_critical_exit(critical_t eflags)
int breakpoint __P((void));
u_int bsfl __P((u_int mask));
u_int bsrl __P((u_int mask));
+void cpu_invlpg __P((u_int addr));
+void cpu_invlpg_range __P((u_int start, u_int end));
void disable_intr __P((void));
void do_cpuid __P((u_int ax, u_int *p));
void enable_intr __P((void));
@@ -591,15 +684,26 @@ void insl __P((u_int port, void *addr, size_t cnt));
void insw __P((u_int port, void *addr, size_t cnt));
void invd __P((void));
void invlpg __P((u_int addr));
+void invlpg_range __P((u_int start, u_int end));
void invltlb __P((void));
u_short inw __P((u_int port));
+void load_cr0 __P((u_int cr0));
+void load_cr3 __P((u_int cr3));
+void load_cr4 __P((u_int cr4));
+void load_fs __P((u_int sel));
+void load_gs __P((u_int sel));
void outb __P((u_int port, u_char data));
void outl __P((u_int port, u_int data));
void outsb __P((u_int port, void *addr, size_t cnt));
void outsl __P((u_int port, void *addr, size_t cnt));
void outsw __P((u_int port, void *addr, size_t cnt));
void outw __P((u_int port, u_short data));
+u_int rcr0 __P((void));
u_int rcr2 __P((void));
+u_int rcr3 __P((void));
+u_int rcr4 __P((void));
+u_int rfs __P((void));
+u_int rgs __P((void));
u_int64_t rdmsr __P((u_int msr));
u_int64_t rdpmc __P((u_int pmc));
u_int64_t rdtsc __P((void));
@@ -607,22 +711,12 @@ u_int read_eflags __P((void));
void wbinvd __P((void));
void write_eflags __P((u_int ef));
void wrmsr __P((u_int msr, u_int64_t newval));
-u_int rfs __P((void));
-u_int rgs __P((void));
-void load_fs __P((u_int sel));
-void load_gs __P((u_int sel));
critical_t cpu_critical_enter __P((void));
void cpu_critical_exit __P((critical_t eflags));
#endif /* __GNUC__ */
-void load_cr0 __P((u_int cr0));
-void load_cr3 __P((u_int cr3));
-void load_cr4 __P((u_int cr4));
void ltr __P((u_short sel));
-u_int rcr0 __P((void));
-u_int rcr3 __P((void));
-u_int rcr4 __P((void));
void reset_dbregs __P((void));
__END_DECLS
diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h
index 27ee7ae..008dfc5 100644
--- a/sys/amd64/include/mptable.h
+++ b/sys/amd64/include/mptable.h
@@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU];
+#ifdef APIC_IO
+/* Variables needed for SMP tlb shootdown. */
+u_int smp_tlb_addr1;
+u_int smp_tlb_addr2;
+volatile int smp_tlb_wait;
+static struct mtx smp_tlb_mtx;
+#endif
+
/*
* Local data and functions.
*/
@@ -335,6 +343,9 @@ init_locks(void)
#ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */
+#ifdef APIC_IO
+ mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN);
+#endif
}
/*
@@ -604,6 +615,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(XINVLPG_OFFSET, Xinvlpg,
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(XINVLRNG_OFFSET, Xinvlrng,
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock,
@@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
+#if defined(APIC_IO)
+
+#ifdef COUNT_XINVLTLB_HITS
+u_int xhits_gbl[MAXCPU];
+u_int xhits_pg[MAXCPU];
+u_int xhits_rng[MAXCPU];
+SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
+ sizeof(xhits_gbl), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
+ sizeof(xhits_pg), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
+ sizeof(xhits_rng), "IU", "");
+
+u_int ipi_global;
+u_int ipi_page;
+u_int ipi_range;
+u_int ipi_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
+ 0, "");
+
+u_int ipi_masked_global;
+u_int ipi_masked_page;
+u_int ipi_masked_range;
+u_int ipi_masked_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
+ &ipi_masked_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
+ &ipi_masked_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
+ &ipi_masked_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
+ &ipi_masked_range_size, 0, "");
+#endif
+
/*
* Flush the TLB on all other CPU's
- *
- * XXX: Needs to handshake and wait for completion before proceding.
*/
+static void
+smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2)
+{
+ u_int ncpu;
+ register_t eflags;
+
+ ncpu = mp_ncpus - 1; /* does not shootdown self */
+ if (ncpu < 1)
+ return; /* no other cpus */
+ eflags = read_eflags();
+ if ((eflags & PSL_I) == 0)
+ panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
+ mtx_lock_spin(&smp_tlb_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_wait = 0;
+ ipi_all_but_self(vector);
+ while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
+ /* XXX cpu_pause() */ ;
+ mtx_unlock_spin(&smp_tlb_mtx);
+}
+
+static void
+smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2)
+{
+ u_int m;
+ int i, ncpu, othercpus;
+ register_t eflags;
+
+ othercpus = mp_ncpus - 1;
+ if (mask == (u_int)-1) {
+ ncpu = othercpus;
+ if (ncpu < 1)
+ return;
+ } else {
+ /* XXX there should be a pcpu self mask */
+ mask &= ~(1 << PCPU_GET(cpuid));
+ if (mask == 0)
+ return;
+ /* Count the target cpus */
+ ncpu = 0;
+ m = mask;
+ while ((i = ffs(m)) != 0) {
+ m >>= i;
+ ncpu++;
+ }
+ if (ncpu > othercpus) {
+ /* XXX this should be a panic offence */
+ printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
+ ncpu, othercpus);
+ ncpu = othercpus;
+ }
+ /* XXX should be a panic, implied by mask == 0 above */
+ if (ncpu < 1)
+ return;
+ }
+ eflags = read_eflags();
+ if ((eflags & PSL_I) == 0)
+ panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
+ mtx_lock_spin(&smp_tlb_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_wait = 0;
+ if (mask == (u_int)-1)
+ ipi_all_but_self(vector);
+ else
+ ipi_selected(mask, vector);
+ while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
+ /* XXX cpu_pause() */ ;
+ mtx_unlock_spin(&smp_tlb_mtx);
+}
+#endif
+
void
smp_invltlb(void)
{
#if defined(APIC_IO)
- if (smp_started)
- ipi_all_but_self(IPI_INVLTLB);
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
+#endif
+ }
#endif /* APIC_IO */
}
void
-invlpg(u_int addr)
+smp_invlpg(u_int addr)
{
- __asm __volatile("invlpg (%0)"::"r"(addr):"memory");
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_page++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /* send a message to the other CPUs */
- smp_invltlb();
+void
+smp_invlpg_range(u_int addr1, u_int addr2)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_range++;
+ ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+#endif /* APIC_IO */
}
void
-invltlb(void)
+smp_masked_invltlb(u_int mask)
{
- u_long temp;
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_global++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /*
- * This should be implemented as load_cr3(rcr3()) when load_cr3() is
- * inlined.
- */
- __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
+void
+smp_masked_invlpg(u_int mask, u_int addr)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_page++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /* send a message to the other CPUs */
- smp_invltlb();
+void
+smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_range++;
+ ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+#endif /* APIC_IO */
}
@@ -2280,6 +2451,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ if (bootverbose)
+ apic_dump("ap_init()");
+
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) {
@@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame)
{
mtx_lock_spin(&sched_lock);
- statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
+ statclock_process(curthread->td_kse, TRAPF_PC(&frame),
+ TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock);
}
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index cb5a24d..618bb3f 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -267,9 +267,7 @@ void *pmap_mapdev __P((vm_offset_t, vm_size_t));
void pmap_unmapdev __P((vm_offset_t, vm_size_t));
pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)) __pure2;
vm_page_t pmap_use_pt __P((pmap_t, vm_offset_t));
-#ifdef SMP
void pmap_set_opt __P((void));
-#endif
#endif /* _KERNEL */
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index 34228e2..4136c20 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -51,6 +51,8 @@ extern int current_postcode; /** XXX currently in mp_machdep.c */
* Interprocessor interrupts for SMP.
*/
#define IPI_INVLTLB XINVLTLB_OFFSET
+#define IPI_INVLPG XINVLPG_OFFSET
+#define IPI_INVLRNG XINVLRNG_OFFSET
#define IPI_RENDEZVOUS XRENDEZVOUS_OFFSET
#define IPI_AST XCPUAST_OFFSET
#define IPI_STOP XCPUSTOP_OFFSET
@@ -107,7 +109,6 @@ void assign_apic_irq __P((int apic, int intpin, int irq));
void revoke_apic_irq __P((int irq));
void bsp_apic_configure __P((void));
void init_secondary __P((void));
-void smp_invltlb __P((void));
void forward_statclock __P((void));
void forwarded_statclock __P((struct trapframe frame));
void forward_hardclock __P((void));
diff --git a/sys/amd64/isa/intr_machdep.c b/sys/amd64/isa/intr_machdep.c
index cfc162b..92bf581 100644
--- a/sys/amd64/isa/intr_machdep.c
+++ b/sys/amd64/isa/intr_machdep.c
@@ -499,14 +499,6 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags)
}
else {
vector = TPR_SLOW_INTS + intr;
-#ifdef APIC_INTR_REORDER
-#ifdef APIC_INTR_HIGHPRI_CLOCK
- /* XXX: Hack (kludge?) for more accurate clock. */
- if (intr == apic_8254_intr || intr == 8) {
- vector = TPR_FAST_INTS + intr;
- }
-#endif
-#endif
setidt(vector, slowintr[intr],
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
}
diff --git a/sys/amd64/isa/intr_machdep.h b/sys/amd64/isa/intr_machdep.h
index 1726635..789b02b 100644
--- a/sys/amd64/isa/intr_machdep.h
+++ b/sys/amd64/isa/intr_machdep.h
@@ -88,6 +88,7 @@
/* IDT vector base for regular (aka. slow) and fast interrupts */
#define TPR_SLOW_INTS 0x20
#define TPR_FAST_INTS 0x60
+/* XXX note that the AST interrupt is at 0x50 */
/* blocking values for local APIC Task Priority Register */
#define TPR_BLOCK_HWI 0x4f /* hardware INTs */
@@ -104,20 +105,23 @@
#endif /** TEST_TEST1 */
/* TLB shootdowns */
-#define XINVLTLB_OFFSET (ICU_OFFSET + 112)
+#define XINVLTLB_OFFSET (ICU_OFFSET + 112) /* 0x90 */
+#define XINVLPG_OFFSET (ICU_OFFSET + 113) /* 0x91 */
+#define XINVLRNG_OFFSET (ICU_OFFSET + 114) /* 0x92 */
/* inter-cpu clock handling */
-#define XHARDCLOCK_OFFSET (ICU_OFFSET + 113)
-#define XSTATCLOCK_OFFSET (ICU_OFFSET + 114)
+#define XHARDCLOCK_OFFSET (ICU_OFFSET + 120) /* 0x98 */
+#define XSTATCLOCK_OFFSET (ICU_OFFSET + 121) /* 0x99 */
/* inter-CPU rendezvous */
-#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 115)
+#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 122) /* 0x9A */
/* IPI to generate an additional software trap at the target CPU */
-#define XCPUAST_OFFSET (ICU_OFFSET + 48)
+/* XXX in the middle of the interrupt range, overlapping IRQ48 */
+#define XCPUAST_OFFSET (ICU_OFFSET + 48) /* 0x50 */
/* IPI to signal CPUs to stop and wait for another CPU to restart them */
-#define XCPUSTOP_OFFSET (ICU_OFFSET + 128)
+#define XCPUSTOP_OFFSET (ICU_OFFSET + 128) /* 0xA0 */
/*
* Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff:
@@ -181,7 +185,9 @@ inthand_t
IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31);
inthand_t
- Xinvltlb, /* TLB shootdowns */
+ Xinvltlb, /* TLB shootdowns - global */
+ Xinvlpg, /* TLB shootdowns - 1 page */
+ Xinvlrng, /* TLB shootdowns - page range */
Xhardclock, /* Forward hardclock() */
Xstatclock, /* Forward statclock() */
Xcpuast, /* Additional software trap on other cpu */
diff --git a/sys/amd64/isa/nmi.c b/sys/amd64/isa/nmi.c
index cfc162b..92bf581 100644
--- a/sys/amd64/isa/nmi.c
+++ b/sys/amd64/isa/nmi.c
@@ -499,14 +499,6 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags)
}
else {
vector = TPR_SLOW_INTS + intr;
-#ifdef APIC_INTR_REORDER
-#ifdef APIC_INTR_HIGHPRI_CLOCK
- /* XXX: Hack (kludge?) for more accurate clock. */
- if (intr == apic_8254_intr || intr == 8) {
- vector = TPR_FAST_INTS + intr;
- }
-#endif
-#endif
setidt(vector, slowintr[intr],
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
}
diff --git a/sys/conf/options.i386 b/sys/conf/options.i386
index b505dbd..3848b0a 100644
--- a/sys/conf/options.i386
+++ b/sys/conf/options.i386
@@ -5,6 +5,7 @@ DISABLE_PSE
MATH_EMULATE opt_math_emulate.h
GPL_MATH_EMULATE opt_math_emulate.h
PMAP_SHPGPERPROC opt_pmap.h
+ENABLE_PG_G opt_pmap.h
PPC_PROBE_CHIPSET opt_ppc.h
PPC_DEBUG opt_ppc.h
SHOW_BUSYBUFS
diff --git a/sys/conf/options.pc98 b/sys/conf/options.pc98
index 6cf1db2..9b71078 100644
--- a/sys/conf/options.pc98
+++ b/sys/conf/options.pc98
@@ -5,6 +5,7 @@ DISABLE_PSE
MATH_EMULATE opt_math_emulate.h
GPL_MATH_EMULATE opt_math_emulate.h
PMAP_SHPGPERPROC opt_pmap.h
+ENABLE_PG_G opt_pmap.h
PPC_PROBE_CHIPSET opt_ppc.h
PPC_DEBUG opt_ppc.h
SHOW_BUSYBUFS
diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s
index 95c9133..e3a37e1 100644
--- a/sys/i386/i386/apic_vector.s
+++ b/sys/i386/i386/apic_vector.s
@@ -181,30 +181,108 @@ Xspuriousint:
iret
/*
- * Handle TLB shootdowns.
+ * Global address space TLB shootdown.
*/
.text
SUPERALIGN_TEXT
.globl Xinvltlb
Xinvltlb:
pushl %eax
+ pushl %ds
+ movl $KDSEL, %eax /* Kernel data selector */
+ mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
pushl %fs
- movl $KPSEL, %eax
+ movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
- ss
- incl _xhits(,%eax,4)
+ incl xhits_gbl(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl %cr3, %eax /* invalidate the TLB */
movl %eax, %cr3
- ss /* stack segment, avoid %ds load */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
+ lock
+ incl smp_tlb_wait
+
+ popl %ds
+ popl %eax
+ iret
+
+/*
+ * Single page TLB shootdown
+ */
+ .text
+ SUPERALIGN_TEXT
+ .globl Xinvlpg
+Xinvlpg:
+ pushl %eax
+ pushl %ds
+ movl $KDSEL, %eax /* Kernel data selector */
+ mov %ax, %ds
+
+#ifdef COUNT_XINVLTLB_HITS
+ pushl %fs
+ movl $KPSEL, %eax /* Private space selector */
+ mov %ax, %fs
+ movl PCPU(CPUID), %eax
+ popl %fs
+ ss
+ incl xhits_pg(,%eax,4)
+#endif /* COUNT_XINVLTLB_HITS */
+
+ movl smp_tlb_addr1, %eax
+ invlpg (%eax) /* invalidate single page */
+
+ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
+
+ lock
+ incl smp_tlb_wait
+
+ popl %ds
+ popl %eax
+ iret
+
+/*
+ * Page range TLB shootdown.
+ */
+ .text
+ SUPERALIGN_TEXT
+ .globl Xinvlrng
+Xinvlrng:
+ pushl %eax
+ pushl %edx
+ pushl %ds
+ movl $KDSEL, %eax /* Kernel data selector */
+ mov %ax, %ds
+
+#ifdef COUNT_XINVLTLB_HITS
+ pushl %fs
+ movl $KPSEL, %eax /* Private space selector */
+ mov %ax, %fs
+ movl PCPU(CPUID), %eax
+ popl %fs
+ incl xhits_rng(,%eax,4)
+#endif /* COUNT_XINVLTLB_HITS */
+
+ movl smp_tlb_addr1, %edx
+ movl smp_tlb_addr2, %eax
+1: invlpg (%edx) /* invalidate single page */
+ addl $PAGE_SIZE, %edx
+ cmpl %edx, %eax
+ jb 1b
+
+ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
+
+ lock
+ incl smp_tlb_wait
+
+ popl %ds
+ popl %edx
popl %eax
iret
@@ -443,12 +521,6 @@ Xrendezvous:
.data
-#ifdef COUNT_XINVLTLB_HITS
- .globl _xhits
-_xhits:
- .space (NCPU * 4), 0
-#endif /* COUNT_XINVLTLB_HITS */
-
.globl apic_pin_trigger
apic_pin_trigger:
.long 0
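
Stripped of the interrupt plumbing, the new Xinvlpg/Xinvlrng handlers simply read the shared smp_tlb_addr1/smp_tlb_addr2 words, invalidate the requested page or range, EOI the local APIC, and atomically increment smp_tlb_wait so the initiating CPU can count the acknowledgements. A rough C model of the responder side, with the privileged instructions stubbed out and the locked increment approximated by a GCC builtin:

#include <stdio.h>

#define PAGE_SIZE       4096u

/* Shared words written by the initiating CPU (see mp_machdep.c below). */
static volatile unsigned int smp_tlb_addr1, smp_tlb_addr2;
static volatile int smp_tlb_wait;

static void
cpu_invlpg_stub(unsigned int va)
{
        printf("invlpg 0x%08x\n", va);  /* stands in for the invlpg insn */
}

/* What Xinvlrng amounts to: walk the range one page at a time, then ack. */
static void
invlrng_responder(void)
{
        unsigned int va = smp_tlb_addr1;
        unsigned int eva = smp_tlb_addr2;

        for (; va < eva; va += PAGE_SIZE)
                cpu_invlpg_stub(va);
        /* the lapic EOI would be written here */
        __sync_fetch_and_add(&smp_tlb_wait, 1); /* "lock; incl smp_tlb_wait" */
}

int
main(void)
{
        smp_tlb_addr1 = 0x1000;
        smp_tlb_addr2 = 0x4000;
        invlrng_responder();            /* flushes 0x1000, 0x2000, 0x3000 */
        return (smp_tlb_wait == 1 ? 0 : 1);
}
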
diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s
index 4fff220..299bc3e 100644
--- a/sys/i386/i386/locore.s
+++ b/sys/i386/i386/locore.s
@@ -381,12 +381,6 @@ begin:
movl IdlePTD,%esi
movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
- testl $CPUID_PGE, R(cpu_feature)
- jz 1f
- movl %cr4, %eax
- orl $CR4_PGE, %eax
- movl %eax, %cr4
-1:
pushl physfree /* value of first for init386(first) */
call init386 /* wire 386 chip for unix operation */
@@ -809,14 +803,7 @@ no_kernend:
jne map_read_write
#endif
xorl %edx,%edx
-
-#if !defined(SMP)
- testl $CPUID_PGE, R(cpu_feature)
- jz 2f
- orl $PG_G,%edx
-#endif
-
-2: movl $R(etext),%ecx
+ movl $R(etext),%ecx
addl $PAGE_MASK,%ecx
shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx)
@@ -827,13 +814,7 @@ no_kernend:
andl $~PAGE_MASK, %eax
map_read_write:
movl $PG_RW,%edx
-#if !defined(SMP)
- testl $CPUID_PGE, R(cpu_feature)
- jz 1f
- orl $PG_G,%edx
-#endif
-
-1: movl R(KERNend),%ecx
+ movl R(KERNend),%ecx
subl %eax,%ecx
shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx)
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 27ee7ae..008dfc5 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU];
+#ifdef APIC_IO
+/* Variables needed for SMP tlb shootdown. */
+u_int smp_tlb_addr1;
+u_int smp_tlb_addr2;
+volatile int smp_tlb_wait;
+static struct mtx smp_tlb_mtx;
+#endif
+
/*
* Local data and functions.
*/
@@ -335,6 +343,9 @@ init_locks(void)
#ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */
+#ifdef APIC_IO
+ mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN);
+#endif
}
/*
@@ -604,6 +615,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(XINVLPG_OFFSET, Xinvlpg,
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(XINVLRNG_OFFSET, Xinvlrng,
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock,
@@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
+#if defined(APIC_IO)
+
+#ifdef COUNT_XINVLTLB_HITS
+u_int xhits_gbl[MAXCPU];
+u_int xhits_pg[MAXCPU];
+u_int xhits_rng[MAXCPU];
+SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
+ sizeof(xhits_gbl), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
+ sizeof(xhits_pg), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
+ sizeof(xhits_rng), "IU", "");
+
+u_int ipi_global;
+u_int ipi_page;
+u_int ipi_range;
+u_int ipi_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
+ 0, "");
+
+u_int ipi_masked_global;
+u_int ipi_masked_page;
+u_int ipi_masked_range;
+u_int ipi_masked_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
+ &ipi_masked_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
+ &ipi_masked_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
+ &ipi_masked_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
+ &ipi_masked_range_size, 0, "");
+#endif
+
/*
* Flush the TLB on all other CPU's
- *
- * XXX: Needs to handshake and wait for completion before proceding.
*/
+static void
+smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2)
+{
+ u_int ncpu;
+ register_t eflags;
+
+ ncpu = mp_ncpus - 1; /* does not shootdown self */
+ if (ncpu < 1)
+ return; /* no other cpus */
+ eflags = read_eflags();
+ if ((eflags & PSL_I) == 0)
+ panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
+ mtx_lock_spin(&smp_tlb_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_wait = 0;
+ ipi_all_but_self(vector);
+ while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
+ /* XXX cpu_pause() */ ;
+ mtx_unlock_spin(&smp_tlb_mtx);
+}
+
+static void
+smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2)
+{
+ u_int m;
+ int i, ncpu, othercpus;
+ register_t eflags;
+
+ othercpus = mp_ncpus - 1;
+ if (mask == (u_int)-1) {
+ ncpu = othercpus;
+ if (ncpu < 1)
+ return;
+ } else {
+ /* XXX there should be a pcpu self mask */
+ mask &= ~(1 << PCPU_GET(cpuid));
+ if (mask == 0)
+ return;
+ /* Count the target cpus */
+ ncpu = 0;
+ m = mask;
+ while ((i = ffs(m)) != 0) {
+ m >>= i;
+ ncpu++;
+ }
+ if (ncpu > othercpus) {
+ /* XXX this should be a panic offence */
+ printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
+ ncpu, othercpus);
+ ncpu = othercpus;
+ }
+ /* XXX should be a panic, implied by mask == 0 above */
+ if (ncpu < 1)
+ return;
+ }
+ eflags = read_eflags();
+ if ((eflags & PSL_I) == 0)
+ panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
+ mtx_lock_spin(&smp_tlb_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_wait = 0;
+ if (mask == (u_int)-1)
+ ipi_all_but_self(vector);
+ else
+ ipi_selected(mask, vector);
+ while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
+ /* XXX cpu_pause() */ ;
+ mtx_unlock_spin(&smp_tlb_mtx);
+}
+#endif
+
void
smp_invltlb(void)
{
#if defined(APIC_IO)
- if (smp_started)
- ipi_all_but_self(IPI_INVLTLB);
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
+#endif
+ }
#endif /* APIC_IO */
}
void
-invlpg(u_int addr)
+smp_invlpg(u_int addr)
{
- __asm __volatile("invlpg (%0)"::"r"(addr):"memory");
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_page++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /* send a message to the other CPUs */
- smp_invltlb();
+void
+smp_invlpg_range(u_int addr1, u_int addr2)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_range++;
+ ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+#endif /* APIC_IO */
}
void
-invltlb(void)
+smp_masked_invltlb(u_int mask)
{
- u_long temp;
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_global++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /*
- * This should be implemented as load_cr3(rcr3()) when load_cr3() is
- * inlined.
- */
- __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
+void
+smp_masked_invlpg(u_int mask, u_int addr)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_page++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /* send a message to the other CPUs */
- smp_invltlb();
+void
+smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_range++;
+ ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+#endif /* APIC_IO */
}
@@ -2280,6 +2451,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ if (bootverbose)
+ apic_dump("ap_init()");
+
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) {
@@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame)
{
mtx_lock_spin(&sched_lock);
- statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
+ statclock_process(curthread->td_kse, TRAPF_PC(&frame),
+ TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock);
}
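
The handshake that the old code lacked now lives in smp_tlb_shootdown()/smp_targeted_tlb_shootdown(): the initiator takes the new "tlb" spin mutex, publishes the addresses, resets smp_tlb_wait, sends the IPI, and spins until every targeted CPU has bumped the counter. Reduced to its core, with the kernel primitives replaced by stubs (an illustration of the protocol, not the kernel code):

static unsigned int smp_tlb_addr1, smp_tlb_addr2;
static volatile int smp_tlb_wait;

static void ipi_all_but_self(unsigned int vector) { (void)vector; }
static int atomic_load_acq_int(volatile int *p) { return (*p); }

void
tlb_shootdown_initiator(unsigned int vector, unsigned int addr1,
    unsigned int addr2, int ncpu)
{
        /* mtx_lock_spin(&smp_tlb_mtx): one shootdown in flight at a time */
        smp_tlb_addr1 = addr1;
        smp_tlb_addr2 = addr2;
        smp_tlb_wait = 0;
        ipi_all_but_self(vector);
        /*
         * Callers must have interrupts enabled (hence the panic above):
         * a CPU spinning here with interrupts off could never acknowledge
         * a shootdown aimed at it, and crossed shootdowns would deadlock.
         */
        while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
                ;       /* each responder does "lock; incl smp_tlb_wait" */
        /* mtx_unlock_spin(&smp_tlb_mtx) */
}
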
diff --git a/sys/i386/i386/mpapic.c b/sys/i386/i386/mpapic.c
index dc7861f..d3f4d3d 100644
--- a/sys/i386/i386/mpapic.c
+++ b/sys/i386/i386/mpapic.c
@@ -101,9 +101,6 @@ apic_initialize(void)
#endif /** TEST_TEST1 */
lapic.svr = temp;
-
- if (bootverbose)
- apic_dump("apic_initialize()");
}
diff --git a/sys/i386/i386/mptable.c b/sys/i386/i386/mptable.c
index 27ee7ae..008dfc5 100644
--- a/sys/i386/i386/mptable.c
+++ b/sys/i386/i386/mptable.c
@@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU];
+#ifdef APIC_IO
+/* Variables needed for SMP tlb shootdown. */
+u_int smp_tlb_addr1;
+u_int smp_tlb_addr2;
+volatile int smp_tlb_wait;
+static struct mtx smp_tlb_mtx;
+#endif
+
/*
* Local data and functions.
*/
@@ -335,6 +343,9 @@ init_locks(void)
#ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */
+#ifdef APIC_IO
+ mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN);
+#endif
}
/*
@@ -604,6 +615,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(XINVLPG_OFFSET, Xinvlpg,
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(XINVLRNG_OFFSET, Xinvlrng,
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock,
@@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
+#if defined(APIC_IO)
+
+#ifdef COUNT_XINVLTLB_HITS
+u_int xhits_gbl[MAXCPU];
+u_int xhits_pg[MAXCPU];
+u_int xhits_rng[MAXCPU];
+SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
+ sizeof(xhits_gbl), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
+ sizeof(xhits_pg), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
+ sizeof(xhits_rng), "IU", "");
+
+u_int ipi_global;
+u_int ipi_page;
+u_int ipi_range;
+u_int ipi_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
+ 0, "");
+
+u_int ipi_masked_global;
+u_int ipi_masked_page;
+u_int ipi_masked_range;
+u_int ipi_masked_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
+ &ipi_masked_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
+ &ipi_masked_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
+ &ipi_masked_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
+ &ipi_masked_range_size, 0, "");
+#endif
+
/*
* Flush the TLB on all other CPU's
- *
- * XXX: Needs to handshake and wait for completion before proceding.
*/
+static void
+smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2)
+{
+ u_int ncpu;
+ register_t eflags;
+
+ ncpu = mp_ncpus - 1; /* does not shootdown self */
+ if (ncpu < 1)
+ return; /* no other cpus */
+ eflags = read_eflags();
+ if ((eflags & PSL_I) == 0)
+ panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
+ mtx_lock_spin(&smp_tlb_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_wait = 0;
+ ipi_all_but_self(vector);
+ while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
+ /* XXX cpu_pause() */ ;
+ mtx_unlock_spin(&smp_tlb_mtx);
+}
+
+static void
+smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2)
+{
+ u_int m;
+ int i, ncpu, othercpus;
+ register_t eflags;
+
+ othercpus = mp_ncpus - 1;
+ if (mask == (u_int)-1) {
+ ncpu = othercpus;
+ if (ncpu < 1)
+ return;
+ } else {
+ /* XXX there should be a pcpu self mask */
+ mask &= ~(1 << PCPU_GET(cpuid));
+ if (mask == 0)
+ return;
+ /* Count the target cpus */
+ ncpu = 0;
+ m = mask;
+ while ((i = ffs(m)) != 0) {
+ m >>= i;
+ ncpu++;
+ }
+ if (ncpu > othercpus) {
+ /* XXX this should be a panic offence */
+ printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
+ ncpu, othercpus);
+ ncpu = othercpus;
+ }
+ /* XXX should be a panic, implied by mask == 0 above */
+ if (ncpu < 1)
+ return;
+ }
+ eflags = read_eflags();
+ if ((eflags & PSL_I) == 0)
+ panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
+ mtx_lock_spin(&smp_tlb_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_wait = 0;
+ if (mask == (u_int)-1)
+ ipi_all_but_self(vector);
+ else
+ ipi_selected(mask, vector);
+ while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
+ /* XXX cpu_pause() */ ;
+ mtx_unlock_spin(&smp_tlb_mtx);
+}
+#endif
+
void
smp_invltlb(void)
{
#if defined(APIC_IO)
- if (smp_started)
- ipi_all_but_self(IPI_INVLTLB);
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
+#endif
+ }
#endif /* APIC_IO */
}
void
-invlpg(u_int addr)
+smp_invlpg(u_int addr)
{
- __asm __volatile("invlpg (%0)"::"r"(addr):"memory");
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_page++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /* send a message to the other CPUs */
- smp_invltlb();
+void
+smp_invlpg_range(u_int addr1, u_int addr2)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_range++;
+ ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+#endif /* APIC_IO */
}
void
-invltlb(void)
+smp_masked_invltlb(u_int mask)
{
- u_long temp;
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_global++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /*
- * This should be implemented as load_cr3(rcr3()) when load_cr3() is
- * inlined.
- */
- __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
+void
+smp_masked_invlpg(u_int mask, u_int addr)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_page++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /* send a message to the other CPUs */
- smp_invltlb();
+void
+smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_range++;
+ ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+#endif /* APIC_IO */
}
@@ -2280,6 +2451,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ if (bootverbose)
+ apic_dump("ap_init()");
+
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) {
@@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame)
{
mtx_lock_spin(&sched_lock);
- statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
+ statclock_process(curthread->td_kse, TRAPF_PC(&frame),
+ TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock);
}
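
For the targeted variant the initiator also has to know how many acknowledgements to expect, so smp_targeted_tlb_shootdown() strips its own CPU from the mask and counts the remaining bits with ffs(). That loop is just a population count; the same idiom in a stand-alone program (libc ffs() from <strings.h>):

#include <stdio.h>
#include <strings.h>

/* Count set bits the way smp_targeted_tlb_shootdown() does. */
static int
count_target_cpus(unsigned int mask)
{
        unsigned int m = mask;
        int i, ncpu = 0;

        while ((i = ffs(m)) != 0) {
                m >>= i;        /* drop the lowest set bit and everything below it */
                ncpu++;
        }
        return (ncpu);
}

int
main(void)
{
        /* e.g. a 4-way box, shooting at CPUs 1 and 3 (mask 0x0a) */
        printf("%d target cpus\n", count_target_cpus(0x0a));    /* prints 2 */
        return (0);
}
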
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index f12cb0b..ba3ee22 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -85,6 +85,9 @@
#include <sys/user.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>
+#if defined(SMP)
+#include <sys/smp.h>
+#endif
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -101,7 +104,6 @@
#include <machine/md_var.h>
#include <machine/specialreg.h>
#if defined(SMP) || defined(APIC_IO)
-#include <machine/smp.h>
#include <machine/apic.h>
#include <machine/segments.h>
#include <machine/tss.h>
@@ -259,10 +261,10 @@ static vm_offset_t
pmap_kmem_choose(vm_offset_t addr)
{
vm_offset_t newaddr = addr;
+
#ifndef DISABLE_PSE
- if (cpu_feature & CPUID_PSE) {
+ if (cpu_feature & CPUID_PSE)
newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
- }
#endif
return newaddr;
}
@@ -367,10 +369,9 @@ pmap_bootstrap(firstaddr, loadaddr)
PTD[i] = 0;
pgeflag = 0;
-#if !defined(SMP) /* XXX - see also mp_machdep.c */
- if (cpu_feature & CPUID_PGE) {
+#if !defined(SMP) || defined(ENABLE_PG_G)
+ if (cpu_feature & CPUID_PGE)
pgeflag = PG_G;
- }
#endif
/*
@@ -383,7 +384,7 @@ pmap_bootstrap(firstaddr, loadaddr)
*/
pdir4mb = 0;
-#if !defined(DISABLE_PSE)
+#ifndef DISABLE_PSE
if (cpu_feature & CPUID_PSE) {
pd_entry_t ptditmp;
/*
@@ -394,57 +395,64 @@ pmap_bootstrap(firstaddr, loadaddr)
ptditmp &= ~(NBPDR - 1);
ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
pdir4mb = ptditmp;
-
-#if !defined(SMP)
- /*
- * Enable the PSE mode.
- */
- load_cr4(rcr4() | CR4_PSE);
-
- /*
- * We can do the mapping here for the single processor
- * case. We simply ignore the old page table page from
- * now on.
- */
- /*
- * For SMP, we still need 4K pages to bootstrap APs,
- * PSE will be enabled as soon as all APs are up.
- */
- PTD[KPTDI] = (pd_entry_t) ptditmp;
- kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
- invltlb();
-#endif
}
#endif
-
+#ifndef SMP
+ /*
+ * Turn on PGE/PSE. SMP does this later on since the
+ * 4K page tables are required for AP boot (for now).
+ * XXX fixme.
+ */
+ pmap_set_opt();
+#endif
#ifdef SMP
if (cpu_apic_address == 0)
panic("pmap_bootstrap: no local apic! (non-SMP hardware?)");
-
/* local apic is mapped on last page */
SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
(cpu_apic_address & PG_FRAME));
#endif
-
- invltlb();
+ cpu_invltlb();
}
-#ifdef SMP
/*
- * Set 4mb pdir for mp startup
+ * Enable 4MB page mode for MP startup. Turn on PG_G support.
+ * BSP will run this after all the AP's have started up.
*/
void
pmap_set_opt(void)
{
- if (pseflag && (cpu_feature & CPUID_PSE)) {
+ pt_entry_t *pte;
+ vm_offset_t va;
+
+ if (pgeflag && (cpu_feature & CPUID_PGE))
+ load_cr4(rcr4() | CR4_PGE);
+#ifndef DISABLE_PSE
+ if (pseflag && (cpu_feature & CPUID_PSE))
load_cr4(rcr4() | CR4_PSE);
- if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */
+#endif
+ if (PCPU_GET(cpuid) == 0) {
+#ifndef DISABLE_PSE
+ if (pdir4mb)
kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb;
- cpu_invltlb();
+#endif
+ if (pgeflag) {
+ /* XXX see earlier comments about virtual_avail */
+ for (va = KERNBASE; va < virtual_avail; va += PAGE_SIZE)
+ {
+ pte = vtopte(va);
+ if (*pte)
+ *pte |= pgeflag;
+ }
}
- }
+ /*
+ * for SMP, this will cause all cpus to reload again, which
+ * is actually what we want since they now have CR4_PGE on.
+ */
+ invltlb();
+ } else
+ cpu_invltlb();
}
-#endif
/*
* Initialize the pmap module.
@@ -552,27 +560,37 @@ pmap_track_modified(vm_offset_t va)
return 0;
}
-static PMAP_INLINE void
-invltlb_1pg(vm_offset_t va)
-{
-#ifdef I386_CPU
- invltlb();
-#else
- invlpg(va);
-#endif
-}
-
static __inline void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
#if defined(SMP)
- if (pmap->pm_active & PCPU_GET(cpumask))
- cpu_invlpg((void *)va);
- if (pmap->pm_active & PCPU_GET(other_cpus))
- smp_invltlb();
+ u_int cpumask;
+ u_int other_cpus;
+ struct thread *td;
+
+ td = curthread;
+ critical_enter();
+ /*
+ * We need to disable interrupt preemption but MUST NOT have
+ * interrupts disabled here.
+ * XXX we may need to hold schedlock to get a coherent pm_active
+ */
+ if (td->td_critnest == 1)
+ cpu_critical_exit(td->td_savecrit);
+ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
+ invlpg(va); /* global */
+ } else {
+ cpumask = PCPU_GET(cpumask);
+ other_cpus = PCPU_GET(other_cpus);
+ if (pmap->pm_active & cpumask)
+ cpu_invlpg(va);
+ if (pmap->pm_active & other_cpus)
+ smp_masked_invlpg(pmap->pm_active & other_cpus, va);
+ }
+ critical_exit();
#else
if (pmap->pm_active)
- invltlb_1pg(va);
+ cpu_invlpg(va);
#endif
}
@@ -580,10 +598,30 @@ static __inline void
pmap_invalidate_all(pmap_t pmap)
{
#if defined(SMP)
- if (pmap->pm_active & PCPU_GET(cpumask))
- cpu_invltlb();
- if (pmap->pm_active & PCPU_GET(other_cpus))
- smp_invltlb();
+ u_int cpumask;
+ u_int other_cpus;
+ struct thread *td;
+
+ td = curthread;
+ critical_enter();
+ /*
+ * We need to disable interrupt preemption but MUST NOT have
+ * interrupts disabled here.
+ * XXX we may need to hold schedlock to get a coherent pm_active
+ */
+ if (td->td_critnest == 1)
+ cpu_critical_exit(td->td_savecrit);
+ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
+ invltlb(); /* global */
+ } else {
+ cpumask = PCPU_GET(cpumask);
+ other_cpus = PCPU_GET(other_cpus);
+ if (pmap->pm_active & cpumask)
+ cpu_invltlb();
+ if (pmap->pm_active & other_cpus)
+ smp_masked_invltlb(pmap->pm_active & other_cpus);
+ }
+ critical_exit();
#else
if (pmap->pm_active)
invltlb();
@@ -609,12 +647,7 @@ get_ptbase(pmap)
/* otherwise, we are alternate address space */
if (frame != (APTDpde & PG_FRAME)) {
APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
-#if defined(SMP)
- /* The page directory is not shared between CPUs */
- cpu_invltlb();
-#else
invltlb();
-#endif
}
return APTmap;
}
@@ -643,7 +676,7 @@ pmap_pte_quick(pmap, va)
newpf = pde & PG_FRAME;
if (((*PMAP1) & PG_FRAME) != newpf) {
*PMAP1 = newpf | PG_RW | PG_V;
- invltlb_1pg((vm_offset_t) PADDR1);
+ pmap_invalidate_page(pmap, (vm_offset_t) PADDR1);
}
return PADDR1 + (index & (NPTEPG - 1));
}
@@ -689,20 +722,17 @@ pmap_extract(pmap, va)
/*
* add a wired page to the kva
- * note that in order for the mapping to take effect -- you
- * should do a invltlb after doing the pmap_kenter...
*/
PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_offset_t pa)
{
pt_entry_t *pte;
- pt_entry_t npte, opte;
+ pt_entry_t npte;
npte = pa | PG_RW | PG_V | pgeflag;
pte = vtopte(va);
- opte = *pte;
*pte = npte;
- invltlb_1pg(va);
+ invlpg(va);
}
/*
@@ -715,7 +745,7 @@ pmap_kremove(vm_offset_t va)
pte = vtopte(va);
*pte = 0;
- invltlb_1pg(va);
+ invlpg(va);
}
/*
@@ -733,13 +763,17 @@ pmap_kremove(vm_offset_t va)
vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{
- vm_offset_t sva = *virt;
- vm_offset_t va = sva;
+ vm_offset_t va, sva;
+ pt_entry_t *pte;
+
+ va = sva = *virt;
while (start < end) {
- pmap_kenter(va, start);
+ pte = vtopte(va);
+ *pte = start | PG_RW | PG_V | pgeflag;
va += PAGE_SIZE;
start += PAGE_SIZE;
}
+ invlpg_range(sva, end);
*virt = va;
return (sva);
}
@@ -754,28 +788,21 @@ pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
* over. The page *must* be wired.
*/
void
-pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
+pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
{
- vm_offset_t end_va;
+ vm_offset_t va, end_va;
+ pt_entry_t *pte;
+ va = sva;
end_va = va + count * PAGE_SIZE;
-
- while (va < end_va) {
- pt_entry_t *pte;
+ while (va < end_va) {
pte = vtopte(va);
*pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
-#ifdef SMP
- cpu_invlpg((void *)va);
-#else
- invltlb_1pg(va);
-#endif
va += PAGE_SIZE;
m++;
}
-#ifdef SMP
- smp_invltlb();
-#endif
+ invlpg_range(sva, end_va);
}
/*
@@ -783,27 +810,20 @@ pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
* kernel -- it is meant only for temporary mappings.
*/
void
-pmap_qremove(vm_offset_t va, int count)
+pmap_qremove(vm_offset_t sva, int count)
{
- vm_offset_t end_va;
+ pt_entry_t *pte;
+ vm_offset_t va, end_va;
- end_va = va + count*PAGE_SIZE;
+ va = sva;
+ end_va = va + count * PAGE_SIZE;
while (va < end_va) {
- pt_entry_t *pte;
-
pte = vtopte(va);
*pte = 0;
-#ifdef SMP
- cpu_invlpg((void *)va);
-#else
- invltlb_1pg(va);
-#endif
va += PAGE_SIZE;
}
-#ifdef SMP
- smp_invltlb();
-#endif
+ invlpg_range(sva, end_va);
}
static vm_page_t
@@ -824,9 +844,6 @@ retry:
void
pmap_new_proc(struct proc *p)
{
-#ifdef I386_CPU
- int updateneeded = 0;
-#endif
int i;
vm_object_t upobj;
vm_offset_t up;
@@ -870,23 +887,14 @@ pmap_new_proc(struct proc *p)
* Enter the page into the kernel address space.
*/
*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
- if (oldpte) {
-#ifdef I386_CPU
- updateneeded = 1;
-#else
+ if (oldpte)
invlpg(up + i * PAGE_SIZE);
-#endif
- }
vm_page_wakeup(m);
vm_page_flag_clear(m, PG_ZERO);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
m->valid = VM_PAGE_BITS_ALL;
}
-#ifdef I386_CPU
- if (updateneeded)
- invltlb();
-#endif
}
/*
@@ -901,7 +909,7 @@ pmap_dispose_proc(p)
vm_object_t upobj;
vm_offset_t up;
vm_page_t m;
- pt_entry_t *ptek, oldpte;
+ pt_entry_t *ptek;
upobj = p->p_upages_obj;
up = (vm_offset_t)p->p_uarea;
@@ -911,17 +919,11 @@ pmap_dispose_proc(p)
if (m == NULL)
panic("pmap_dispose_proc: upage already missing?");
vm_page_busy(m);
- oldpte = *(ptek + i);
*(ptek + i) = 0;
-#ifndef I386_CPU
invlpg(up + i * PAGE_SIZE);
-#endif
vm_page_unwire(m, 0);
vm_page_free(m);
}
-#ifdef I386_CPU
- invltlb();
-#endif
}
/*
@@ -986,9 +988,6 @@ pmap_swapin_proc(p)
void
pmap_new_thread(struct thread *td)
{
-#ifdef I386_CPU
- int updateneeded = 0;
-#endif
int i;
vm_object_t ksobj;
vm_page_t m;
@@ -1019,13 +1018,8 @@ pmap_new_thread(struct thread *td)
ptek = vtopte(ks - PAGE_SIZE);
oldpte = *ptek;
*ptek = 0;
- if (oldpte) {
-#ifdef I386_CPU
- updateneeded = 1;
-#else
+ if (oldpte)
invlpg(ks - PAGE_SIZE);
-#endif
- }
ptek++;
#else
/* get a kernel virtual address for the kstack for this thread */
@@ -1055,23 +1049,14 @@ pmap_new_thread(struct thread *td)
* Enter the page into the kernel address space.
*/
*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
- if (oldpte) {
-#ifdef I386_CPU
- updateneeded = 1;
-#else
+ if (oldpte)
invlpg(ks + i * PAGE_SIZE);
-#endif
- }
vm_page_wakeup(m);
vm_page_flag_clear(m, PG_ZERO);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
m->valid = VM_PAGE_BITS_ALL;
}
-#ifdef I386_CPU
- if (updateneeded)
- invltlb();
-#endif
}
/*
@@ -1086,7 +1071,7 @@ pmap_dispose_thread(td)
vm_object_t ksobj;
vm_offset_t ks;
vm_page_t m;
- pt_entry_t *ptek, oldpte;
+ pt_entry_t *ptek;
ksobj = td->td_kstack_obj;
ks = td->td_kstack;
@@ -1096,17 +1081,11 @@ pmap_dispose_thread(td)
if (m == NULL)
panic("pmap_dispose_thread: kstack already missing?");
vm_page_busy(m);
- oldpte = *(ptek + i);
*(ptek + i) = 0;
-#ifndef I386_CPU
invlpg(ks + i * PAGE_SIZE);
-#endif
vm_page_unwire(m, 0);
vm_page_free(m);
}
-#ifdef I386_CPU
- invltlb();
-#endif
}
/*
@@ -2207,13 +2186,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
if ((origpte & PG_RW) == 0) {
*pte |= PG_RW;
-#ifdef SMP
- cpu_invlpg((void *)va);
- if (pmap->pm_active & PCPU_GET(other_cpus))
- smp_invltlb();
-#else
- invltlb_1pg(va);
-#endif
+ pmap_invalidate_page(pmap, va);
}
return;
}
@@ -2281,13 +2254,7 @@ validate:
if ((origpte & ~(PG_M|PG_A)) != newpte) {
*pte = newpte | PG_A;
/*if (origpte)*/ {
-#ifdef SMP
- cpu_invlpg((void *)va);
- if (pmap->pm_active & PCPU_GET(other_cpus))
- smp_invltlb();
-#else
- invltlb_1pg(va);
-#endif
+ pmap_invalidate_page(pmap, va);
}
}
}
@@ -2710,7 +2677,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t pdnxt;
pd_entry_t src_frame, dst_frame;
vm_page_t m;
- pd_entry_t saved_pde;
if (dst_addr != src_addr)
return;
@@ -2720,17 +2686,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
return;
dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME;
- if (dst_frame != (APTDpde & PG_FRAME)) {
- APTDpde = dst_frame | PG_RW | PG_V;
-#if defined(SMP)
- /* The page directory is not shared between CPUs */
- cpu_invltlb();
-#else
- invltlb();
-#endif
- }
- saved_pde = APTDpde & (PG_FRAME | PG_RW | PG_V);
- for(addr = src_addr; addr < end_addr; addr = pdnxt) {
+ for (addr = src_addr; addr < end_addr; addr = pdnxt) {
pt_entry_t *src_pte, *dst_pte;
vm_page_t dstmpte, srcmpte;
pd_entry_t srcptepaddr;
@@ -2771,6 +2727,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
if (pdnxt > end_addr)
pdnxt = end_addr;
+ /*
+ * Have to recheck this before every avtopte() call below
+ * in case we have blocked and something else used APTDpde.
+ */
+ if (dst_frame != (APTDpde & PG_FRAME)) {
+ APTDpde = dst_frame | PG_RW | PG_V;
+ invltlb();
+ }
src_pte = vtopte(addr);
dst_pte = avtopte(addr);
while (addr < pdnxt) {
@@ -2786,16 +2750,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
* block.
*/
dstmpte = pmap_allocpte(dst_pmap, addr);
- if ((APTDpde & PG_FRAME) !=
- (saved_pde & PG_FRAME)) {
- APTDpde = saved_pde;
-printf ("IT HAPPENNED!");
-#if defined(SMP)
- cpu_invltlb();
-#else
- invltlb();
-#endif
- }
if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
/*
* Clear the modified and
@@ -2839,12 +2793,15 @@ void
pmap_zero_page(vm_offset_t phys)
{
+#ifdef SMP
+ /* XXX overkill, we only want to disable migration here */
+ /* XXX or maybe not. down the track we have reentrancy issues */
+ critical_enter();
+#endif
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
-
*CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
- invltlb_1pg((vm_offset_t)CADDR2);
-
+ cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR2);
@@ -2852,6 +2809,9 @@ pmap_zero_page(vm_offset_t phys)
#endif
bzero(CADDR2, PAGE_SIZE);
*CMAP2 = 0;
+#ifdef SMP
+ critical_exit();
+#endif
}
/*
@@ -2864,12 +2824,15 @@ void
pmap_zero_page_area(vm_offset_t phys, int off, int size)
{
+#ifdef SMP
+ /* XXX overkill, we only want to disable migration here */
+ /* XXX or maybe not. down the track we have reentrancy issues */
+ critical_enter();
+#endif
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
-
*CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
- invltlb_1pg((vm_offset_t)CADDR2);
-
+ cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
i686_pagezero(CADDR2);
@@ -2877,6 +2840,9 @@ pmap_zero_page_area(vm_offset_t phys, int off, int size)
#endif
bzero((char *)CADDR2 + off, size);
*CMAP2 = 0;
+#ifdef SMP
+ critical_exit();
+#endif
}
/*
@@ -2889,6 +2855,11 @@ void
pmap_copy_page(vm_offset_t src, vm_offset_t dst)
{
+#ifdef SMP
+ /* XXX overkill, we only want to disable migration here */
+ /* XXX or maybe not. down the track we have reentrancy issues */
+ critical_enter();
+#endif
if (*CMAP1)
panic("pmap_copy_page: CMAP1 busy");
if (*CMAP2)
@@ -2896,17 +2867,14 @@ pmap_copy_page(vm_offset_t src, vm_offset_t dst)
*CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
*CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
-#ifdef I386_CPU
- invltlb();
-#else
- invlpg((u_int)CADDR1);
- invlpg((u_int)CADDR2);
-#endif
-
+ cpu_invlpg((u_int)CADDR1); /* SMP: local only */
+ cpu_invlpg((u_int)CADDR2); /* SMP: local only */
bcopy(CADDR1, CADDR2, PAGE_SIZE);
-
*CMAP1 = 0;
*CMAP2 = 0;
+#ifdef SMP
+ critical_exit();
+#endif
}
@@ -3322,14 +3290,13 @@ pmap_mapdev(pa, size)
panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
pa = pa & PG_FRAME;
- for (tmpva = va; size > 0;) {
+ for (tmpva = va; size > 0; ) {
pte = vtopte(tmpva);
*pte = pa | PG_RW | PG_V | pgeflag;
size -= PAGE_SIZE;
tmpva += PAGE_SIZE;
- pa += PAGE_SIZE;
}
- invltlb();
+ invlpg_range(va, tmpva);
return ((void *)(va + offset));
}
@@ -3339,11 +3306,20 @@ pmap_unmapdev(va, size)
vm_offset_t va;
vm_size_t size;
{
- vm_offset_t base, offset;
+ vm_offset_t base, offset, tmpva;
+ pt_entry_t *pte;
base = va & PG_FRAME;
offset = va & PAGE_MASK;
size = roundup(offset + size, PAGE_SIZE);
+
+ for (tmpva = base; size > 0; ) {
+ pte = vtopte(tmpva);
+ *pte = 0;
+ size -= PAGE_SIZE;
+ tmpva += PAGE_SIZE;
+ }
+ invlpg_range(va, tmpva);
kmem_free(kernel_map, base, size);
}
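
The pmap.c side of the change funnels every invalidation through pmap_invalidate_page()/pmap_invalidate_all(): flush locally only if this CPU has the pmap active, send a masked IPI only to the CPUs that share it, and fall back to a full broadcast for the kernel pmap (pm_active == -1 or all_cpus). The dispatch, pulled out of the surrounding critical-section handling and with the primitives stubbed (illustrative only, with the CPU masks assumed):

typedef unsigned int u_int;

static u_int my_cpumask = 0x1;          /* assumption: running on CPU 0 */
static u_int all_cpus = 0xf;            /* assumption: 4 CPUs present */

static void cpu_invlpg(u_int va) { (void)va; }          /* local TLB only */
static void invlpg(u_int va) { (void)va; }              /* local + broadcast IPI */
static void smp_masked_invlpg(u_int mask, u_int va) { (void)mask; (void)va; }

void
invalidate_page(u_int pm_active, u_int va)
{
        u_int other_cpus = all_cpus & ~my_cpumask;

        if (pm_active == (u_int)-1 || pm_active == all_cpus) {
                invlpg(va);             /* kernel pmap: everyone, us included */
        } else {
                if (pm_active & my_cpumask)
                        cpu_invlpg(va); /* we are currently using this pmap */
                if (pm_active & other_cpus)
                        smp_masked_invlpg(pm_active & other_cpus, va);
        }
}
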
diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s
index bc58672..0649009 100644
--- a/sys/i386/i386/support.s
+++ b/sys/i386/i386/support.s
@@ -1591,42 +1591,6 @@ ENTRY(ssdtosd)
popl %ebx
ret
-/* load_cr0(cr0) */
-ENTRY(load_cr0)
- movl 4(%esp),%eax
- movl %eax,%cr0
- ret
-
-/* rcr0() */
-ENTRY(rcr0)
- movl %cr0,%eax
- ret
-
-/* rcr3() */
-ENTRY(rcr3)
- movl %cr3,%eax
- ret
-
-/* void load_cr3(caddr_t cr3) */
-ENTRY(load_cr3)
-#ifdef SWTCH_OPTIM_STATS
- incl tlb_flush_count
-#endif
- movl 4(%esp),%eax
- movl %eax,%cr3
- ret
-
-/* rcr4() */
-ENTRY(rcr4)
- movl %cr4,%eax
- ret
-
-/* void load_cr4(caddr_t cr4) */
-ENTRY(load_cr4)
- movl 4(%esp),%eax
- movl %eax,%cr4
- ret
-
/* void reset_dbregs() */
ENTRY(reset_dbregs)
movl $0,%eax
diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h
index 969541f..94d5c3a 100644
--- a/sys/i386/include/cpufunc.h
+++ b/sys/i386/include/cpufunc.h
@@ -227,62 +227,6 @@ invd(void)
__asm __volatile("invd");
}
-#if defined(SMP) && defined(_KERNEL)
-
-/*
- * When using APIC IPI's, invlpg() is not simply the invlpg instruction
- * (this is a bug) and the inlining cost is prohibitive since the call
- * executes into the IPI transmission system.
- */
-void invlpg __P((u_int addr));
-void invltlb __P((void));
-
-static __inline void
-cpu_invlpg(void *addr)
-{
- __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
-}
-
-static __inline void
-cpu_invltlb(void)
-{
- u_int temp;
- /*
- * This should be implemented as load_cr3(rcr3()) when load_cr3()
- * is inlined.
- */
- __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
- : : "memory");
-#if defined(SWTCH_OPTIM_STATS)
- ++tlb_flush_count;
-#endif
-}
-
-#else /* !(SMP && _KERNEL) */
-
-static __inline void
-invlpg(u_int addr)
-{
- __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
-}
-
-static __inline void
-invltlb(void)
-{
- u_int temp;
- /*
- * This should be implemented as load_cr3(rcr3()) when load_cr3()
- * is inlined.
- */
- __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
- : : "memory");
-#ifdef SWTCH_OPTIM_STATS
- ++tlb_flush_count;
-#endif
-}
-
-#endif /* SMP && _KERNEL */
-
static __inline u_short
inw(u_int port)
{
@@ -348,15 +292,6 @@ outw(u_int port, u_short data)
}
static __inline u_int
-rcr2(void)
-{
- u_int data;
-
- __asm __volatile("movl %%cr2,%0" : "=r" (data));
- return (data);
-}
-
-static __inline u_int
read_eflags(void)
{
u_int ef;
@@ -420,6 +355,162 @@ wrmsr(u_int msr, u_int64_t newval)
__asm __volatile("wrmsr" : : "A" (newval), "c" (msr));
}
+static __inline void
+load_cr0(u_int data)
+{
+
+ __asm __volatile("movl %0,%%cr0" : : "r" (data));
+}
+
+static __inline u_int
+rcr0(void)
+{
+ u_int data;
+
+ __asm __volatile("movl %%cr0,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline u_int
+rcr2(void)
+{
+ u_int data;
+
+ __asm __volatile("movl %%cr2,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_cr3(u_int data)
+{
+
+ __asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory");
+#if defined(SWTCH_OPTIM_STATS)
+ ++tlb_flush_count;
+#endif
+}
+
+static __inline u_int
+rcr3(void)
+{
+ u_int data;
+
+ __asm __volatile("movl %%cr3,%0" : "=r" (data));
+ return (data);
+}
+
+static __inline void
+load_cr4(u_int data)
+{
+ __asm __volatile("movl %0,%%cr4" : : "r" (data));
+}
+
+static __inline u_int
+rcr4(void)
+{
+ u_int data;
+
+ __asm __volatile("movl %%cr4,%0" : "=r" (data));
+ return (data);
+}
+
+/*
+ * Global TLB flush (except for those for pages marked PG_G)
+ */
+static __inline void
+cpu_invltlb(void)
+{
+
+ load_cr3(rcr3());
+}
+
+/*
+ * TLB flush for an individual page (even if it has PG_G).
+ * Only works on 486+ CPUs (i386 does not have PG_G).
+ */
+static __inline void
+cpu_invlpg(u_int addr)
+{
+
+#ifndef I386_CPU
+ __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
+#else
+ cpu_invltlb();
+#endif
+}
+
+#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */
+/*
+ * Same as above but for a range of pages.
+ */
+static __inline void
+cpu_invlpg_range(u_int startva, u_int endva)
+{
+#ifndef I386_CPU
+ u_int addr;
+
+ for (addr = startva; addr < endva; addr += PAGE_SIZE)
+ __asm __volatile("invlpg %0" : : "m" (*(char *)addr));
+ __asm __volatile("" : : : "memory");
+#else
+ cpu_invltlb();
+#endif
+}
+#endif
+
+#ifdef SMP
+extern void smp_invlpg(u_int addr);
+extern void smp_masked_invlpg(u_int mask, u_int addr);
+#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */
+extern void smp_invlpg_range(u_int startva, u_int endva);
+extern void smp_masked_invlpg_range(u_int mask, u_int startva, u_int endva);
+#endif
+extern void smp_invltlb(void);
+extern void smp_masked_invltlb(u_int mask);
+#endif
+
+/*
+ * Generic page TLB flush. Takes care of SMP.
+ */
+static __inline void
+invlpg(u_int addr)
+{
+
+ cpu_invlpg(addr);
+#ifdef SMP
+ smp_invlpg(addr);
+#endif
+}
+
+#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */
+/*
+ * Generic TLB flush for a range of pages. Takes care of SMP.
+ * Saves many IPIs for SMP mode.
+ */
+static __inline void
+invlpg_range(u_int startva, u_int endva)
+{
+
+ cpu_invlpg_range(startva, endva);
+#ifdef SMP
+ smp_invlpg_range(startva, endva);
+#endif
+}
+#endif
+
+/*
+ * Generic global TLB flush (except for those for pages marked PG_G)
+ */
+static __inline void
+invltlb(void)
+{
+
+ cpu_invltlb();
+#ifdef SMP
+ smp_invltlb();
+#endif
+}
+
static __inline u_int
rfs(void)
{
@@ -581,6 +672,8 @@ cpu_critical_exit(critical_t eflags)
int breakpoint __P((void));
u_int bsfl __P((u_int mask));
u_int bsrl __P((u_int mask));
+void cpu_invlpg __P((u_int addr));
+void cpu_invlpg_range __P((u_int start, u_int end));
void disable_intr __P((void));
void do_cpuid __P((u_int ax, u_int *p));
void enable_intr __P((void));
@@ -591,15 +684,26 @@ void insl __P((u_int port, void *addr, size_t cnt));
void insw __P((u_int port, void *addr, size_t cnt));
void invd __P((void));
void invlpg __P((u_int addr));
+void invlpg_range __P((u_int start, u_int end));
void invltlb __P((void));
u_short inw __P((u_int port));
+void load_cr0 __P((u_int cr0));
+void load_cr3 __P((u_int cr3));
+void load_cr4 __P((u_int cr4));
+void load_fs __P((u_int sel));
+void load_gs __P((u_int sel));
void outb __P((u_int port, u_char data));
void outl __P((u_int port, u_int data));
void outsb __P((u_int port, void *addr, size_t cnt));
void outsl __P((u_int port, void *addr, size_t cnt));
void outsw __P((u_int port, void *addr, size_t cnt));
void outw __P((u_int port, u_short data));
+u_int rcr0 __P((void));
u_int rcr2 __P((void));
+u_int rcr3 __P((void));
+u_int rcr4 __P((void));
+u_int rfs __P((void));
+u_int rgs __P((void));
u_int64_t rdmsr __P((u_int msr));
u_int64_t rdpmc __P((u_int pmc));
u_int64_t rdtsc __P((void));
@@ -607,22 +711,12 @@ u_int read_eflags __P((void));
void wbinvd __P((void));
void write_eflags __P((u_int ef));
void wrmsr __P((u_int msr, u_int64_t newval));
-u_int rfs __P((void));
-u_int rgs __P((void));
-void load_fs __P((u_int sel));
-void load_gs __P((u_int sel));
critical_t cpu_critical_enter __P((void));
void cpu_critical_exit __P((critical_t eflags));
#endif /* __GNUC__ */
-void load_cr0 __P((u_int cr0));
-void load_cr3 __P((u_int cr3));
-void load_cr4 __P((u_int cr4));
void ltr __P((u_short sel));
-u_int rcr0 __P((void));
-u_int rcr3 __P((void));
-u_int rcr4 __P((void));
void reset_dbregs __P((void));
__END_DECLS
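
With the cpufunc.h rework, cpu_invltlb()/cpu_invlpg()/cpu_invlpg_range() touch only the local TLB, while invltlb()/invlpg()/invlpg_range() wrap them and, on SMP kernels, also fire the corresponding IPI. The ranged form is what lets callers such as pmap_qenter() batch their flushes; a sketch of the intended calling pattern (the PTE manipulation is elided and the helper name is hypothetical, only invlpg_range() comes from the patch):

#define PAGE_SIZE       4096u

void    invlpg_range(unsigned int sva, unsigned int eva);  /* local flush + one IPI */

void
enter_pages(unsigned int va, int npages)
{
        unsigned int sva = va;
        int i;

        for (i = 0; i < npages; i++) {
                /* *vtopte(va) = ... | PG_RW | PG_V | pgeflag;  -- set the PTE */
                va += PAGE_SIZE;
        }
        /* One ranged invalidation instead of npages separate shootdowns. */
        invlpg_range(sva, va);
}
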
diff --git a/sys/i386/include/mptable.h b/sys/i386/include/mptable.h
index 27ee7ae..008dfc5 100644
--- a/sys/i386/include/mptable.h
+++ b/sys/i386/include/mptable.h
@@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU];
+#ifdef APIC_IO
+/* Variables needed for SMP tlb shootdown. */
+u_int smp_tlb_addr1;
+u_int smp_tlb_addr2;
+volatile int smp_tlb_wait;
+static struct mtx smp_tlb_mtx;
+#endif
+
/*
* Local data and functions.
*/
@@ -335,6 +343,9 @@ init_locks(void)
#ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */
+#ifdef APIC_IO
+ mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN);
+#endif
}
/*
@@ -604,6 +615,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(XINVLPG_OFFSET, Xinvlpg,
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(XINVLRNG_OFFSET, Xinvlrng,
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock,
@@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
+#if defined(APIC_IO)
+
+#ifdef COUNT_XINVLTLB_HITS
+u_int xhits_gbl[MAXCPU];
+u_int xhits_pg[MAXCPU];
+u_int xhits_rng[MAXCPU];
+SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
+ sizeof(xhits_gbl), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
+ sizeof(xhits_pg), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
+ sizeof(xhits_rng), "IU", "");
+
+u_int ipi_global;
+u_int ipi_page;
+u_int ipi_range;
+u_int ipi_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
+ 0, "");
+
+u_int ipi_masked_global;
+u_int ipi_masked_page;
+u_int ipi_masked_range;
+u_int ipi_masked_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
+ &ipi_masked_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
+ &ipi_masked_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
+ &ipi_masked_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
+ &ipi_masked_range_size, 0, "");
+#endif
+
/*
* Flush the TLB on all other CPU's
- *
- * XXX: Needs to handshake and wait for completion before proceding.
*/
+static void
+smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2)
+{
+ u_int ncpu;
+ register_t eflags;
+
+ ncpu = mp_ncpus - 1; /* does not shootdown self */
+ if (ncpu < 1)
+ return; /* no other cpus */
+ eflags = read_eflags();
+ if ((eflags & PSL_I) == 0)
+ panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
+ mtx_lock_spin(&smp_tlb_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_wait = 0;
+ ipi_all_but_self(vector);
+ while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
+ /* XXX cpu_pause() */ ;
+ mtx_unlock_spin(&smp_tlb_mtx);
+}
+
+static void
+smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2)
+{
+ u_int m;
+ int i, ncpu, othercpus;
+ register_t eflags;
+
+ othercpus = mp_ncpus - 1;
+ if (mask == (u_int)-1) {
+ ncpu = othercpus;
+ if (ncpu < 1)
+ return;
+ } else {
+ /* XXX there should be a pcpu self mask */
+ mask &= ~(1 << PCPU_GET(cpuid));
+ if (mask == 0)
+ return;
+ /* Count the target cpus */
+ ncpu = 0;
+ m = mask;
+ while ((i = ffs(m)) != 0) {
+ m >>= i;
+ ncpu++;
+ }
+ if (ncpu > othercpus) {
+ /* XXX this should be a panic offence */
+ printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
+ ncpu, othercpus);
+ ncpu = othercpus;
+ }
+ /* XXX should be a panic, implied by mask == 0 above */
+ if (ncpu < 1)
+ return;
+ }
+ eflags = read_eflags();
+ if ((eflags & PSL_I) == 0)
+ panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
+ mtx_lock_spin(&smp_tlb_mtx);
+ smp_tlb_addr1 = addr1;
+ smp_tlb_addr2 = addr2;
+ smp_tlb_wait = 0;
+ if (mask == (u_int)-1)
+ ipi_all_but_self(vector);
+ else
+ ipi_selected(mask, vector);
+ while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
+ /* XXX cpu_pause() */ ;
+ mtx_unlock_spin(&smp_tlb_mtx);
+}
+#endif
+
void
smp_invltlb(void)
{
#if defined(APIC_IO)
- if (smp_started)
- ipi_all_but_self(IPI_INVLTLB);
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
+#endif
+ }
#endif /* APIC_IO */
}
void
-invlpg(u_int addr)
+smp_invlpg(u_int addr)
{
- __asm __volatile("invlpg (%0)"::"r"(addr):"memory");
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_page++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /* send a message to the other CPUs */
- smp_invltlb();
+void
+smp_invlpg_range(u_int addr1, u_int addr2)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_range++;
+ ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+#endif /* APIC_IO */
}
void
-invltlb(void)
+smp_masked_invltlb(u_int mask)
{
- u_long temp;
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_global++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /*
- * This should be implemented as load_cr3(rcr3()) when load_cr3() is
- * inlined.
- */
- __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
+void
+smp_masked_invlpg(u_int mask, u_int addr)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_page++;
+#endif
+ }
+#endif /* APIC_IO */
+}
- /* send a message to the other CPUs */
- smp_invltlb();
+void
+smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2)
+{
+#if defined(APIC_IO)
+ if (smp_started) {
+ smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_masked_range++;
+ ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+ }
+#endif /* APIC_IO */
}
@@ -2280,6 +2451,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ if (bootverbose)
+ apic_dump("ap_init()");
+
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) {
@@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame)
{
mtx_lock_spin(&sched_lock);
- statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
+ statclock_process(curthread->td_kse, TRAPF_PC(&frame),
+ TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock);
}
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index cb5a24d..618bb3f 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -267,9 +267,7 @@ void *pmap_mapdev __P((vm_offset_t, vm_size_t));
void pmap_unmapdev __P((vm_offset_t, vm_size_t));
pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)) __pure2;
vm_page_t pmap_use_pt __P((pmap_t, vm_offset_t));
-#ifdef SMP
void pmap_set_opt __P((void));
-#endif
#endif /* _KERNEL */
diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h
index 34228e2..4136c20 100644
--- a/sys/i386/include/smp.h
+++ b/sys/i386/include/smp.h
@@ -51,6 +51,8 @@ extern int current_postcode; /** XXX currently in mp_machdep.c */
* Interprocessor interrupts for SMP.
*/
#define IPI_INVLTLB XINVLTLB_OFFSET
+#define IPI_INVLPG XINVLPG_OFFSET
+#define IPI_INVLRNG XINVLRNG_OFFSET
#define IPI_RENDEZVOUS XRENDEZVOUS_OFFSET
#define IPI_AST XCPUAST_OFFSET
#define IPI_STOP XCPUSTOP_OFFSET
@@ -107,7 +109,6 @@ void assign_apic_irq __P((int apic, int intpin, int irq));
void revoke_apic_irq __P((int irq));
void bsp_apic_configure __P((void));
void init_secondary __P((void));
-void smp_invltlb __P((void));
void forward_statclock __P((void));
void forwarded_statclock __P((struct trapframe frame));
void forward_hardclock __P((void));
diff --git a/sys/i386/include/smptests.h b/sys/i386/include/smptests.h
index d666148..ea8e84b 100644
--- a/sys/i386/include/smptests.h
+++ b/sys/i386/include/smptests.h
@@ -90,13 +90,6 @@
*/
#define APIC_INTR_REORDER
-/*
- * Redirect clock interrupts to a higher priority (fast intr) vector,
- * while still using the slow interrupt handler. Only effective when
- * APIC_INTR_REORDER is defined.
- */
-#define APIC_INTR_HIGHPRI_CLOCK
-
#endif /* APIC_IO */
/*
diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s
index 95c9133..e3a37e1 100644
--- a/sys/i386/isa/apic_vector.s
+++ b/sys/i386/isa/apic_vector.s
@@ -181,30 +181,108 @@ Xspuriousint:
iret
/*
- * Handle TLB shootdowns.
+ * Global address space TLB shootdown.
*/
.text
SUPERALIGN_TEXT
.globl Xinvltlb
Xinvltlb:
pushl %eax
+ pushl %ds
+ movl $KDSEL, %eax /* Kernel data selector */
+ mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
pushl %fs
- movl $KPSEL, %eax
+ movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
- ss
- incl _xhits(,%eax,4)
+ incl xhits_gbl(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl %cr3, %eax /* invalidate the TLB */
movl %eax, %cr3
- ss /* stack segment, avoid %ds load */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
+ lock
+ incl smp_tlb_wait
+
+ popl %ds
+ popl %eax
+ iret
+
+/*
+ * Single page TLB shootdown
+ */
+ .text
+ SUPERALIGN_TEXT
+ .globl Xinvlpg
+Xinvlpg:
+ pushl %eax
+ pushl %ds
+ movl $KDSEL, %eax /* Kernel data selector */
+ mov %ax, %ds
+
+#ifdef COUNT_XINVLTLB_HITS
+ pushl %fs
+ movl $KPSEL, %eax /* Private space selector */
+ mov %ax, %fs
+ movl PCPU(CPUID), %eax
+ popl %fs
+ ss
+ incl xhits_pg(,%eax,4)
+#endif /* COUNT_XINVLTLB_HITS */
+
+ movl smp_tlb_addr1, %eax
+ invlpg (%eax) /* invalidate single page */
+
+ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
+
+ lock
+ incl smp_tlb_wait
+
+ popl %ds
+ popl %eax
+ iret
+
+/*
+ * Page range TLB shootdown.
+ */
+ .text
+ SUPERALIGN_TEXT
+ .globl Xinvlrng
+Xinvlrng:
+ pushl %eax
+ pushl %edx
+ pushl %ds
+ movl $KDSEL, %eax /* Kernel data selector */
+ mov %ax, %ds
+
+#ifdef COUNT_XINVLTLB_HITS
+ pushl %fs
+ movl $KPSEL, %eax /* Private space selector */
+ mov %ax, %fs
+ movl PCPU(CPUID), %eax
+ popl %fs
+ incl xhits_rng(,%eax,4)
+#endif /* COUNT_XINVLTLB_HITS */
+
+ movl smp_tlb_addr1, %edx
+ movl smp_tlb_addr2, %eax
+1: invlpg (%edx) /* invalidate single page */
+ addl $PAGE_SIZE, %edx
+ cmpl %edx, %eax
+ jb 1b
+
+ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
+
+ lock
+ incl smp_tlb_wait
+
+ popl %ds
+ popl %edx
popl %eax
iret
@@ -443,12 +521,6 @@ Xrendezvous:
.data
-#ifdef COUNT_XINVLTLB_HITS
- .globl _xhits
-_xhits:
- .space (NCPU * 4), 0
-#endif /* COUNT_XINVLTLB_HITS */
-
.globl apic_pin_trigger
apic_pin_trigger:
.long 0
diff --git a/sys/i386/isa/intr_machdep.c b/sys/i386/isa/intr_machdep.c
index cfc162b..92bf581 100644
--- a/sys/i386/isa/intr_machdep.c
+++ b/sys/i386/isa/intr_machdep.c
@@ -499,14 +499,6 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags)
}
else {
vector = TPR_SLOW_INTS + intr;
-#ifdef APIC_INTR_REORDER
-#ifdef APIC_INTR_HIGHPRI_CLOCK
- /* XXX: Hack (kludge?) for more accurate clock. */
- if (intr == apic_8254_intr || intr == 8) {
- vector = TPR_FAST_INTS + intr;
- }
-#endif
-#endif
setidt(vector, slowintr[intr],
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
}
diff --git a/sys/i386/isa/intr_machdep.h b/sys/i386/isa/intr_machdep.h
index 1726635..789b02b 100644
--- a/sys/i386/isa/intr_machdep.h
+++ b/sys/i386/isa/intr_machdep.h
@@ -88,6 +88,7 @@
/* IDT vector base for regular (aka. slow) and fast interrupts */
#define TPR_SLOW_INTS 0x20
#define TPR_FAST_INTS 0x60
+/* XXX note that the AST interrupt is at 0x50 */
/* blocking values for local APIC Task Priority Register */
#define TPR_BLOCK_HWI 0x4f /* hardware INTs */
@@ -104,20 +105,23 @@
#endif /** TEST_TEST1 */
/* TLB shootdowns */
-#define XINVLTLB_OFFSET (ICU_OFFSET + 112)
+#define XINVLTLB_OFFSET (ICU_OFFSET + 112) /* 0x90 */
+#define XINVLPG_OFFSET (ICU_OFFSET + 113) /* 0x91 */
+#define XINVLRNG_OFFSET (ICU_OFFSET + 114) /* 0x92 */
/* inter-cpu clock handling */
-#define XHARDCLOCK_OFFSET (ICU_OFFSET + 113)
-#define XSTATCLOCK_OFFSET (ICU_OFFSET + 114)
+#define XHARDCLOCK_OFFSET (ICU_OFFSET + 120) /* 0x98 */
+#define XSTATCLOCK_OFFSET (ICU_OFFSET + 121) /* 0x99 */
/* inter-CPU rendezvous */
-#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 115)
+#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 122) /* 0x9A */
/* IPI to generate an additional software trap at the target CPU */
-#define XCPUAST_OFFSET (ICU_OFFSET + 48)
+/* XXX in the middle of the interrupt range, overlapping IRQ48 */
+#define XCPUAST_OFFSET (ICU_OFFSET + 48) /* 0x50 */
/* IPI to signal CPUs to stop and wait for another CPU to restart them */
-#define XCPUSTOP_OFFSET (ICU_OFFSET + 128)
+#define XCPUSTOP_OFFSET (ICU_OFFSET + 128) /* 0xA0 */
/*
* Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff:
@@ -181,7 +185,9 @@ inthand_t
IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31);
inthand_t
- Xinvltlb, /* TLB shootdowns */
+ Xinvltlb, /* TLB shootdowns - global */
+ Xinvlpg, /* TLB shootdowns - 1 page */
+ Xinvlrng, /* TLB shootdowns - page range */
Xhardclock, /* Forward hardclock() */
Xstatclock, /* Forward statclock() */
Xcpuast, /* Additional software trap on other cpu */
diff --git a/sys/i386/isa/nmi.c b/sys/i386/isa/nmi.c
index cfc162b..92bf581 100644
--- a/sys/i386/isa/nmi.c
+++ b/sys/i386/isa/nmi.c
@@ -499,14 +499,6 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags)
}
else {
vector = TPR_SLOW_INTS + intr;
-#ifdef APIC_INTR_REORDER
-#ifdef APIC_INTR_HIGHPRI_CLOCK
- /* XXX: Hack (kludge?) for more accurate clock. */
- if (intr == apic_8254_intr || intr == 8) {
- vector = TPR_FAST_INTS + intr;
- }
-#endif
-#endif
setidt(vector, slowintr[intr],
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
}
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index 39e3243..be50836 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -222,6 +222,9 @@ static struct witness_order_list_entry order_lists[] = {
{ "icu", &lock_class_mtx_spin },
#ifdef SMP
{ "smp rendezvous", &lock_class_mtx_spin },
+#ifdef __i386__
+ { "tlb", &lock_class_mtx_spin },
+#endif
#endif
{ "clk", &lock_class_mtx_spin },
{ NULL, NULL },