author    dillon <dillon@FreeBSD.org>    2002-07-12 20:17:06 +0000
committer dillon <dillon@FreeBSD.org>    2002-07-12 20:17:06 +0000
commit    dc5d856e710c7a79039e46c89de1624670a57c92 (patch)
tree      b4ac30b07b35f56e5b15e9d9fed028a0cb87849e /sys
parent    c5495020cb1a3babc0521d7c9de49291cd5fb515 (diff)
Re-enable the idle page-zeroing code. Remove all IPIs from the idle
page-zeroing code as well as from the general page-zeroing code, and use a
lazy TLB page invalidation scheme based on a callback made at the end of
mi_switch. A number of people came up with this idea at the same time,
so credit belongs to Peter, John, and Jake as well.

Two-way SMP buildworld -j 5 tests (second run, after stabilization):

    2282.76 real  2515.17 user  704.22 sys    before peter's IPI commit
    2266.69 real  2467.50 user  633.77 sys    after peter's commit
    2232.80 real  2468.99 user  615.89 sys    after this commit

Reviewed by: peter, jhb
Approved by: peter
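In outline, the patch below arms a per-thread hook before a purely local
invlpg, and mi_switch runs the hook when the thread is switched back in,
re-invalidating the transient mapping on whatever CPU it now runs on. The
following is a minimal sketch of that scheme, not the patch itself: it
assumes the kernel context of the diff (curthread, invlpg(), the
CMAP2/CADDR2 transient mapping, and the new td_switchin field), and the
helpers zpi_switchin_sketch() and zero_page_sketch() are hypothetical
names used only for illustration.

    /*
     * Sketch: lazy TLB invalidation via a switch-in callback.
     */
    static void
    zpi_switchin_sketch(void)
    {
            /* Redo the local invalidation after an unexpected reschedule. */
            invlpg((u_int)CADDR2);
    }

    static void
    zero_page_sketch(vm_page_t m)
    {
            *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
            curthread->td_switchin = zpi_switchin_sketch; /* arm the hook */
            invlpg((u_int)CADDR2);  /* local CPU only; no IPI broadcast */
            bzero(CADDR2, PAGE_SIZE);
            curthread->td_switchin = NULL;                /* disarm */
            *CMAP2 = 0;
    }

    /* In mi_switch(), while sched_lock is still held: */
    if (td->td_switchin)
            td->td_switchin();

The common case costs only a NULL-pointer test in mi_switch; the invlpg in
the callback is paid only on the rare context switch that lands in the
middle of a zeroing or copying operation.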
Diffstat (limited to 'sys')
-rw-r--r--  sys/amd64/amd64/genassym.c   1
-rw-r--r--  sys/amd64/amd64/pmap.c      75
-rw-r--r--  sys/i386/i386/genassym.c     1
-rw-r--r--  sys/i386/i386/pmap.c        75
-rw-r--r--  sys/kern/kern_synch.c        7
-rw-r--r--  sys/sys/proc.h               1
-rw-r--r--  sys/vm/vm_zeroidle.c         4
7 files changed, 142 insertions, 22 deletions
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index ee890db..06c87f0 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -89,6 +89,7 @@ ASSYM(TD_KSE, offsetof(struct thread, td_kse));
ASSYM(TD_PROC, offsetof(struct thread, td_proc));
ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level));
ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest));
+ASSYM(TD_SWITCHIN, offsetof(struct thread, td_switchin));
ASSYM(TD_MD, offsetof(struct thread, td_md));
ASSYM(P_MD, offsetof(struct proc, p_md));
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 5de1707..1a85a52 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -2675,6 +2675,38 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
}
}
+#ifdef SMP
+
+/*
+ * pmap_zpi_switchin*()
+ *
+ * These functions allow us to avoid doing IPIs altogether in certain
+ * temporary page-mapping situations (page zeroing). Instead, to deal
+ * with being preempted and moved onto a different cpu, we invalidate
+ * the page when the scheduler switches us in. This does not occur
+ * very often, so we remain relatively optimal with very little effort.
+ */
+static void
+pmap_zpi_switchin12(void)
+{
+ invlpg((u_int)CADDR1);
+ invlpg((u_int)CADDR2);
+}
+
+static void
+pmap_zpi_switchin2(void)
+{
+ invlpg((u_int)CADDR2);
+}
+
+static void
+pmap_zpi_switchin3(void)
+{
+ invlpg((u_int)CADDR3);
+}
+
+#endif
+
/*
* pmap_zero_page zeros the specified hardware page by mapping
* the page into KVM and using bzero to clear its contents.
@@ -2688,13 +2720,19 @@ pmap_zero_page(vm_page_t m)
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
- pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
+#ifdef SMP
+ curthread->td_switchin = pmap_zpi_switchin2;
+#endif
+ invlpg((u_int)CADDR2);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR2);
else
#endif
bzero(CADDR2, PAGE_SIZE);
+#ifdef SMP
+ curthread->td_switchin = NULL;
+#endif
*CMAP2 = 0;
}
@@ -2713,13 +2751,19 @@ pmap_zero_page_area(vm_page_t m, int off, int size)
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
- pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
+#ifdef SMP
+ curthread->td_switchin = pmap_zpi_switchin2;
+#endif
+ invlpg((u_int)CADDR2);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
i686_pagezero(CADDR2);
else
#endif
bzero((char *)CADDR2 + off, size);
+#ifdef SMP
+ curthread->td_switchin = NULL;
+#endif
*CMAP2 = 0;
}
@@ -2738,13 +2782,19 @@ pmap_zero_page_idle(vm_page_t m)
if (*CMAP3)
panic("pmap_zero_page: CMAP3 busy");
*CMAP3 = PG_V | PG_RW | phys | PG_A | PG_M;
- invlpg((vm_offset_t)CADDR3); /* SMP: local cpu only */
+#ifdef SMP
+ curthread->td_switchin = pmap_zpi_switchin3;
+#endif
+ invlpg((u_int)CADDR3);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR3);
else
#endif
bzero(CADDR3, PAGE_SIZE);
+#ifdef SMP
+ curthread->td_switchin = NULL;
+#endif
*CMAP3 = 0;
}
@@ -2764,13 +2814,20 @@ pmap_copy_page(vm_page_t src, vm_page_t dst)
panic("pmap_copy_page: CMAP2 busy");
*CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
*CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
- /*
- * XXX we "know" that CADDR2 immediately follows CADDR1 and use
- * that to save an IPI on SMP systems.
- */
- pmap_invalidate_range(kernel_pmap, (vm_offset_t)CADDR1,
- (vm_offset_t)CADDR2 + PAGE_SIZE);
+#ifdef I386_CPU
+ invltlb();
+#else
+#ifdef SMP
+ curthread->td_switchin = pmap_zpi_switchin12;
+#endif
+ invlpg((u_int)CADDR1);
+ invlpg((u_int)CADDR2);
+#endif
bcopy(CADDR1, CADDR2, PAGE_SIZE);
+
+#ifdef SMP
+ curthread->td_switchin = NULL;
+#endif
*CMAP1 = 0;
*CMAP2 = 0;
}
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index ee890db..06c87f0 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -89,6 +89,7 @@ ASSYM(TD_KSE, offsetof(struct thread, td_kse));
ASSYM(TD_PROC, offsetof(struct thread, td_proc));
ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level));
ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest));
+ASSYM(TD_SWITCHIN, offsetof(struct thread, td_switchin));
ASSYM(TD_MD, offsetof(struct thread, td_md));
ASSYM(P_MD, offsetof(struct proc, p_md));
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 5de1707..1a85a52 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -2675,6 +2675,38 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
}
}
+#ifdef SMP
+
+/*
+ * pmap_zpi_switchin*()
+ *
+ * These functions allow us to avoid doing IPIs altogether in certain
+ * temporary page-mapping situations (page zeroing). Instead, to deal
+ * with being preempted and moved onto a different cpu, we invalidate
+ * the page when the scheduler switches us in. This does not occur
+ * very often, so we remain relatively optimal with very little effort.
+ */
+static void
+pmap_zpi_switchin12(void)
+{
+ invlpg((u_int)CADDR1);
+ invlpg((u_int)CADDR2);
+}
+
+static void
+pmap_zpi_switchin2(void)
+{
+ invlpg((u_int)CADDR2);
+}
+
+static void
+pmap_zpi_switchin3(void)
+{
+ invlpg((u_int)CADDR3);
+}
+
+#endif
+
/*
* pmap_zero_page zeros the specified hardware page by mapping
* the page into KVM and using bzero to clear its contents.
@@ -2688,13 +2720,19 @@ pmap_zero_page(vm_page_t m)
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
- pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
+#ifdef SMP
+ curthread->td_switchin = pmap_zpi_switchin2;
+#endif
+ invlpg((u_int)CADDR2);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR2);
else
#endif
bzero(CADDR2, PAGE_SIZE);
+#ifdef SMP
+ curthread->td_switchin = NULL;
+#endif
*CMAP2 = 0;
}
@@ -2713,13 +2751,19 @@ pmap_zero_page_area(vm_page_t m, int off, int size)
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
- pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
+#ifdef SMP
+ curthread->td_switchin = pmap_zpi_switchin2;
+#endif
+ invlpg((u_int)CADDR2);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
i686_pagezero(CADDR2);
else
#endif
bzero((char *)CADDR2 + off, size);
+#ifdef SMP
+ curthread->td_switchin = NULL;
+#endif
*CMAP2 = 0;
}
@@ -2738,13 +2782,19 @@ pmap_zero_page_idle(vm_page_t m)
if (*CMAP3)
panic("pmap_zero_page: CMAP3 busy");
*CMAP3 = PG_V | PG_RW | phys | PG_A | PG_M;
- invlpg((vm_offset_t)CADDR3); /* SMP: local cpu only */
+#ifdef SMP
+ curthread->td_switchin = pmap_zpi_switchin3;
+#endif
+ invlpg((u_int)CADDR3);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR3);
else
#endif
bzero(CADDR3, PAGE_SIZE);
+#ifdef SMP
+ curthread->td_switchin = NULL;
+#endif
*CMAP3 = 0;
}
@@ -2764,13 +2814,20 @@ pmap_copy_page(vm_page_t src, vm_page_t dst)
panic("pmap_copy_page: CMAP2 busy");
*CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
*CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
- /*
- * XXX we "know" that CADDR2 immediately follows CADDR1 and use
- * that to save an IPI on SMP systems.
- */
- pmap_invalidate_range(kernel_pmap, (vm_offset_t)CADDR1,
- (vm_offset_t)CADDR2 + PAGE_SIZE);
+#ifdef I386_CPU
+ invltlb();
+#else
+#ifdef SMP
+ curthread->td_switchin = pmap_zpi_switchin12;
+#endif
+ invlpg((u_int)CADDR1);
+ invlpg((u_int)CADDR2);
+#endif
bcopy(CADDR1, CADDR2, PAGE_SIZE);
+
+#ifdef SMP
+ curthread->td_switchin = NULL;
+#endif
*CMAP1 = 0;
*CMAP2 = 0;
}
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 765f4b0..53ab46f 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -892,6 +892,13 @@ mi_switch()
if (PCPU_GET(switchtime.sec) == 0)
binuptime(PCPU_PTR(switchtime));
PCPU_SET(switchticks, ticks);
+
+ /*
+ * Call the switchin function while still holding the scheduler lock
+ * (used by the idlezero code and the general page-zeroing code).
+ */
+ if (td->td_switchin)
+ td->td_switchin();
}
/*
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index e08dbb9..fea0805 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -285,6 +285,7 @@ struct thread {
int td_intr_nesting_level; /* (k) Interrupt recursion. */
void *td_mailbox; /* the userland mailbox address */
struct ucred *td_ucred; /* (k) Reference to credentials. */
+ void (*td_switchin)(void); /* (k) switchin special func */
#define td_endzero td_md
#define td_startcopy td_endzero
diff --git a/sys/vm/vm_zeroidle.c b/sys/vm/vm_zeroidle.c
index bf35c80..fbf5fe4 100644
--- a/sys/vm/vm_zeroidle.c
+++ b/sys/vm/vm_zeroidle.c
@@ -30,11 +30,7 @@ static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO,
cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0, "");
-#ifdef SMP
-static int idlezero_enable = 0;
-#else
static int idlezero_enable = 1;
-#endif
SYSCTL_INT(_vm, OID_AUTO, idlezero_enable, CTLFLAG_RW, &idlezero_enable, 0, "");
TUNABLE_INT("vm.idlezero_enable", &idlezero_enable);