Diffstat
-rw-r--r--  sys/amd64/amd64/trap.c     64
-rw-r--r--  sys/amd64/include/proc.h    1
-rw-r--r--  sys/i386/i386/trap.c       69
-rw-r--r--  sys/i386/include/proc.h     1
-rw-r--r--  sys/kern/kern_sysctl.c     12
-rw-r--r--  sys/kern/subr_uio.c         8
-rw-r--r--  sys/sys/proc.h              1
-rw-r--r--  sys/vm/vm_fault.c           8
8 files changed, 117 insertions(+), 47 deletions(-)
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 28bf79d..a685f19 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -301,26 +301,6 @@ trap(struct trapframe *frame)
}
code = frame->tf_err;
- if (type == T_PAGEFLT) {
- /*
- * If we get a page fault while in a critical section, then
- * it is most likely a fatal kernel page fault. The kernel
- * is already going to panic trying to get a sleep lock to
- * do the VM lookup, so just consider it a fatal trap so the
- * kernel can print out a useful trap message and even get
- * to the debugger.
- *
- * If we get a page fault while holding a non-sleepable
- * lock, then it is most likely a fatal kernel page fault.
- * If WITNESS is enabled, then it's going to whine about
- * bogus LORs with various VM locks, so just skip to the
- * fatal trap handling directly.
- */
- if (td->td_critnest != 0 ||
- WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
- "Kernel page fault") != 0)
- trap_fatal(frame, frame->tf_addr);
- }
if (ISPL(frame->tf_cs) == SEL_UPL) {
/* user trap */
@@ -653,6 +633,50 @@ trap_pfault(frame, usermode)
struct proc *p = td->td_proc;
vm_offset_t eva = frame->tf_addr;
+ if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
+ /*
+ * Due to both processor errata and lazy TLB invalidation when
+ * access restrictions are removed from virtual pages, memory
+ * accesses that are allowed by the physical mapping layer may
+ * nonetheless cause one spurious page fault per virtual page.
+ * When the thread is executing a "no faulting" section that
+ * is bracketed by vm_fault_{disable,enable}_pagefaults(),
+ * every page fault is treated as a spurious page fault,
+ * unless it accesses the same virtual address as the most
+ * recent page fault within the same "no faulting" section.
+ */
+ if (td->td_md.md_spurflt_addr != eva ||
+ (td->td_pflags & TDP_RESETSPUR) != 0) {
+ /*
+ * Do nothing to the TLB. A stale TLB entry is
+ * flushed automatically by a page fault.
+ */
+ td->td_md.md_spurflt_addr = eva;
+ td->td_pflags &= ~TDP_RESETSPUR;
+ return (0);
+ }
+ } else {
+ /*
+ * If we get a page fault while in a critical section, then
+ * it is most likely a fatal kernel page fault. The kernel
+ * is already going to panic trying to get a sleep lock to
+ * do the VM lookup, so just consider it a fatal trap so the
+ * kernel can print out a useful trap message and even get
+ * to the debugger.
+ *
+ * If we get a page fault while holding a non-sleepable
+ * lock, then it is most likely a fatal kernel page fault.
+ * If WITNESS is enabled, then it's going to whine about
+ * bogus LORs with various VM locks, so just skip to the
+ * fatal trap handling directly.
+ */
+ if (td->td_critnest != 0 ||
+ WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
+ "Kernel page fault") != 0) {
+ trap_fatal(frame, eva);
+ return (-1);
+ }
+ }
va = trunc_page(eva);
if (va >= VM_MIN_KERNEL_ADDRESS) {
/*
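
[Editor's note] The comment in this hunk refers to the vm_fault_{disable,enable}_pagefaults() bracket defined in sys/vm/vm_fault.c (see the last hunk of this diff). A minimal sketch of how a "no faulting" section is used follows; the helper name and its caller are hypothetical, not part of this change:

/*
 * Hypothetical helper that peeks at user memory from a context where
 * servicing a page fault is not allowed.  The bracket sets and later
 * restores TDP_NOFAULTING | TDP_RESETSPUR on curthread; inside it,
 * trap_pfault() retries spurious faults transparently, while a real
 * fault makes copyin() fail with EFAULT instead of entering vm_fault().
 */
static int
peek_user_word(const void *uaddr, long *val)
{
	int error, save;

	save = vm_fault_disable_pagefaults();
	error = copyin(uaddr, val, sizeof(*val));
	vm_fault_enable_pagefaults(save);
	return (error);
}
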
diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h
index 5207d89..14585fb 100644
--- a/sys/amd64/include/proc.h
+++ b/sys/amd64/include/proc.h
@@ -46,6 +46,7 @@ struct proc_ldt {
struct mdthread {
int md_spinlock_count; /* (k) */
register_t md_saved_flags; /* (k) */
+ register_t md_spurflt_addr; /* (k) Spurious page fault address. */
};
struct mdproc {
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 05572b9..005646d 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -330,28 +330,13 @@ trap(struct trapframe *frame)
* For some Cyrix CPUs, %cr2 is clobbered by
* interrupts. This problem is worked around by using
* an interrupt gate for the pagefault handler. We
- * are finally ready to read %cr2 and then must
- * reenable interrupts.
- *
- * If we get a page fault while in a critical section, then
- * it is most likely a fatal kernel page fault. The kernel
- * is already going to panic trying to get a sleep lock to
- * do the VM lookup, so just consider it a fatal trap so the
- * kernel can print out a useful trap message and even get
- * to the debugger.
- *
- * If we get a page fault while holding a non-sleepable
- * lock, then it is most likely a fatal kernel page fault.
- * If WITNESS is enabled, then it's going to whine about
- * bogus LORs with various VM locks, so just skip to the
- * fatal trap handling directly.
+ * are finally ready to read %cr2 and conditionally
+ * reenable interrupts. If we hold a spin lock, then
+ * we must not reenable interrupts. This might be a
+ * spurious page fault.
*/
eva = rcr2();
- if (td->td_critnest != 0 ||
- WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
- "Kernel page fault") != 0)
- trap_fatal(frame, eva);
- else
+ if (td->td_md.md_spinlock_count == 0)
enable_intr();
}
@@ -804,6 +789,50 @@ trap_pfault(frame, usermode, eva)
struct thread *td = curthread;
struct proc *p = td->td_proc;
+ if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
+ /*
+ * Due to both processor errata and lazy TLB invalidation when
+ * access restrictions are removed from virtual pages, memory
+ * accesses that are allowed by the physical mapping layer may
+ * nonetheless cause one spurious page fault per virtual page.
+ * When the thread is executing a "no faulting" section that
+ * is bracketed by vm_fault_{disable,enable}_pagefaults(),
+ * every page fault is treated as a spurious page fault,
+ * unless it accesses the same virtual address as the most
+ * recent page fault within the same "no faulting" section.
+ */
+ if (td->td_md.md_spurflt_addr != eva ||
+ (td->td_pflags & TDP_RESETSPUR) != 0) {
+ /*
+ * Do nothing to the TLB. A stale TLB entry is
+ * flushed automatically by a page fault.
+ */
+ td->td_md.md_spurflt_addr = eva;
+ td->td_pflags &= ~TDP_RESETSPUR;
+ return (0);
+ }
+ } else {
+ /*
+ * If we get a page fault while in a critical section, then
+ * it is most likely a fatal kernel page fault. The kernel
+ * is already going to panic trying to get a sleep lock to
+ * do the VM lookup, so just consider it a fatal trap so the
+ * kernel can print out a useful trap message and even get
+ * to the debugger.
+ *
+ * If we get a page fault while holding a non-sleepable
+ * lock, then it is most likely a fatal kernel page fault.
+ * If WITNESS is enabled, then it's going to whine about
+ * bogus LORs with various VM locks, so just skip to the
+ * fatal trap handling directly.
+ */
+ if (td->td_critnest != 0 ||
+ WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
+ "Kernel page fault") != 0) {
+ trap_fatal(frame, eva);
+ return (-1);
+ }
+ }
va = trunc_page(eva);
if (va >= KERNBASE) {
/*
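
[Editor's note] The first hunk above replaces the fatal-trap check with a test of md_spinlock_count before reenabling interrupts: on i386 the page fault handler runs through an interrupt gate, and interrupts may only be reenabled if no spin lock is held, because the machine-dependent spin lock code relies on them staying disabled. A simplified sketch of that bookkeeping, modeled on the i386 spinlock_enter() with details elided:

/*
 * Sketch of the MD spin lock bookkeeping that the md_spinlock_count
 * test relies on.  Interrupts are disabled on first entry and stay
 * disabled while md_spinlock_count is nonzero, so trap() must not
 * reenable them in that window.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t flags;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		flags = intr_disable();		/* save and disable */
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_flags = flags;
	} else
		td->td_md.md_spinlock_count++;
	critical_enter();
}
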
diff --git a/sys/i386/include/proc.h b/sys/i386/include/proc.h
index 86e8602..14f03b7 100644
--- a/sys/i386/include/proc.h
+++ b/sys/i386/include/proc.h
@@ -51,6 +51,7 @@ struct proc_ldt {
struct mdthread {
int md_spinlock_count; /* (k) */
register_t md_saved_flags; /* (k) */
+ register_t md_spurflt_addr; /* (k) Spurious page fault address. */
};
struct mdproc {
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index 97d03a3..f294ce3 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -1294,8 +1294,8 @@ kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp,
static int
sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
{
- int error = 0;
size_t i, len, origidx;
+ int error;
origidx = req->oldidx;
req->oldidx += l;
@@ -1316,10 +1316,14 @@ sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
else {
if (i > len - origidx)
i = len - origidx;
- error = copyout(p, (char *)req->oldptr + origidx, i);
+ if (req->lock == REQ_WIRED) {
+ error = copyout_nofault(p, (char *)req->oldptr +
+ origidx, i);
+ } else
+ error = copyout(p, (char *)req->oldptr + origidx, i);
+ if (error != 0)
+ return (error);
}
- if (error)
- return (error);
if (i < l)
return (ENOMEM);
return (0);
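
[Editor's note] The new REQ_WIRED branch assumes the request's old buffer was wired up front by sysctl_wire_old_buffer(), so the copyout cannot need to service a non-spurious fault even while the handler holds a non-sleepable lock. A sketch of a handler that takes this path; the handler, lock, and statistics structure are illustrative, not part of this change:

/*
 * Illustrative sysctl handler: wiring the output buffer first lets
 * the SYSCTL_OUT() -> sysctl_old_user() path use copyout_nofault(),
 * so the copy may safely run while example_mtx is held.
 */
static int
sysctl_example_stats(SYSCTL_HANDLER_ARGS)
{
	int error;

	error = sysctl_wire_old_buffer(req, sizeof(example_stats));
	if (error != 0)
		return (error);
	mtx_lock(&example_mtx);
	error = SYSCTL_OUT(req, &example_stats, sizeof(example_stats));
	mtx_unlock(&example_mtx);
	return (error);
}
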
diff --git a/sys/kern/subr_uio.c b/sys/kern/subr_uio.c
index 3c7688a..b85e50b 100644
--- a/sys/kern/subr_uio.c
+++ b/sys/kern/subr_uio.c
@@ -187,8 +187,12 @@ uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault)
/* XXX does it make sense to set TDP_DEADLKTREAT for UIO_SYSSPACE? */
newflags = TDP_DEADLKTREAT;
- if (uio->uio_segflg == UIO_USERSPACE && nofault)
- newflags |= TDP_NOFAULTING;
+ if (uio->uio_segflg == UIO_USERSPACE && nofault) {
+ /*
+ * Fail if a non-spurious page fault occurs.
+ */
+ newflags |= TDP_NOFAULTING | TDP_RESETSPUR;
+ }
save = curthread_pflags_set(newflags);
while (n > 0 && uio->uio_resid) {
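
[Editor's note] uiomove_faultflag() is reached with nofault != 0 via the uiomove_nofault() wrapper. A sketch of a caller that copies out while holding a non-sleepable lock; the softc and its fields are hypothetical:

/*
 * Hypothetical caller: a page fault taken under sc_mtx must not
 * sleep, so the copy uses uiomove_nofault().  Spurious faults are
 * retried transparently by the MD trap code; a real fault makes
 * uiomove_nofault() return EFAULT, and the caller could then drop
 * the lock and fall back to plain uiomove().
 */
static int
example_read_locked(struct example_softc *sc, struct uio *uio)
{
	int error;

	mtx_lock(&sc->sc_mtx);
	error = uiomove_nofault(sc->sc_buf, sc->sc_len, uio);
	mtx_unlock(&sc->sc_mtx);
	return (error);
}
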
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 78ad04c..6dad36b 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -417,6 +417,7 @@ do { \
#define TDP_IGNSUSP 0x00800000 /* Permission to ignore the MNTK_SUSPEND* */
#define TDP_AUDITREC 0x01000000 /* Audit record pending on thread */
#define TDP_RFPPWAIT 0x02000000 /* Handle RFPPWAIT on syscall exit */
+#define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */
/*
* Reasons that the current thread can not be run yet.
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 8c98b26..8477c35 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -1468,11 +1468,17 @@ vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
return i;
}
+/*
+ * Block entry into the machine-independent layer's page fault handler by
+ * the calling thread. Subsequent calls to vm_fault() by that thread will
+ * return KERN_PROTECTION_FAILURE. Enable machine-dependent handling of
+ * spurious page faults.
+ */
int
vm_fault_disable_pagefaults(void)
{
- return (curthread_pflags_set(TDP_NOFAULTING));
+ return (curthread_pflags_set(TDP_NOFAULTING | TDP_RESETSPUR));
}
void