author	jhb <jhb@FreeBSD.org>	2004-07-02 20:21:44 +0000
committer	jhb <jhb@FreeBSD.org>	2004-07-02 20:21:44 +0000
commit	696704716d52a895094da20b7e1a0f763b069e12 (patch)
tree	2a5d6a91ba98f5b9e075eecc1a9ca724b8a9110a
parent	1f506bc6fab7cc97cb923d4af1174f9c732221dd (diff)
Implement preemption of kernel threads natively in the scheduler rather
than as one-off hacks in various other parts of the kernel:

- Add a function maybe_preempt() that is called from sched_add() to
  determine if a thread about to be added to a run queue should preempt
  the current thread and be switched to directly. If it is not safe to
  preempt or if the new thread does not have a high enough priority, then
  the function returns false and sched_add() adds the thread to the run
  queue. If the new thread should preempt but the current thread is in a
  nested critical section, then the flag TDF_OWEPREEMPT is set and the
  thread is added to the run queue. Otherwise, mi_switch() is called
  immediately and the thread is never added to the run queue since it is
  switched to directly. When exiting an outermost critical section, if
  TDF_OWEPREEMPT is set, then clear it and call mi_switch() to perform
  the deferred preemption.
- Remove explicit preemption from ithread_schedule() as calling
  setrunqueue() now does all the correct work. This also removes the
  do_switch argument from ithread_schedule().
- Do not use the manual preemption code in mtx_unlock if the architecture
  supports native preemption.
- Don't call mi_switch() in a loop during shutdown to give ithreads a
  chance to run if the architecture supports native preemption, since the
  ithreads will simply preempt DELAY().
- Don't call mi_switch() from the page zeroing idle thread on
  architectures that support native preemption, as it is unnecessary.
- Native preemption is enabled on the same archs that supported ithread
  preemption, namely alpha, i386, and amd64.

This change should largely be a NOP for the default case as committed,
except that we will do fewer context switches in a few cases and will
avoid the run queues completely when preempting.

Approved by:	scottl (with his re@ hat)
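As an editorial sketch (not part of the commit), the decision described
above reduces to roughly the following; locking asserts, the KSE state
check, and the FULL_PREEMPTION knob are omitted, and the real
implementation is in the kern_switch.c hunk below:

	/*
	 * Simplified view of the new preemption decision.  Called from
	 * sched_add() with sched_lock held; returns 1 if we switched to
	 * td directly, 0 if the caller should put it on a run queue.
	 */
	int
	maybe_preempt(struct thread *td)
	{
		struct thread *ctd = curthread;

		/* td must have a strictly higher (numerically lower) priority. */
		if (td->td_priority >= ctd->td_priority || cold ||
		    TD_IS_INHIBITED(ctd))
			return (0);

		/* Nested critical section: defer until critical_exit(). */
		if (ctd->td_critnest > 1) {
			ctd->td_flags |= TDF_OWEPREEMPT;
			return (0);
		}

		/* Safe to preempt: switch to the new thread immediately. */
		TD_SET_RUNNING(td);
		mi_switch(SW_INVOL, td);
		return (1);
	}

sched_add() only queues the thread when maybe_preempt() returns 0, and
critical_exit() performs the deferred mi_switch() once the outermost
critical section is dropped with TDF_OWEPREEMPT set.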
-rw-r--r--	sys/alpha/alpha/interrupt.c	2
-rw-r--r--	sys/alpha/include/param.h	2
-rw-r--r--	sys/amd64/amd64/intr_machdep.c	2
-rw-r--r--	sys/amd64/include/param.h	2
-rw-r--r--	sys/conf/NOTES	6
-rw-r--r--	sys/conf/options	1
-rw-r--r--	sys/i386/i386/intr_machdep.c	2
-rw-r--r--	sys/i386/include/param.h	2
-rw-r--r--	sys/ia64/ia64/interrupt.c	2
-rw-r--r--	sys/kern/kern_intr.c	19
-rw-r--r--	sys/kern/kern_mutex.c	6
-rw-r--r--	sys/kern/kern_shutdown.c	37
-rw-r--r--	sys/kern/kern_switch.c	97
-rw-r--r--	sys/kern/kern_synch.c	5
-rw-r--r--	sys/kern/sched_4bsd.c	12
-rw-r--r--	sys/kern/sched_ule.c	11
-rw-r--r--	sys/powerpc/powerpc/intr_machdep.c	2
-rw-r--r--	sys/sparc64/sparc64/intr_machdep.c	4
-rw-r--r--	sys/sys/interrupt.h	2
-rw-r--r--	sys/sys/proc.h	2
-rw-r--r--	sys/vm/vm_zeroidle.c	2
21 files changed, 174 insertions, 46 deletions
diff --git a/sys/alpha/alpha/interrupt.c b/sys/alpha/alpha/interrupt.c
index 72b833e..d32ce1e 100644
--- a/sys/alpha/alpha/interrupt.c
+++ b/sys/alpha/alpha/interrupt.c
@@ -455,7 +455,7 @@ alpha_dispatch_intr(void *frame, unsigned long vector)
* thread to the current CPU until we return from the interrupt.
*/
sched_pin();
- error = ithread_schedule(ithd, !cold);
+ error = ithread_schedule(ithd);
KASSERT(error == 0, ("got an impossible stray interrupt"));
sched_unpin();
}
diff --git a/sys/alpha/include/param.h b/sys/alpha/include/param.h
index 1a9c9af..79efec6 100644
--- a/sys/alpha/include/param.h
+++ b/sys/alpha/include/param.h
@@ -113,6 +113,8 @@
#define SSIZE 1 /* initial stack size/NBPG */
#define SINCR 1 /* increment of stack/NBPG */
+#define PREEMPTION
+
#ifndef KSTACK_PAGES
#define KSTACK_PAGES 2 /* pages of kstack (with pcb) */
#endif
diff --git a/sys/amd64/amd64/intr_machdep.c b/sys/amd64/amd64/intr_machdep.c
index e23df93..6b591d4 100644
--- a/sys/amd64/amd64/intr_machdep.c
+++ b/sys/amd64/amd64/intr_machdep.c
@@ -215,7 +215,7 @@ intr_execute_handlers(struct intsrc *isrc, struct intrframe *iframe)
if (ih == NULL)
error = EINVAL;
else
- error = ithread_schedule(it, !cold);
+ error = ithread_schedule(it);
}
if (error == EINVAL) {
atomic_add_long(isrc->is_straycount, 1);
diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h
index 5216c55..2f46837 100644
--- a/sys/amd64/include/param.h
+++ b/sys/amd64/include/param.h
@@ -119,6 +119,8 @@
#define NBPML4 (1ul<<PML4SHIFT)/* bytes/page map lev4 table */
#define PML4MASK (NBPML4-1)
+#define PREEMPTION
+
#define IOPAGES 2 /* pages of i/o permission bitmap */
#ifndef KSTACK_PAGES
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 2607142..f944152 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -186,6 +186,11 @@ options MUTEX_WAKE_ALL
# SMP Debugging Options:
#
+# FULL_PREEMPTION instructs the kernel to preempt non-realtime kernel
+# threads. Its sole use is to expose race conditions and other
+# bugs during development. Enabling this option will reduce
+# performance and increase the frequency of kernel panics by
+# design. If you aren't sure that you need it then you don't.
# MUTEX_DEBUG enables various extra assertions in the mutex code.
# SLEEPQUEUE_PROFILING enables rudimentary profiling of the hash table
# used to hold active sleep queues.
@@ -197,6 +202,7 @@ options MUTEX_WAKE_ALL
# a lock hierarchy violation occurs or if locks are held when going to
# sleep.
# WITNESS_SKIPSPIN disables the witness checks on spin mutexes.
+options FULL_PREEMPTION
options MUTEX_DEBUG
options WITNESS
options WITNESS_DDB
diff --git a/sys/conf/options b/sys/conf/options
index d086703..2fb6582 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -61,6 +61,7 @@ DDB_NUMSYM opt_ddb.h
DDB_TRACE
DDB_UNATTENDED
DIRECTIO opt_directio.h
+FULL_PREEMPTION
GDB_REMOTE_CHAT opt_ddb.h
GDBSPEED opt_ddb.h
GEOM_AES opt_geom.h
diff --git a/sys/i386/i386/intr_machdep.c b/sys/i386/i386/intr_machdep.c
index e23df93..6b591d4 100644
--- a/sys/i386/i386/intr_machdep.c
+++ b/sys/i386/i386/intr_machdep.c
@@ -215,7 +215,7 @@ intr_execute_handlers(struct intsrc *isrc, struct intrframe *iframe)
if (ih == NULL)
error = EINVAL;
else
- error = ithread_schedule(it, !cold);
+ error = ithread_schedule(it);
}
if (error == EINVAL) {
atomic_add_long(isrc->is_straycount, 1);
diff --git a/sys/i386/include/param.h b/sys/i386/include/param.h
index a4064e3..f455ba2 100644
--- a/sys/i386/include/param.h
+++ b/sys/i386/include/param.h
@@ -97,6 +97,8 @@
#define NBPDR (1<<PDRSHIFT) /* bytes/page dir */
#define PDRMASK (NBPDR-1)
+#define PREEMPTION
+
#define IOPAGES 2 /* pages of i/o permission bitmap */
#ifndef KSTACK_PAGES
diff --git a/sys/ia64/ia64/interrupt.c b/sys/ia64/ia64/interrupt.c
index 1b63841..65812c9 100644
--- a/sys/ia64/ia64/interrupt.c
+++ b/sys/ia64/ia64/interrupt.c
@@ -384,7 +384,7 @@ ia64_dispatch_intr(void *frame, unsigned long vector)
return;
}
- error = ithread_schedule(ithd, 0); /* XXX:no preemption for now */
+ error = ithread_schedule(ithd);
KASSERT(error == 0, ("got an impossible stray interrupt"));
}
diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c
index d11e9d2..99283ff 100644
--- a/sys/kern/kern_intr.c
+++ b/sys/kern/kern_intr.c
@@ -365,7 +365,7 @@ ok:
}
int
-ithread_schedule(struct ithd *ithread, int do_switch)
+ithread_schedule(struct ithd *ithread)
{
struct int_entropy entropy;
struct thread *td;
@@ -399,10 +399,7 @@ ithread_schedule(struct ithd *ithread, int do_switch)
/*
* Set it_need to tell the thread to keep running if it is already
* running. Then, grab sched_lock and see if we actually need to
- * put this thread on the runqueue. If so and the do_switch flag is
- * true and it is safe to switch, then switch to the ithread
- * immediately. Otherwise, set the needresched flag to guarantee
- * that this ithread will run before any userland processes.
+ * put this thread on the runqueue.
*/
ithread->it_need = 1;
mtx_lock_spin(&sched_lock);
@@ -410,16 +407,6 @@ ithread_schedule(struct ithd *ithread, int do_switch)
CTR2(KTR_INTR, "%s: setrunqueue %d", __func__, p->p_pid);
TD_CLR_IWAIT(td);
setrunqueue(td);
- if (do_switch &&
- (ctd->td_critnest == 1) ) {
- KASSERT((TD_IS_RUNNING(ctd)),
- ("ithread_schedule: Bad state for curthread."));
- if (ctd->td_flags & TDF_IDLETD)
- ctd->td_state = TDS_CAN_RUN; /* XXXKSE */
- mi_switch(SW_INVOL, NULL);
- } else {
- curthread->td_flags |= TDF_NEEDRESCHED;
- }
} else {
CTR4(KTR_INTR, "%s: pid %d: it_need %d, state %d",
__func__, p->p_pid, ithread->it_need, td->td_state);
@@ -480,7 +467,7 @@ swi_sched(void *cookie, int flags)
*/
atomic_store_rel_int(&ih->ih_need, 1);
if (!(flags & SWI_DELAY)) {
- error = ithread_schedule(it, !cold && !dumping);
+ error = ithread_schedule(it);
KASSERT(error == 0, ("stray software interrupt"));
}
}
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 2388983..2fc79de 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -621,7 +621,9 @@ void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
struct turnstile *ts;
+#ifndef PREEMPTION
struct thread *td, *td1;
+#endif
if (mtx_recursed(m)) {
if (--(m->mtx_recurse) == 0)
@@ -646,8 +648,10 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
#else
MPASS(ts != NULL);
#endif
+#ifndef PREEMPTION
/* XXX */
td1 = turnstile_head(ts);
+#endif
#ifdef MUTEX_WAKE_ALL
turnstile_broadcast(ts);
_release_lock_quick(m);
@@ -665,6 +669,7 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
#endif
turnstile_unpend(ts);
+#ifndef PREEMPTION
/*
* XXX: This is just a hack until preemption is done. However,
* once preemption is done we need to either wrap the
@@ -701,6 +706,7 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
m, (void *)m->mtx_lock);
}
mtx_unlock_spin(&sched_lock);
+#endif
return;
}
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index b9bfd39..fbf4660 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -269,7 +269,9 @@ boot(int howto)
if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
register struct buf *bp;
int iter, nbusy, pbusy;
+#ifndef PREEMPTION
int subiter;
+#endif
waittime = 0;
printf("\nsyncing disks, buffers remaining... ");
@@ -300,20 +302,29 @@ boot(int howto)
iter = 0;
pbusy = nbusy;
sync(&thread0, NULL);
- if (curthread != NULL) {
- DROP_GIANT();
- for (subiter = 0; subiter < 50 * iter; subiter++) {
- mtx_lock_spin(&sched_lock);
- /*
- * Allow interrupt threads to run
- */
- mi_switch(SW_VOL, NULL);
- mtx_unlock_spin(&sched_lock);
- DELAY(1000);
- }
- PICKUP_GIANT();
- } else
+
+#ifdef PREEMPTION
+ /*
+ * Drop Giant and spin for a while to allow
+ * interrupt threads to run.
+ */
+ DROP_GIANT();
DELAY(50000 * iter);
+ PICKUP_GIANT();
+#else
+ /*
+ * Drop Giant and context switch several times to
+ * allow interrupt threads to run.
+ */
+ DROP_GIANT();
+ for (subiter = 0; subiter < 50 * iter; subiter++) {
+ mtx_lock_spin(&sched_lock);
+ mi_switch(SW_VOL, NULL);
+ mtx_unlock_spin(&sched_lock);
+ DELAY(1000);
+ }
+ PICKUP_GIANT();
+#endif
}
printf("\n");
/*
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index 464c6cf..d41974e 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -88,6 +88,8 @@ reassigned to keep this true.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_full_preemption.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -423,10 +425,10 @@ setrunqueue(struct thread *td)
}
}
-/************************************************************************
- * Critical section marker functions *
- ************************************************************************/
-/* Critical sections that prevent preemption. */
+/*
+ * Kernel thread preemption implementation. Critical sections mark
+ * regions of code in which preemptions are not allowed.
+ */
void
critical_enter(void)
{
@@ -447,6 +449,13 @@ critical_exit(void)
KASSERT(td->td_critnest != 0,
("critical_exit: td_critnest == 0"));
if (td->td_critnest == 1) {
+#ifdef PREEMPTION
+ if (td->td_flags & TDF_OWEPREEMPT) {
+ mtx_lock_spin(&sched_lock);
+ mi_switch(SW_INVOL, NULL);
+ mtx_unlock_spin(&sched_lock);
+ }
+#endif
td->td_critnest = 0;
cpu_critical_exit();
} else {
@@ -454,6 +463,86 @@ critical_exit(void)
}
}
+/*
+ * This function is called when a thread is about to be put on run queue
+ * because it has been made runnable or its priority has been adjusted. It
+ * determines if the new thread should be immediately preempted to. If so,
+ * it switches to it and eventually returns true. If not, it returns false
+ * so that the caller may place the thread on an appropriate run queue.
+ */
+int
+maybe_preempt(struct thread *td)
+{
+ struct thread *ctd;
+ int cpri, pri;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+#ifdef PREEMPTION
+ /*
+ * The new thread should not preempt the current thread if any of the
+ * following conditions are true:
+ *
+ * - The current thread has a higher (numerically lower) priority.
+ * - It is too early in the boot for context switches (cold is set).
+ * - The current thread has an inhibitor set or is in the process of
+ * exiting. In this case, the current thread is about to switch
+ * out anyways, so there's no point in preempting. If we did,
+ * the current thread would not be properly resumed as well, so
+ * just avoid that whole landmine.
+ * - If the new thread's priority is not a realtime priority and
+ * the current thread's priority is not an idle priority and
+ * FULL_PREEMPTION is disabled.
+ *
+ * If all of these conditions are false, but the current thread is in
+ * a nested critical section, then we have to defer the preemption
+ * until we exit the critical section. Otherwise, switch immediately
+ * to the new thread.
+ */
+ ctd = curthread;
+ pri = td->td_priority;
+ cpri = ctd->td_priority;
+ if (pri >= cpri || cold /* || dumping */ || TD_IS_INHIBITED(ctd) ||
+ td->td_kse->ke_state != KES_THREAD)
+ return (0);
+#ifndef FULL_PREEMPTION
+ if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) &&
+ !(cpri >= PRI_MIN_IDLE))
+ return (0);
+#endif
+ if (ctd->td_critnest > 1) {
+ CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
+ ctd->td_critnest);
+ ctd->td_flags |= TDF_OWEPREEMPT;
+ return (0);
+ }
+
+ /*
+ * Our thread state says that we are already on a run queue, so
+ * update our state as if we had been dequeued by choosethread().
+ */
+ MPASS(TD_ON_RUNQ(td));
+ TD_SET_RUNNING(td);
+ CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
+ td->td_proc->p_pid, td->td_proc->p_comm);
+ mi_switch(SW_INVOL, td);
+ return (1);
+#else
+ return (0);
+#endif
+}
+
+#ifndef PREEMPTION
+/* XXX: There should be a non-static version of this. */
+static void
+printf_caddr_t(void *data)
+{
+ printf("%s", (char *)data);
+}
+static char preempt_warning[] =
+ "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
+SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
+ preempt_warning)
+#endif
/************************************************************************
* SYSTEM RUN QUEUE manipulations and tests *
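As an editorial illustration of the deferred path added above (not part
of the patch; example_deferred_preempt() is a hypothetical function,
headers are omitted, and td is assumed to be a runnable, higher-priority
kernel thread), making a thread runnable while holding an extra critical
section or spin lock only sets TDF_OWEPREEMPT; the switch happens when
the outermost critical section is released:

	static void
	example_deferred_preempt(struct thread *td)
	{

		critical_enter();		/* explicit section, or any spin lock */
		mtx_lock_spin(&sched_lock);
		setrunqueue(td);		/* maybe_preempt() sees td_critnest > 1,  */
						/* sets TDF_OWEPREEMPT and just queues td */
		mtx_unlock_spin(&sched_lock);	/* drops to td_critnest == 1: no switch yet */
		critical_exit();		/* outermost exit does the deferred */
						/* mi_switch(SW_INVOL, NULL)        */
	}

This is also why the kern_synch.c change below relaxes the mi_switch()
assertion to allow td_critnest == 2 with TDF_OWEPREEMPT set: the deferred
involuntary switch runs from inside critical_exit() while it still holds
sched_lock.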
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 5c211d1..f8f4dd7 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -299,7 +299,9 @@ mi_switch(int flags, struct thread *newtd)
if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td))
mtx_assert(&Giant, MA_NOTOWNED);
#endif
- KASSERT(td->td_critnest == 1,
+ KASSERT(td->td_critnest == 1 || (td->td_critnest == 2 &&
+ (td->td_flags & TDF_OWEPREEMPT) != 0 && (flags & SW_INVOL) != 0 &&
+ newtd == NULL),
("mi_switch: switch in a critical section"));
KASSERT((flags & (SW_INVOL | SW_VOL)) != 0,
("mi_switch: switch must be voluntary or involuntary"));
@@ -308,6 +310,7 @@ mi_switch(int flags, struct thread *newtd)
p->p_stats->p_ru.ru_nvcsw++;
else
p->p_stats->p_ru.ru_nivcsw++;
+
/*
* Compute the amount of time during which the current
* process was running, and add that to its total so far.
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 5d8961e..b2ae3dd 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -654,7 +654,7 @@ sched_switch(struct thread *td, struct thread *newtd)
sched_tdcnt++;
td->td_lastcpu = td->td_oncpu;
td->td_last_kse = ke;
- td->td_flags &= ~TDF_NEEDRESCHED;
+ td->td_flags &= ~(TDF_NEEDRESCHED | TDF_OWEPREEMPT);
td->td_oncpu = NOCPU;
/*
* At the last moment, if this thread is still marked RUNNING,
@@ -712,6 +712,16 @@ sched_add(struct thread *td)
ke->ke_proc->p_comm));
KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
("sched_add: process swapped out"));
+
+#ifdef SMP
+ /*
+ * Only try to preempt if the thread is unpinned or pinned to the
+ * current CPU.
+ */
+ if (KSE_CAN_MIGRATE(ke) || ke->ke_runq == &runq_pcpu[PCPU_GET(cpuid)])
+#endif
+ if (maybe_preempt(td))
+ return;
ke->ke_ksegrp->kg_runq_kses++;
ke->ke_state = KES_ONRUNQ;
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index 6340b71..ac2b0c6 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -1139,7 +1139,7 @@ sched_switch(struct thread *td, struct thread *newtd)
td->td_last_kse = ke;
td->td_lastcpu = td->td_oncpu;
td->td_oncpu = NOCPU;
- td->td_flags &= ~TDF_NEEDRESCHED;
+ td->td_flags &= ~(TDF_NEEDRESCHED | TDF_OWEPREEMPT);
/*
* If the KSE has been assigned it may be in the process of switching
@@ -1623,6 +1623,15 @@ sched_add(struct thread *td)
if (td->td_priority < curthread->td_priority)
curthread->td_flags |= TDF_NEEDRESCHED;
+#ifdef SMP
+ /*
+ * Only try to preempt if the thread is unpinned or pinned to the
+ * current CPU.
+ */
+ if (KSE_CAN_MIGRATE(ke) || ke->ke_cpu == PCPU_GET(cpuid))
+#endif
+ if (maybe_preempt(td))
+ return;
ke->ke_ksegrp->kg_runq_kses++;
ke->ke_state = KES_ONRUNQ;
diff --git a/sys/powerpc/powerpc/intr_machdep.c b/sys/powerpc/powerpc/intr_machdep.c
index 991c499..0cde8c5 100644
--- a/sys/powerpc/powerpc/intr_machdep.c
+++ b/sys/powerpc/powerpc/intr_machdep.c
@@ -308,7 +308,7 @@ sched_ithd(void *cookie)
ih = (struct intr_handler *)cookie;
- error = ithread_schedule(ih->ih_ithd, 0);
+ error = ithread_schedule(ih->ih_ithd);
if (error == EINVAL)
intr_stray_handler(ih);
diff --git a/sys/sparc64/sparc64/intr_machdep.c b/sys/sparc64/sparc64/intr_machdep.c
index 5fc0bd0..c9ba8ea 100644
--- a/sys/sparc64/sparc64/intr_machdep.c
+++ b/sys/sparc64/sparc64/intr_machdep.c
@@ -230,11 +230,7 @@ sched_ithd(void *cookie)
int error;
iv = cookie;
-#ifdef notyet
error = ithread_schedule(iv->iv_ithd);
-#else
- error = ithread_schedule(iv->iv_ithd, 0);
-#endif
if (error == EINVAL)
intr_stray_vector(iv);
}
diff --git a/sys/sys/interrupt.h b/sys/sys/interrupt.h
index 7f7ddef..28d6413 100644
--- a/sys/sys/interrupt.h
+++ b/sys/sys/interrupt.h
@@ -122,7 +122,7 @@ int ithread_add_handler(struct ithd *ithread, const char *name,
driver_intr_t handler, void *arg, u_char pri, enum intr_type flags,
void **cookiep);
int ithread_remove_handler(void *cookie);
-int ithread_schedule(struct ithd *ithread, int do_switch);
+int ithread_schedule(struct ithd *ithread);
int swi_add(struct ithd **ithdp, const char *name,
driver_intr_t handler, void *arg, int pri, enum intr_type flags,
void **cookiep);
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 6e268b2..8246c59 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -346,6 +346,7 @@ struct thread {
#define TDF_IDLETD 0x000020 /* This is one of the per-CPU idle threads. */
#define TDF_SELECT 0x000040 /* Selecting; wakeup/waiting danger. */
#define TDF_TSNOBLOCK 0x000100 /* Don't block on a turnstile due to race. */
+#define TDF_OWEPREEMPT 0x000200 /* Thread has a pending preemption. */
#define TDF_ASTPENDING 0x000800 /* Thread has some asynchronous events. */
#define TDF_TIMOFAIL 0x001000 /* Timeout from sleep after we were awake. */
#define TDF_INTERRUPT 0x002000 /* Thread is marked as interrupted. */
@@ -850,6 +851,7 @@ void fork_exit(void (*)(void *, struct trapframe *), void *,
void fork_return(struct thread *, struct trapframe *);
int inferior(struct proc *p);
int leavepgrp(struct proc *p);
+int maybe_preempt(struct thread *td);
void mi_switch(int flags, struct thread *newtd);
int p_candebug(struct thread *td, struct proc *p);
int p_cansee(struct thread *td, struct proc *p);
diff --git a/sys/vm/vm_zeroidle.c b/sys/vm/vm_zeroidle.c
index 71b7a71..bf0e5f2 100644
--- a/sys/vm/vm_zeroidle.c
+++ b/sys/vm/vm_zeroidle.c
@@ -151,12 +151,14 @@ vm_pagezero(void __unused *arg)
for (;;) {
if (vm_page_zero_check()) {
pages += vm_page_zero_idle();
+#ifndef PREEMPTION
if (pages > idlezero_maxrun || sched_runnable()) {
mtx_lock_spin(&sched_lock);
mi_switch(SW_VOL, NULL);
mtx_unlock_spin(&sched_lock);
pages = 0;
}
+#endif
} else {
tsleep(&zero_state, pri, "pgzero", hz * 300);
pages = 0;