summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorattilio <attilio@FreeBSD.org>2010-01-09 01:46:38 +0000
committerattilio <attilio@FreeBSD.org>2010-01-09 01:46:38 +0000
commitfde84f320b44c90f431d0a4145d7b83d2a221253 (patch)
treee46dd2a25d662a40bd7324ff4489431a2403931d
parentda19d81aaa4a2ec26f7ad15e4c609a36f868e9e0 (diff)
downloadFreeBSD-src-fde84f320b44c90f431d0a4145d7b83d2a221253.zip
FreeBSD-src-fde84f320b44c90f431d0a4145d7b83d2a221253.tar.gz
Introduce the new kernel thread called "deadlock resolver".
While the name is pretentious, a good explanation of its targets is reported in this 17 months old presentation e-mail: http://lists.freebsd.org/pipermail/freebsd-arch/2008-August/008452.html In order to implement it, the sq_type in sleepqueues is mandatory and not only compiled along with INVARIANTS option. Additively, a new sleepqueue function, sleepq_type() is added, returning the type of the sleepqueue linked to a wchan. Three new sysctls are added in order to configure the thread: debug.deadlkres.slptime_threshold debug.deadlkres.blktime_threshold debug.deadlkres.sleepfreq rappresenting the thresholds for sleep and block time that will lead to a deadlock matching (when exceeded), while the sleepfreq rappresents the number of seconds between 2 consecutive thread runnings. In order to enable the deadlock resolver thread recompile your kernel with the option DEADLKRES. Reviewed by: jeff Tested by: pho, Giovanni Trematerra Sponsored by: Nokia Incorporated, Sandvine Incorporated MFC after: 2 weeks
-rw-r--r--UPDATING5
-rw-r--r--share/man/man9/sleepqueue.911
-rw-r--r--sys/conf/NOTES5
-rw-r--r--sys/conf/options1
-rw-r--r--sys/kern/kern_clock.c122
-rw-r--r--sys/kern/subr_sleepqueue.c28
-rw-r--r--sys/kern/subr_turnstile.c2
-rw-r--r--sys/sys/proc.h1
-rw-r--r--sys/sys/sleepqueue.h1
9 files changed, 171 insertions, 5 deletions
diff --git a/UPDATING b/UPDATING
index f730b25..8e0ad19 100644
--- a/UPDATING
+++ b/UPDATING
@@ -22,6 +22,11 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.x IS SLOW:
machines to maximize performance. (To disable malloc debugging, run
ln -s aj /etc/malloc.conf.)
+20100108:
+ Introduce the kernel thread "deadlock resolver" (which can be enabled
+ via the DEADLKRES option, see NOTES for more details) and the
+ sleepq_type() function for sleepqueues.
+
20091202:
The rc.firewall and rc.firewall6 were unified, and
rc.firewall6 and rc.d/ip6fw were removed.
diff --git a/share/man/man9/sleepqueue.9 b/share/man/man9/sleepqueue.9
index 36cab69..3c0e9a5 100644
--- a/share/man/man9/sleepqueue.9
+++ b/share/man/man9/sleepqueue.9
@@ -23,7 +23,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd December 12, 2009
+.Dd January 8, 2010
.Dt SLEEPQUEUE 9
.Os
.Sh NAME
@@ -44,6 +44,7 @@
.Nm sleepq_sleepcnt ,
.Nm sleepq_timedwait ,
.Nm sleepq_timedwait_sig ,
+.Nm sleepq_type ,
.Nm sleepq_wait ,
.Nm sleepq_wait_sig
.Nd manage the queues of sleeping threads
@@ -84,6 +85,8 @@
.Fn sleepq_timedwait "void *wchan"
.Ft int
.Fn sleepq_timedwait_sig "void *wchan" "int signal_caught"
+.Ft int
+.Fn sleepq_type "void *wchan"
.Ft void
.Fn sleepq_wait "void *wchan"
.Ft int
@@ -366,6 +369,12 @@ given a
.Fa wchan .
.Pp
The
+.Fn sleepq_type
+function returns the type of
+.Fa wchan
+associated to a sleepqueue.
+.Pp
+The
.Fn sleepq_abort ,
.Fn sleepq_broadcast ,
and
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index b57b50f..8e949b5 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -2531,6 +2531,11 @@ options BOOTP_BLOCKSIZE=8192 # Override NFS block size
options SW_WATCHDOG
#
+# Add the software deadlock resolver thread.
+#
+options DEADLKRES
+
+#
# Disable swapping of stack pages. This option removes all
# code which actually performs swapping, so it's not possible to turn
# it back on at run-time.
diff --git a/sys/conf/options b/sys/conf/options
index 75f69c2..84bfbd1 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -72,6 +72,7 @@ COMPAT_FREEBSD6 opt_compat.h
COMPAT_FREEBSD7 opt_compat.h
COMPILING_LINT opt_global.h
CY_PCI_FASTINTR
+DEADLKRES opt_watchdog.h
DIRECTIO
FULL_PREEMPTION opt_sched.h
IPI_PREEMPTION opt_sched.h
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index e95bc19..2844103 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -48,14 +48,16 @@ __FBSDID("$FreeBSD$");
#include <sys/callout.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
-#include <sys/lock.h>
+#include <sys/kthread.h>
#include <sys/ktr.h>
+#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
+#include <sys/sleepqueue.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>
@@ -159,6 +161,124 @@ sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
+#ifdef DEADLKRES
+static int slptime_threshold = 1800;
+static int blktime_threshold = 900;
+static int sleepfreq = 3;
+
+static void
+deadlkres(void)
+{
+ struct proc *p;
+ struct thread *td;
+ void *wchan;
+ int blkticks, slpticks, slptype, tryl, tticks;
+
+ tryl = 0;
+ for (;;) {
+ blkticks = blktime_threshold * hz;
+ slpticks = slptime_threshold * hz;
+
+ /*
+ * Avoid to sleep on the sx_lock in order to avoid a possible
+ * priority inversion problem leading to starvation.
+ * If the lock can't be held after 100 tries, panic.
+ */
+ if (!sx_try_slock(&allproc_lock)) {
+ if (tryl > 100)
+ panic("%s: possible deadlock detected on allproc_lock\n",
+ __func__);
+ tryl++;
+ pause("allproc_lock deadlkres", sleepfreq * hz);
+ continue;
+ }
+ tryl = 0;
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ FOREACH_THREAD_IN_PROC(p, td) {
+ thread_lock(td);
+ if (TD_ON_LOCK(td)) {
+
+ /*
+ * The thread should be blocked on a
+ * turnstile, simply check if the
+ * turnstile channel is in good state.
+ */
+ MPASS(td->td_blocked != NULL);
+ tticks = ticks - td->td_blktick;
+ thread_unlock(td);
+ if (tticks > blkticks) {
+
+ /*
+ * Accordingly with provided
+ * thresholds, this thread is
+ * stuck for too long on a
+ * turnstile.
+ */
+ PROC_UNLOCK(p);
+ sx_sunlock(&allproc_lock);
+ panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
+ __func__, td, tticks);
+ }
+ } else if (TD_IS_SLEEPING(td)) {
+
+ /*
+ * Check if the thread is sleeping on a
+ * lock, otherwise skip the check.
+ * Drop the thread lock in order to
+ * avoid a LOR with the sleepqueue
+ * spinlock.
+ */
+ wchan = td->td_wchan;
+ tticks = ticks - td->td_slptick;
+ thread_unlock(td);
+ slptype = sleepq_type(wchan);
+ if ((slptype == SLEEPQ_SX ||
+ slptype == SLEEPQ_LK) &&
+ tticks > slpticks) {
+
+ /*
+ * Accordingly with provided
+ * thresholds, this thread is
+ * stuck for too long on a
+ * sleepqueue.
+ */
+ PROC_UNLOCK(p);
+ sx_sunlock(&allproc_lock);
+ panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
+ __func__, td, tticks);
+ }
+ } else
+ thread_unlock(td);
+ }
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&allproc_lock);
+
+ /* Sleep for sleepfreq seconds. */
+ pause("deadlkres", sleepfreq * hz);
+ }
+}
+
+static struct kthread_desc deadlkres_kd = {
+ "deadlkres",
+ deadlkres,
+ (struct thread **)NULL
+};
+
+SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);
+
+SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, "Deadlock resolver");
+SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW,
+ &slptime_threshold, 0,
+ "Number of seconds within is valid to sleep on a sleepqueue");
+SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW,
+ &blktime_threshold, 0,
+ "Number of seconds within is valid to block on a turnstile");
+SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0,
+ "Number of seconds between any deadlock resolver thread run");
+#endif /* DEADLKRES */
+
void
read_cpu_time(long *cp_time)
{
diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c
index a0496bd..5df74d0 100644
--- a/sys/kern/subr_sleepqueue.c
+++ b/sys/kern/subr_sleepqueue.c
@@ -122,8 +122,8 @@ struct sleepqueue {
LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */
LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */
void *sq_wchan; /* (c) Wait channel. */
-#ifdef INVARIANTS
int sq_type; /* (c) Queue type. */
+#ifdef INVARIANTS
struct lock_object *sq_lock; /* (c) Associated lock. */
#endif
};
@@ -317,7 +317,6 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
("thread's sleep queue has a non-empty free list"));
KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
sq->sq_lock = lock;
- sq->sq_type = flags & SLEEPQ_TYPE;
#endif
#ifdef SLEEPQUEUE_PROFILING
sc->sc_depth++;
@@ -330,6 +329,7 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
sq = td->td_sleepqueue;
LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
sq->sq_wchan = wchan;
+ sq->sq_type = flags & SLEEPQ_TYPE;
} else {
MPASS(wchan == sq->sq_wchan);
MPASS(lock == sq->sq_lock);
@@ -669,6 +669,28 @@ sleepq_timedwait_sig(void *wchan, int pri)
}
/*
+ * Returns the type of sleepqueue given a waitchannel.
+ */
+int
+sleepq_type(void *wchan)
+{
+ struct sleepqueue *sq;
+ int type;
+
+ MPASS(wchan != NULL);
+
+ sleepq_lock(wchan);
+ sq = sleepq_lookup(wchan);
+ if (sq == NULL) {
+ sleepq_release(wchan);
+ return (-1);
+ }
+ type = sq->sq_type;
+ sleepq_release(wchan);
+ return (type);
+}
+
+/*
* Removes a thread from a sleep queue and makes it
* runnable.
*/
@@ -1176,8 +1198,8 @@ DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
return;
found:
db_printf("Wait channel: %p\n", sq->sq_wchan);
-#ifdef INVARIANTS
db_printf("Queue type: %d\n", sq->sq_type);
+#ifdef INVARIANTS
if (sq->sq_lock) {
lock = sq->sq_lock;
db_printf("Associated Interlock: %p - (%s) %s\n", lock,
diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c
index 2aad232..114b139 100644
--- a/sys/kern/subr_turnstile.c
+++ b/sys/kern/subr_turnstile.c
@@ -733,6 +733,7 @@ turnstile_wait(struct turnstile *ts, struct thread *owner, int queue)
td->td_tsqueue = queue;
td->td_blocked = ts;
td->td_lockname = lock->lo_name;
+ td->td_blktick = ticks;
TD_SET_LOCK(td);
mtx_unlock_spin(&tc->tc_lock);
propagate_priority(td);
@@ -925,6 +926,7 @@ turnstile_unpend(struct turnstile *ts, int owner_type)
MPASS(TD_CAN_RUN(td));
td->td_blocked = NULL;
td->td_lockname = NULL;
+ td->td_blktick = 0;
#ifdef INVARIANTS
td->td_tsqueue = 0xff;
#endif
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index b88cc62..0ae36af 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -218,6 +218,7 @@ struct thread {
struct ucred *td_ucred; /* (k) Reference to credentials. */
u_int td_estcpu; /* (t) estimated cpu utilization */
int td_slptick; /* (t) Time at sleep. */
+ int td_blktick; /* (t) Time spent blocked. */
struct rusage td_ru; /* (t) rusage information */
uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */
uint64_t td_runtime; /* (t) How many cpu ticks we've run. */
diff --git a/sys/sys/sleepqueue.h b/sys/sys/sleepqueue.h
index 224d602..3e33e6b 100644
--- a/sys/sys/sleepqueue.h
+++ b/sys/sys/sleepqueue.h
@@ -112,6 +112,7 @@ void sleepq_set_timeout(void *wchan, int timo);
u_int sleepq_sleepcnt(void *wchan, int queue);
int sleepq_timedwait(void *wchan, int pri);
int sleepq_timedwait_sig(void *wchan, int pri);
+int sleepq_type(void *wchan);
void sleepq_wait(void *wchan, int pri);
int sleepq_wait_sig(void *wchan, int pri);
OpenPOWER on IntegriCloud