From b749c76e4e8c3e48596eef4a3bef997bd3be47f9 Mon Sep 17 00:00:00 2001 From: attilio Date: Mon, 25 Jan 2010 12:05:51 +0000 Subject: [PATCH] MFC r201879: Introduce the new kernel thread called "deadlock resolver". It periodically scans the state of all threads and heuristically detects whether a deadlock is occurring. In order to implement it, the sq_type field of sleepqueues is now always present rather than compiled only with the INVARIANTS option. Additionally, a new sleepqueue function, sleepq_type(), is added, returning the type of the sleepqueue linked to a wchan. Three new sysctls are added in order to configure the thread: debug.deadlkres.slptime_threshold debug.deadlkres.blktime_threshold debug.deadlkres.sleepfreq The first two represent the thresholds for sleep and block time that, when exceeded, are treated as a deadlock, while sleepfreq represents the number of seconds between two consecutive runs of the thread. In order to enable the deadlock resolver thread, recompile your kernel with the option DEADLKRES. Sponsored by: Sandvine Incorporated git-svn-id: svn://svn.freebsd.org/base/stable/8@202966 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- UPDATING | 5 ++ share/man/man9/sleepqueue.9 | 11 +++- sys/conf/NOTES | 5 ++ sys/conf/options | 1 + sys/kern/kern_clock.c | 122 +++++++++++++++++++++++++++++++++++- sys/kern/subr_sleepqueue.c | 28 ++++++++- sys/kern/subr_turnstile.c | 2 + sys/sys/proc.h | 1 + sys/sys/sleepqueue.h | 1 + 9 files changed, 171 insertions(+), 5 deletions(-) diff --git a/UPDATING b/UPDATING index cba0fb95f..288925611 100644 --- a/UPDATING +++ b/UPDATING @@ -15,6 +15,11 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW ON IA64 OR SUN4V: debugging tools present in HEAD were left in place because sun4v support still needs work to become production ready. +20100125: + Introduce the kernel thread "deadlock resolver" (which can be enabled + via the DEADLKRES option, see NOTES for more details) and the + sleepq_type() function for sleepqueues. 
+ 20090929: 802.11s D3.03 support was committed. This is incompatible with the previous code, which was based on D3.0. diff --git a/share/man/man9/sleepqueue.9 b/share/man/man9/sleepqueue.9 index d1d17cd63..63d0ebbfd 100644 --- a/share/man/man9/sleepqueue.9 +++ b/share/man/man9/sleepqueue.9 @@ -23,7 +23,7 @@ .\" .\" $FreeBSD$ .\" -.Dd January 18, 2010 +.Dd January 25, 2010 .Dt SLEEPQUEUE 9 .Os .Sh NAME @@ -44,6 +44,7 @@ .Nm sleepq_sleepcnt , .Nm sleepq_timedwait , .Nm sleepq_timedwait_sig , +.Nm sleepq_type , .Nm sleepq_wait , .Nm sleepq_wait_sig .Nd manage the queues of sleeping threads @@ -84,6 +85,8 @@ .Fn sleepq_timedwait "void *wchan" .Ft int .Fn sleepq_timedwait_sig "void *wchan" "int signal_caught" +.Ft int +.Fn sleepq_type "void *wchan" .Ft void .Fn sleepq_wait "void *wchan" .Ft int @@ -366,6 +369,12 @@ given a .Fa wchan . .Pp The +.Fn sleepq_type +function returns the type of +.Fa wchan +associated to a sleepqueue. +.Pp +The .Fn sleepq_abort , .Fn sleepq_broadcast , and diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 3c2a99b66..3d737246a 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2472,6 +2472,11 @@ options BOOTP_BLOCKSIZE=8192 # Override NFS block size # options SW_WATCHDOG +# +# Add the software deadlock resolver thread. +# +options DEADLKRES + # # Disable swapping of stack pages. 
This option removes all # code which actually performs swapping, so it's not possible to turn diff --git a/sys/conf/options b/sys/conf/options index 540555956..e336fdc84 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -72,6 +72,7 @@ COMPAT_FREEBSD6 opt_compat.h COMPAT_FREEBSD7 opt_compat.h COMPILING_LINT opt_global.h CY_PCI_FASTINTR +DEADLKRES opt_watchdog.h DIRECTIO FULL_PREEMPTION opt_sched.h IPI_PREEMPTION opt_sched.h diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index e95bc1915..2844103fa 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -48,14 +48,16 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include +#include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -159,6 +161,124 @@ sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics"); +#ifdef DEADLKRES +static int slptime_threshold = 1800; +static int blktime_threshold = 900; +static int sleepfreq = 3; + +static void +deadlkres(void) +{ + struct proc *p; + struct thread *td; + void *wchan; + int blkticks, slpticks, slptype, tryl, tticks; + + tryl = 0; + for (;;) { + blkticks = blktime_threshold * hz; + slpticks = slptime_threshold * hz; + + /* + * Avoid to sleep on the sx_lock in order to avoid a possible + * priority inversion problem leading to starvation. + * If the lock can't be held after 100 tries, panic. + */ + if (!sx_try_slock(&allproc_lock)) { + if (tryl > 100) + panic("%s: possible deadlock detected on allproc_lock\n", + __func__); + tryl++; + pause("allproc_lock deadlkres", sleepfreq * hz); + continue; + } + tryl = 0; + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + if (TD_ON_LOCK(td)) { + + /* + * The thread should be blocked on a + * turnstile, simply check if the + * turnstile channel is in good state. 
+ */ + MPASS(td->td_blocked != NULL); + tticks = ticks - td->td_blktick; + thread_unlock(td); + if (tticks > blkticks) { + + /* + * Accordingly with provided + * thresholds, this thread is + * stuck for too long on a + * turnstile. + */ + PROC_UNLOCK(p); + sx_sunlock(&allproc_lock); + panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", + __func__, td, tticks); + } + } else if (TD_IS_SLEEPING(td)) { + + /* + * Check if the thread is sleeping on a + * lock, otherwise skip the check. + * Drop the thread lock in order to + * avoid a LOR with the sleepqueue + * spinlock. + */ + wchan = td->td_wchan; + tticks = ticks - td->td_slptick; + thread_unlock(td); + slptype = sleepq_type(wchan); + if ((slptype == SLEEPQ_SX || + slptype == SLEEPQ_LK) && + tticks > slpticks) { + + /* + * Accordingly with provided + * thresholds, this thread is + * stuck for too long on a + * sleepqueue. + */ + PROC_UNLOCK(p); + sx_sunlock(&allproc_lock); + panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", + __func__, td, tticks); + } + } else + thread_unlock(td); + } + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); + + /* Sleep for sleepfreq seconds. 
*/ + pause("deadlkres", sleepfreq * hz); + } +} + +static struct kthread_desc deadlkres_kd = { + "deadlkres", + deadlkres, + (struct thread **)NULL +}; + +SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd); + +SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, "Deadlock resolver"); +SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW, + &slptime_threshold, 0, + "Number of seconds within is valid to sleep on a sleepqueue"); +SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW, + &blktime_threshold, 0, + "Number of seconds within is valid to block on a turnstile"); +SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0, + "Number of seconds between any deadlock resolver thread run"); +#endif /* DEADLKRES */ + void read_cpu_time(long *cp_time) { diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c index a0496bdaa..5df74d06e 100644 --- a/sys/kern/subr_sleepqueue.c +++ b/sys/kern/subr_sleepqueue.c @@ -122,8 +122,8 @@ struct sleepqueue { LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */ LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */ void *sq_wchan; /* (c) Wait channel. */ -#ifdef INVARIANTS int sq_type; /* (c) Queue type. */ +#ifdef INVARIANTS struct lock_object *sq_lock; /* (c) Associated lock. 
*/ #endif }; @@ -317,7 +317,6 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, ("thread's sleep queue has a non-empty free list")); KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer")); sq->sq_lock = lock; - sq->sq_type = flags & SLEEPQ_TYPE; #endif #ifdef SLEEPQUEUE_PROFILING sc->sc_depth++; @@ -330,6 +329,7 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, sq = td->td_sleepqueue; LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash); sq->sq_wchan = wchan; + sq->sq_type = flags & SLEEPQ_TYPE; } else { MPASS(wchan == sq->sq_wchan); MPASS(lock == sq->sq_lock); @@ -668,6 +668,28 @@ sleepq_timedwait_sig(void *wchan, int pri) return (rvalt); } +/* + * Returns the type of sleepqueue given a waitchannel. + */ +int +sleepq_type(void *wchan) +{ + struct sleepqueue *sq; + int type; + + MPASS(wchan != NULL); + + sleepq_lock(wchan); + sq = sleepq_lookup(wchan); + if (sq == NULL) { + sleepq_release(wchan); + return (-1); + } + type = sq->sq_type; + sleepq_release(wchan); + return (type); +} + /* * Removes a thread from a sleep queue and makes it * runnable. 
@@ -1176,8 +1198,8 @@ DB_SHOW_COMMAND(sleepq, db_show_sleepqueue) return; found: db_printf("Wait channel: %p\n", sq->sq_wchan); -#ifdef INVARIANTS db_printf("Queue type: %d\n", sq->sq_type); +#ifdef INVARIANTS if (sq->sq_lock) { lock = sq->sq_lock; db_printf("Associated Interlock: %p - (%s) %s\n", lock, diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c index 31c8cfc48..d8f774899 100644 --- a/sys/kern/subr_turnstile.c +++ b/sys/kern/subr_turnstile.c @@ -733,6 +733,7 @@ turnstile_wait(struct turnstile *ts, struct thread *owner, int queue) td->td_tsqueue = queue; td->td_blocked = ts; td->td_lockname = lock->lo_name; + td->td_blktick = ticks; TD_SET_LOCK(td); mtx_unlock_spin(&tc->tc_lock); propagate_priority(td); @@ -925,6 +926,7 @@ turnstile_unpend(struct turnstile *ts, int owner_type) MPASS(TD_CAN_RUN(td)); td->td_blocked = NULL; td->td_lockname = NULL; + td->td_blktick = 0; #ifdef INVARIANTS td->td_tsqueue = 0xff; #endif diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 642a6307f..c0af11f6b 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -217,6 +217,7 @@ struct thread { struct ucred *td_ucred; /* (k) Reference to credentials. */ u_int td_estcpu; /* (t) estimated cpu utilization */ int td_slptick; /* (t) Time at sleep. */ + int td_blktick; /* (t) Time spent blocked. */ struct rusage td_ru; /* (t) rusage information */ uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */ uint64_t td_runtime; /* (t) How many cpu ticks we've run. */ diff --git a/sys/sys/sleepqueue.h b/sys/sys/sleepqueue.h index 224d602c0..3e33e6b34 100644 --- a/sys/sys/sleepqueue.h +++ b/sys/sys/sleepqueue.h @@ -112,6 +112,7 @@ void sleepq_set_timeout(void *wchan, int timo); u_int sleepq_sleepcnt(void *wchan, int queue); int sleepq_timedwait(void *wchan, int pri); int sleepq_timedwait_sig(void *wchan, int pri); +int sleepq_type(void *wchan); void sleepq_wait(void *wchan, int pri); int sleepq_wait_sig(void *wchan, int pri); -- 2.45.0