]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/subr_witness.c
This commit was generated by cvs2svn to compensate for changes in r106163,
[FreeBSD/FreeBSD.git] / sys / kern / subr_witness.c
1 /*-
2  * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Berkeley Software Design Inc's name may not be used to endorse or
13  *    promote products derived from this software without specific prior
14  *    written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  *      from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29  *      and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30  * $FreeBSD$
31  */
32
33 /*
34  * Implementation of the `witness' lock verifier.  Originally implemented for
35  * mutexes in BSD/OS.  Extended to handle generic lock objects and lock
36  * classes in FreeBSD.
37  */
38
39 /*
40  *      Main Entry: witness
41  *      Pronunciation: 'wit-n&s
42  *      Function: noun
43  *      Etymology: Middle English witnesse, from Old English witnes knowledge,
44  *          testimony, witness, from 2wit
45  *      Date: before 12th century
46  *      1 : attestation of a fact or event : TESTIMONY
47  *      2 : one that gives evidence; specifically : one who testifies in
48  *          a cause or before a judicial tribunal
49  *      3 : one asked to be present at a transaction so as to be able to
50  *          testify to its having taken place
51  *      4 : one who has personal knowledge of something
52  *      5 a : something serving as evidence or proof : SIGN
53  *        b : public affirmation by word or example of usually
54  *            religious faith or conviction <the heroic witness to divine
55  *            life -- Pilot>
56  *      6 capitalized : a member of the Jehovah's Witnesses 
57  */
58
59 #include "opt_ddb.h"
60 #include "opt_witness.h"
61
62 #include <sys/param.h>
63 #include <sys/bus.h>
64 #include <sys/kernel.h>
65 #include <sys/ktr.h>
66 #include <sys/lock.h>
67 #include <sys/malloc.h>
68 #include <sys/mutex.h>
69 #include <sys/proc.h>
70 #include <sys/sysctl.h>
71 #include <sys/systm.h>
72
73 #include <ddb/ddb.h>
74
75 /* Define this to check for blessed mutexes */
76 #undef BLESSING
77
78 #define WITNESS_COUNT 200
79 #define WITNESS_CHILDCOUNT (WITNESS_COUNT * 4)
80 /*
81  * XXX: This is somewhat bogus, as we assume here that at most 1024 threads
82  * will hold LOCK_NCHILDREN * 2 locks.  We handle failure ok, and we should
83  * probably be safe for the most part, but it's still a SWAG.
84  */
85 #define LOCK_CHILDCOUNT (MAXCPU + 1024) * 2
86
87 #define WITNESS_NCHILDREN 6
88
89 struct witness_child_list_entry;
90
91 struct witness {
92         const   char *w_name;
93         struct  lock_class *w_class;
94         STAILQ_ENTRY(witness) w_list;           /* List of all witnesses. */
95         STAILQ_ENTRY(witness) w_typelist;       /* Witnesses of a type. */
96         struct  witness_child_list_entry *w_children;   /* Great evilness... */
97         const   char *w_file;
98         int     w_line;
99         u_int   w_level;
100         u_int   w_refcount;
101         u_char  w_Giant_squawked:1;
102         u_char  w_other_squawked:1;
103         u_char  w_same_squawked:1;
104 };
105
106 struct witness_child_list_entry {
107         struct  witness_child_list_entry *wcl_next;
108         struct  witness *wcl_children[WITNESS_NCHILDREN];
109         u_int   wcl_count;
110 };
111
112 STAILQ_HEAD(witness_list, witness);
113
114 #ifdef BLESSING
115 struct witness_blessed {
116         const   char *b_lock1;
117         const   char *b_lock2;
118 };
119 #endif
120
121 struct witness_order_list_entry {
122         const   char *w_name;
123         struct  lock_class *w_class;
124 };
125
126 static struct   witness *enroll(const char *description,
127                                 struct lock_class *lock_class);
128 static int      itismychild(struct witness *parent, struct witness *child);
129 static void     removechild(struct witness *parent, struct witness *child);
130 static int      isitmychild(struct witness *parent, struct witness *child);
131 static int      isitmydescendant(struct witness *parent, struct witness *child);
132 #ifdef BLESSING
133 static int      blessed(struct witness *, struct witness *);
134 #endif
135 static void     witness_displaydescendants(void(*)(const char *fmt, ...),
136                                            struct witness *);
137 static void     witness_leveldescendents(struct witness *parent, int level);
138 static void     witness_levelall(void);
139 static struct   witness *witness_get(void);
140 static void     witness_free(struct witness *m);
141 static struct   witness_child_list_entry *witness_child_get(void);
142 static void     witness_child_free(struct witness_child_list_entry *wcl);
143 static struct   lock_list_entry *witness_lock_list_get(void);
144 static void     witness_lock_list_free(struct lock_list_entry *lle);
145 static struct   lock_instance *find_instance(struct lock_list_entry *lock_list,
146                                              struct lock_object *lock);
147 #if defined(DDB)
148 static void     witness_display_list(void(*prnt)(const char *fmt, ...),
149                                      struct witness_list *list);
150 static void     witness_display(void(*)(const char *fmt, ...));
151 #endif
152
153 MALLOC_DEFINE(M_WITNESS, "witness", "witness structure");
154
155 static int witness_watch = 1;
156 TUNABLE_INT("debug.witness_watch", &witness_watch);
157 SYSCTL_INT(_debug, OID_AUTO, witness_watch, CTLFLAG_RD, &witness_watch, 0, "");
158
159 #ifdef DDB
160 /*
161  * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
162  * drop into kdebug() when:
163  *      - a lock heirarchy violation occurs
164  *      - locks are held when going to sleep.
165  */
166 #ifdef WITNESS_DDB
167 int     witness_ddb = 1;
168 #else
169 int     witness_ddb = 0;
170 #endif
171 TUNABLE_INT("debug.witness_ddb", &witness_ddb);
172 SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
173 #endif /* DDB */
174
175 #ifdef WITNESS_SKIPSPIN
176 int     witness_skipspin = 1;
177 #else
178 int     witness_skipspin = 0;
179 #endif
180 TUNABLE_INT("debug.witness_skipspin", &witness_skipspin);
181 SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
182     "");
183
184 static struct mtx w_mtx;
185 static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free);
186 static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all);
187 static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin);
188 static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep);
189 static struct witness_child_list_entry *w_child_free = NULL;
190 static struct lock_list_entry *w_lock_list_free = NULL;
191 static int witness_dead;        /* fatal error, probably no memory */
192
193 static struct witness w_data[WITNESS_COUNT];
194 static struct witness_child_list_entry w_childdata[WITNESS_CHILDCOUNT];
195 static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT];
196
197 static struct witness_order_list_entry order_lists[] = {
198         { "Giant", &lock_class_mtx_sleep },
199         { "proctree", &lock_class_sx },
200         { "allproc", &lock_class_sx },
201         { "sigio lock", &lock_class_mtx_sleep },
202         { "process group", &lock_class_mtx_sleep },
203         { "process lock", &lock_class_mtx_sleep },
204         { "session", &lock_class_mtx_sleep },
205         { "uidinfo hash", &lock_class_mtx_sleep },
206         { "uidinfo struct", &lock_class_mtx_sleep },
207         { NULL, NULL },
208         /*
209          * spin locks
210          */
211 #ifdef SMP
212         { "ap boot", &lock_class_mtx_spin },
213 #ifdef __i386__
214         { "com", &lock_class_mtx_spin },
215 #endif
216 #endif
217         { "sio", &lock_class_mtx_spin },
218 #ifdef __i386__
219         { "cy", &lock_class_mtx_spin },
220 #endif
221         { "sabtty", &lock_class_mtx_spin },
222         { "ng_node", &lock_class_mtx_spin },
223         { "ng_worklist", &lock_class_mtx_spin },
224         { "ithread table lock", &lock_class_mtx_spin },
225         { "sched lock", &lock_class_mtx_spin },
226         { "callout", &lock_class_mtx_spin },
227         /*
228          * leaf locks
229          */
230         { "allpmaps", &lock_class_mtx_spin },
231         { "vm page buckets mutex", &lock_class_mtx_spin },
232         { "vm page queue free mutex", &lock_class_mtx_spin },
233         { "icu", &lock_class_mtx_spin },
234 #ifdef SMP
235         { "smp rendezvous", &lock_class_mtx_spin },
236 #if defined(__i386__) && defined(APIC_IO)
237         { "tlb", &lock_class_mtx_spin },
238 #endif
239 #endif
240         { "clk", &lock_class_mtx_spin },
241         { "mutex profiling lock", &lock_class_mtx_spin },
242         { "zombie_thread_lock", &lock_class_mtx_spin },
243         { "ALD Queue", &lock_class_mtx_spin },
244 #ifdef __ia64__
245         { "MCA spin lock", &lock_class_mtx_spin },
246 #endif
247         { NULL, NULL },
248         { NULL, NULL }
249 };
250
251 #ifdef BLESSING
252 /*
253  * Pairs of locks which have been blessed
254  * Don't complain about order problems with blessed locks
255  */
256 static struct witness_blessed blessed_list[] = {
257 };
258 static int blessed_count =
259         sizeof(blessed_list) / sizeof(struct witness_blessed);
260 #endif
261
262 /*
263  * List of all locks in the system.
264  */
265 TAILQ_HEAD(, lock_object) all_locks = TAILQ_HEAD_INITIALIZER(all_locks);
266
267 static struct mtx all_mtx = {
268         { &lock_class_mtx_sleep,        /* mtx_object.lo_class */
269           "All locks list",             /* mtx_object.lo_name */
270           "All locks list",             /* mtx_object.lo_type */
271           LO_INITIALIZED,               /* mtx_object.lo_flags */
272           { NULL, NULL },               /* mtx_object.lo_list */
273           NULL },                       /* mtx_object.lo_witness */
274         MTX_UNOWNED, 0,                 /* mtx_lock, mtx_recurse */
275         TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
276         { NULL, NULL }                  /* mtx_contested */
277 };
278
279 /*
280  * This global is set to 0 once it becomes safe to use the witness code.
281  */
282 static int witness_cold = 1;
283
284 /*
285  * Global variables for book keeping.
286  */
287 static int lock_cur_cnt;
288 static int lock_max_cnt;
289
290 /*
291  * The WITNESS-enabled diagnostic code.
292  */
293 static void
294 witness_initialize(void *dummy __unused)
295 {
296         struct lock_object *lock;
297         struct witness_order_list_entry *order;
298         struct witness *w, *w1;
299         int i;
300
301         /*
302          * We have to release Giant before initializing its witness
303          * structure so that WITNESS doesn't get confused.
304          */
305         mtx_unlock(&Giant);
306         mtx_assert(&Giant, MA_NOTOWNED);
307
308         CTR1(KTR_WITNESS, "%s: initializing witness", __func__);
309         TAILQ_INSERT_HEAD(&all_locks, &all_mtx.mtx_object, lo_list);
310         mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET |
311             MTX_NOWITNESS);
312         for (i = 0; i < WITNESS_COUNT; i++)
313                 witness_free(&w_data[i]);
314         for (i = 0; i < WITNESS_CHILDCOUNT; i++)
315                 witness_child_free(&w_childdata[i]);
316         for (i = 0; i < LOCK_CHILDCOUNT; i++)
317                 witness_lock_list_free(&w_locklistdata[i]);
318
319         /* First add in all the specified order lists. */
320         for (order = order_lists; order->w_name != NULL; order++) {
321                 w = enroll(order->w_name, order->w_class);
322                 if (w == NULL)
323                         continue;
324                 w->w_file = "order list";
325                 for (order++; order->w_name != NULL; order++) {
326                         w1 = enroll(order->w_name, order->w_class);
327                         if (w1 == NULL)
328                                 continue;
329                         w1->w_file = "order list";
330                         itismychild(w, w1);
331                         w = w1;
332                 }
333         }
334
335         /* Iterate through all locks and add them to witness. */
336         mtx_lock(&all_mtx);
337         TAILQ_FOREACH(lock, &all_locks, lo_list) {
338                 if (lock->lo_flags & LO_WITNESS)
339                         lock->lo_witness = enroll(lock->lo_type,
340                             lock->lo_class);
341                 else
342                         lock->lo_witness = NULL;
343         }
344         mtx_unlock(&all_mtx);
345
346         /* Mark the witness code as being ready for use. */
347         atomic_store_rel_int(&witness_cold, 0);
348
349         mtx_lock(&Giant);
350 }
351 SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, NULL)
352
353 void
354 witness_init(struct lock_object *lock)
355 {
356         struct lock_class *class;
357
358         class = lock->lo_class;
359         if (lock->lo_flags & LO_INITIALIZED)
360                 panic("%s: lock (%s) %s is already initialized", __func__,
361                     class->lc_name, lock->lo_name);
362         if ((lock->lo_flags & LO_RECURSABLE) != 0 &&
363             (class->lc_flags & LC_RECURSABLE) == 0)
364                 panic("%s: lock (%s) %s can not be recursable", __func__,
365                     class->lc_name, lock->lo_name);
366         if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
367             (class->lc_flags & LC_SLEEPABLE) == 0)
368                 panic("%s: lock (%s) %s can not be sleepable", __func__,
369                     class->lc_name, lock->lo_name);
370         if ((lock->lo_flags & LO_UPGRADABLE) != 0 &&
371             (class->lc_flags & LC_UPGRADABLE) == 0)
372                 panic("%s: lock (%s) %s can not be upgradable", __func__,
373                     class->lc_name, lock->lo_name);
374
375         mtx_lock(&all_mtx);
376         TAILQ_INSERT_TAIL(&all_locks, lock, lo_list);
377         lock->lo_flags |= LO_INITIALIZED;
378         lock_cur_cnt++;
379         if (lock_cur_cnt > lock_max_cnt)
380                 lock_max_cnt = lock_cur_cnt;
381         mtx_unlock(&all_mtx);
382         if (!witness_cold && !witness_dead && panicstr == NULL &&
383             (lock->lo_flags & LO_WITNESS) != 0)
384                 lock->lo_witness = enroll(lock->lo_type, class);
385         else
386                 lock->lo_witness = NULL;
387 }
388
389 void
390 witness_destroy(struct lock_object *lock)
391 {
392         struct witness *w;
393
394         if (witness_cold)
395                 panic("lock (%s) %s destroyed while witness_cold",
396                     lock->lo_class->lc_name, lock->lo_name);
397         if ((lock->lo_flags & LO_INITIALIZED) == 0)
398                 panic("%s: lock (%s) %s is not initialized", __func__,
399                     lock->lo_class->lc_name, lock->lo_name);
400
401         /* XXX: need to verify that no one holds the lock */
402         w = lock->lo_witness;
403         if (w != NULL) {
404                 mtx_lock_spin(&w_mtx);
405                 MPASS(w->w_refcount > 0);
406                 w->w_refcount--;
407                 mtx_unlock_spin(&w_mtx);
408         }
409
410         mtx_lock(&all_mtx);
411         lock_cur_cnt--;
412         TAILQ_REMOVE(&all_locks, lock, lo_list);
413         lock->lo_flags &= ~LO_INITIALIZED;
414         mtx_unlock(&all_mtx);
415 }
416
417 #if defined(DDB)
418 static void
419 witness_display_list(void(*prnt)(const char *fmt, ...),
420                      struct witness_list *list)
421 {
422         struct witness *w, *w1;
423         int found;
424
425         STAILQ_FOREACH(w, list, w_typelist) {
426                 if (w->w_file == NULL)
427                         continue;
428                 found = 0;
429                 STAILQ_FOREACH(w1, list, w_typelist) {
430                         if (isitmychild(w1, w)) {
431                                 found++;
432                                 break;
433                         }
434                 }
435                 if (found)
436                         continue;
437                 /*
438                  * This lock has no anscestors, display its descendants. 
439                  */
440                 witness_displaydescendants(prnt, w);
441         }
442 }
443         
444 static void
445 witness_display(void(*prnt)(const char *fmt, ...))
446 {
447         struct witness *w;
448
449         KASSERT(!witness_cold, ("%s: witness_cold", __func__));
450         witness_levelall();
451
452         /*
453          * First, handle sleep locks which have been acquired at least
454          * once.
455          */
456         prnt("Sleep locks:\n");
457         witness_display_list(prnt, &w_sleep);
458         
459         /*
460          * Now do spin locks which have been acquired at least once.
461          */
462         prnt("\nSpin locks:\n");
463         witness_display_list(prnt, &w_spin);
464         
465         /*
466          * Finally, any locks which have not been acquired yet.
467          */
468         prnt("\nLocks which were never acquired:\n");
469         STAILQ_FOREACH(w, &w_all, w_list) {
470                 if (w->w_file != NULL || w->w_refcount == 0)
471                         continue;
472                 prnt("%s\n", w->w_name);
473         }
474 }
475 #endif
476
477 void
478 witness_lock(struct lock_object *lock, int flags, const char *file, int line)
479 {
480         struct lock_list_entry **lock_list, *lle;
481         struct lock_instance *lock1, *lock2;
482         struct lock_class *class;
483         struct witness *w, *w1;
484         struct thread *td;
485         int i, j;
486 #ifdef DDB
487         int go_into_ddb = 0;
488 #endif /* DDB */
489
490         if (witness_cold || witness_dead || lock->lo_witness == NULL ||
491             panicstr != NULL)
492                 return;
493         w = lock->lo_witness;
494         class = lock->lo_class;
495         td = curthread;
496
497         if (class->lc_flags & LC_SLEEPLOCK) {
498                 /*
499                  * Since spin locks include a critical section, this check
500                  * impliclty enforces a lock order of all sleep locks before
501                  * all spin locks.
502                  */
503                 if (td->td_critnest != 0 && (flags & LOP_TRYLOCK) == 0)
504                         panic("blockable sleep lock (%s) %s @ %s:%d",
505                             class->lc_name, lock->lo_name, file, line);
506                 lock_list = &td->td_sleeplocks;
507         } else
508                 lock_list = PCPU_PTR(spinlocks);
509
510         /*
511          * Try locks do not block if they fail to acquire the lock, thus
512          * there is no danger of deadlocks or of switching while holding a
513          * spin lock if we acquire a lock via a try operation.
514          */
515         if (flags & LOP_TRYLOCK)
516                 goto out;
517
518         /*
519          * Is this the first lock acquired?  If so, then no order checking
520          * is needed.
521          */
522         if (*lock_list == NULL)
523                 goto out;
524
525         /*
526          * Check to see if we are recursing on a lock we already own.
527          */
528         lock1 = find_instance(*lock_list, lock);
529         if (lock1 != NULL) {
530                 if ((lock1->li_flags & LI_EXCLUSIVE) != 0 &&
531                     (flags & LOP_EXCLUSIVE) == 0) {
532                         printf("shared lock of (%s) %s @ %s:%d\n",
533                             class->lc_name, lock->lo_name, file, line);
534                         printf("while exclusively locked from %s:%d\n",
535                             lock1->li_file, lock1->li_line);
536                         panic("share->excl");
537                 }
538                 if ((lock1->li_flags & LI_EXCLUSIVE) == 0 &&
539                     (flags & LOP_EXCLUSIVE) != 0) {
540                         printf("exclusive lock of (%s) %s @ %s:%d\n",
541                             class->lc_name, lock->lo_name, file, line);
542                         printf("while share locked from %s:%d\n",
543                             lock1->li_file, lock1->li_line);
544                         panic("excl->share");
545                 }
546                 lock1->li_flags++;
547                 if ((lock->lo_flags & LO_RECURSABLE) == 0) {
548                         printf(
549                         "recursed on non-recursive lock (%s) %s @ %s:%d\n",
550                             class->lc_name, lock->lo_name, file, line);
551                         printf("first acquired @ %s:%d\n", lock1->li_file,
552                             lock1->li_line);
553                         panic("recurse");
554                 }
555                 CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__,
556                     td->td_proc->p_pid, lock->lo_name,
557                     lock1->li_flags & LI_RECURSEMASK);
558                 lock1->li_file = file;
559                 lock1->li_line = line;
560                 return;
561         }
562
563         /*
564          * Check for duplicate locks of the same type.  Note that we only
565          * have to check for this on the last lock we just acquired.  Any
566          * other cases will be caught as lock order violations.
567          */
568         lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];
569         w1 = lock1->li_lock->lo_witness;
570         if (w1 == w) {
571                 if (w->w_same_squawked || (lock->lo_flags & LO_DUPOK))
572                         goto out;
573                 w->w_same_squawked = 1;
574                 printf("acquiring duplicate lock of same type: \"%s\"\n", 
575                         lock->lo_type);
576                 printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name,
577                     lock1->li_file, lock1->li_line);
578                 printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line);
579 #ifdef DDB
580                 go_into_ddb = 1;
581 #endif /* DDB */
582                 goto out;
583         }
584         MPASS(!mtx_owned(&w_mtx));
585         mtx_lock_spin(&w_mtx);
586         /*
587          * If we have a known higher number just say ok
588          */
589         if (witness_watch > 1 && w->w_level > w1->w_level) {
590                 mtx_unlock_spin(&w_mtx);
591                 goto out;
592         }
593         if (isitmydescendant(w1, w)) {
594                 mtx_unlock_spin(&w_mtx);
595                 goto out;
596         }
597         for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) {
598                 for (i = lle->ll_count - 1; i >= 0; i--, j++) {
599
600                         MPASS(j < WITNESS_COUNT);
601                         lock1 = &lle->ll_children[i];
602                         w1 = lock1->li_lock->lo_witness;
603
604                         /*
605                          * If this lock doesn't undergo witness checking,
606                          * then skip it.
607                          */
608                         if (w1 == NULL) {
609                                 KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0,
610                                     ("lock missing witness structure"));
611                                 continue;
612                         }
613                         /*
614                          * If we are locking Giant and we slept with this
615                          * lock, then skip it.
616                          */
617                         if ((lock1->li_flags & LI_SLEPT) != 0 &&
618                             lock == &Giant.mtx_object)
619                                 continue;
620                         /*
621                          * If we are locking a sleepable lock and this lock
622                          * isn't sleepable and isn't Giant, we want to treat
623                          * it as a lock order violation to enfore a general
624                          * lock order of sleepable locks before non-sleepable
625                          * locks.  Thus, we only bother checking the lock
626                          * order hierarchy if we pass the initial test.
627                          */
628                         if (!((lock->lo_flags & LO_SLEEPABLE) != 0 &&
629                             ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 &&
630                             lock1->li_lock != &Giant.mtx_object)) &&
631                             !isitmydescendant(w, w1))
632                                 continue;
633                         /*
634                          * We have a lock order violation, check to see if it
635                          * is allowed or has already been yelled about.
636                          */
637                         mtx_unlock_spin(&w_mtx);
638 #ifdef BLESSING
639                         if (blessed(w, w1))
640                                 goto out;
641 #endif
642                         if (lock1->li_lock == &Giant.mtx_object) {
643                                 if (w1->w_Giant_squawked)
644                                         goto out;
645                                 else
646                                         w1->w_Giant_squawked = 1;
647                         } else {
648                                 if (w1->w_other_squawked)
649                                         goto out;
650                                 else
651                                         w1->w_other_squawked = 1;
652                         }
653                         /*
654                          * Ok, yell about it.
655                          */
656                         printf("lock order reversal\n");
657                         /*
658                          * Try to locate an earlier lock with
659                          * witness w in our list.
660                          */
661                         do {
662                                 lock2 = &lle->ll_children[i];
663                                 MPASS(lock2->li_lock != NULL);
664                                 if (lock2->li_lock->lo_witness == w)
665                                         break;
666                                 i--;
667                                 if (i == 0 && lle->ll_next != NULL) {
668                                         lle = lle->ll_next;
669                                         i = lle->ll_count - 1;
670                                         MPASS(i != 0);
671                                 }
672                         } while (i >= 0);
673                         if (i < 0) {
674                                 printf(" 1st %p %s (%s) @ %s:%d\n",
675                                     lock1->li_lock, lock1->li_lock->lo_name,
676                                     lock1->li_lock->lo_type, lock1->li_file,
677                                     lock1->li_line);
678                                 printf(" 2nd %p %s (%s) @ %s:%d\n", lock,
679                                     lock->lo_name, lock->lo_type, file, line);
680                         } else {
681                                 printf(" 1st %p %s (%s) @ %s:%d\n",
682                                     lock2->li_lock, lock2->li_lock->lo_name,
683                                     lock2->li_lock->lo_type, lock2->li_file,
684                                     lock2->li_line);
685                                 printf(" 2nd %p %s (%s) @ %s:%d\n",
686                                     lock1->li_lock, lock1->li_lock->lo_name,
687                                     lock1->li_lock->lo_type, lock1->li_file,
688                                     lock1->li_line);
689                                 printf(" 3rd %p %s (%s) @ %s:%d\n", lock,
690                                     lock->lo_name, lock->lo_type, file, line);
691                         }
692 #ifdef DDB
693                         go_into_ddb = 1;
694 #endif /* DDB */
695                         goto out;
696                 }
697         }
698         lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];
699         /*
700          * Don't build a new relationship if we are locking Giant just
701          * after waking up and the previous lock in the list was acquired
702          * prior to blocking.
703          */
704         if (lock == &Giant.mtx_object && (lock1->li_flags & LI_SLEPT) != 0)
705                 mtx_unlock_spin(&w_mtx);
706         else {
707                 CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__,
708                     lock->lo_type, lock1->li_lock->lo_type);
709                 if (!itismychild(lock1->li_lock->lo_witness, w))
710                         mtx_unlock_spin(&w_mtx);
711         } 
712
713 out:
714 #ifdef DDB
715         if (witness_ddb && go_into_ddb)
716                 Debugger(__func__);
717 #endif /* DDB */
718         w->w_file = file;
719         w->w_line = line;
720         
721         lle = *lock_list;
722         if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) {
723                 lle = witness_lock_list_get();
724                 if (lle == NULL)
725                         return;
726                 lle->ll_next = *lock_list;
727                 CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__,
728                     td->td_proc->p_pid, lle);
729                 *lock_list = lle;
730         }
731         lock1 = &lle->ll_children[lle->ll_count++];
732         lock1->li_lock = lock;
733         lock1->li_line = line;
734         lock1->li_file = file;
735         if ((flags & LOP_EXCLUSIVE) != 0)
736                 lock1->li_flags = LI_EXCLUSIVE;
737         else
738                 lock1->li_flags = 0;
739         CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__,
740             td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1);
741 }
742
743 void
744 witness_upgrade(struct lock_object *lock, int flags, const char *file, int line)
745 {
746         struct lock_instance *instance;
747         struct lock_class *class;
748
749         KASSERT(!witness_cold, ("%s: witness_cold", __func__));
750         if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
751                 return;
752         class = lock->lo_class;
753         if ((lock->lo_flags & LO_UPGRADABLE) == 0)
754                 panic("upgrade of non-upgradable lock (%s) %s @ %s:%d",
755                     class->lc_name, lock->lo_name, file, line);
756         if ((flags & LOP_TRYLOCK) == 0)
757                 panic("non-try upgrade of lock (%s) %s @ %s:%d", class->lc_name,
758                     lock->lo_name, file, line);
759         if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
760                 panic("upgrade of non-sleep lock (%s) %s @ %s:%d",
761                     class->lc_name, lock->lo_name, file, line);
762         instance = find_instance(curthread->td_sleeplocks, lock);
763         if (instance == NULL)
764                 panic("upgrade of unlocked lock (%s) %s @ %s:%d",
765                     class->lc_name, lock->lo_name, file, line);
766         if ((instance->li_flags & LI_EXCLUSIVE) != 0)
767                 panic("upgrade of exclusive lock (%s) %s @ %s:%d",
768                     class->lc_name, lock->lo_name, file, line);
769         if ((instance->li_flags & LI_RECURSEMASK) != 0)
770                 panic("upgrade of recursed lock (%s) %s r=%d @ %s:%d",
771                     class->lc_name, lock->lo_name,
772                     instance->li_flags & LI_RECURSEMASK, file, line);
773         instance->li_flags |= LI_EXCLUSIVE;
774 }
775
776 void
777 witness_downgrade(struct lock_object *lock, int flags, const char *file,
778     int line)
779 {
780         struct lock_instance *instance;
781         struct lock_class *class;
782
783         KASSERT(!witness_cold, ("%s: witness_cold", __func__));
784         if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
785                 return;
786         class = lock->lo_class;
787         if ((lock->lo_flags & LO_UPGRADABLE) == 0)
788                 panic("downgrade of non-upgradable lock (%s) %s @ %s:%d",
789                     class->lc_name, lock->lo_name, file, line);
790         if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
791                 panic("downgrade of non-sleep lock (%s) %s @ %s:%d",
792                     class->lc_name, lock->lo_name, file, line);
793         instance = find_instance(curthread->td_sleeplocks, lock);
794         if (instance == NULL)
795                 panic("downgrade of unlocked lock (%s) %s @ %s:%d",
796                     class->lc_name, lock->lo_name, file, line);
797         if ((instance->li_flags & LI_EXCLUSIVE) == 0)
798                 panic("downgrade of shared lock (%s) %s @ %s:%d",
799                     class->lc_name, lock->lo_name, file, line);
800         if ((instance->li_flags & LI_RECURSEMASK) != 0)
801                 panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d",
802                     class->lc_name, lock->lo_name,
803                     instance->li_flags & LI_RECURSEMASK, file, line);
804         instance->li_flags &= ~LI_EXCLUSIVE;
805 }
806
807 void
808 witness_unlock(struct lock_object *lock, int flags, const char *file, int line)
809 {
810         struct lock_list_entry **lock_list, *lle;
811         struct lock_instance *instance;
812         struct lock_class *class;
813         struct thread *td;
814         register_t s;
815         int i, j;
816
817         if (witness_cold || witness_dead || lock->lo_witness == NULL ||
818             panicstr != NULL)
819                 return;
820         td = curthread;
821         class = lock->lo_class;
822         if (class->lc_flags & LC_SLEEPLOCK)
823                 lock_list = &td->td_sleeplocks;
824         else
825                 lock_list = PCPU_PTR(spinlocks);
826         for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next)
827                 for (i = 0; i < (*lock_list)->ll_count; i++) {
828                         instance = &(*lock_list)->ll_children[i];
829                         if (instance->li_lock == lock) {
830                                 if ((instance->li_flags & LI_EXCLUSIVE) != 0 &&
831                                     (flags & LOP_EXCLUSIVE) == 0) {
832                                         printf(
833                                         "shared unlock of (%s) %s @ %s:%d\n",
834                                             class->lc_name, lock->lo_name,
835                                             file, line);
836                                         printf(
837                                         "while exclusively locked from %s:%d\n",
838                                             instance->li_file,
839                                             instance->li_line);
840                                         panic("excl->ushare");
841                                 }
842                                 if ((instance->li_flags & LI_EXCLUSIVE) == 0 &&
843                                     (flags & LOP_EXCLUSIVE) != 0) {
844                                         printf(
845                                         "exclusive unlock of (%s) %s @ %s:%d\n",
846                                             class->lc_name, lock->lo_name,
847                                             file, line);
848                                         printf(
849                                         "while share locked from %s:%d\n",
850                                             instance->li_file,
851                                             instance->li_line);
852                                         panic("share->uexcl");
853                                 }
854                                 /* If we are recursed, unrecurse. */
855                                 if ((instance->li_flags & LI_RECURSEMASK) > 0) {
856                                         CTR4(KTR_WITNESS,
857                                     "%s: pid %d unrecursed on %s r=%d", __func__,
858                                             td->td_proc->p_pid,
859                                             instance->li_lock->lo_name,
860                                             instance->li_flags);
861                                         instance->li_flags--;
862                                         return;
863                                 }
864                                 s = intr_disable();
865                                 CTR4(KTR_WITNESS,
866                                     "%s: pid %d removed %s from lle[%d]", __func__,
867                                     td->td_proc->p_pid,
868                                     instance->li_lock->lo_name,
869                                     (*lock_list)->ll_count - 1);
870                                 for (j = i; j < (*lock_list)->ll_count - 1; j++)
871                                         (*lock_list)->ll_children[j] =
872                                             (*lock_list)->ll_children[j + 1];
873                                 (*lock_list)->ll_count--;
874                                 intr_restore(s);
875                                 if ((*lock_list)->ll_count == 0) {
876                                         lle = *lock_list;
877                                         *lock_list = lle->ll_next;
878                                         CTR3(KTR_WITNESS,
879                                             "%s: pid %d removed lle %p", __func__,
880                                             td->td_proc->p_pid, lle);
881                                         witness_lock_list_free(lle);
882                                 }
883                                 return;
884                         }
885                 }
886         panic("lock (%s) %s not locked @ %s:%d", class->lc_name, lock->lo_name,
887             file, line);
888 }
889
890 /*
891  * Warn if any held locks are not sleepable.  Note that Giant and the lock
892  * passed in are both special cases since they are both released during the
893  * sleep process and aren't actually held while the thread is asleep.
894  */
895 int
896 witness_sleep(int check_only, struct lock_object *lock, const char *file,
897               int line)
898 {
899         struct lock_list_entry **lock_list, *lle;
900         struct lock_instance *lock1;
901         struct thread *td;
902         int i, n;
903
904         if (witness_cold || witness_dead || panicstr != NULL)
905                 return (0);
906         n = 0;
907         td = curthread;
908         lock_list = &td->td_sleeplocks;
909 again:
910         for (lle = *lock_list; lle != NULL; lle = lle->ll_next)
911                 for (i = lle->ll_count - 1; i >= 0; i--) {
912                         lock1 = &lle->ll_children[i];
913                         if (lock1->li_lock == lock ||
914                             lock1->li_lock == &Giant.mtx_object)
915                                 continue;
916                         if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0) {
917                                 if (check_only == 0) {
918                                         CTR3(KTR_WITNESS,
919                                     "pid %d: sleeping with lock (%s) %s held",
920                                             td->td_proc->p_pid,
921                                             lock1->li_lock->lo_class->lc_name,
922                                             lock1->li_lock->lo_name);
923                                         lock1->li_flags |= LI_SLEPT;
924                                 }
925                                 continue;
926                         }
927                         n++;
928                         printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
929                             file, line, check_only ? "could sleep" : "sleeping",
930                             lock1->li_lock->lo_name, lock1->li_file,
931                             lock1->li_line);
932                 }
933         if (lock_list == &td->td_sleeplocks && PCPU_GET(spinlocks) != NULL) {
934                 /*
935                  * Since we already hold a spinlock preemption is
936                  * already blocked.
937                  */
938                 lock_list = PCPU_PTR(spinlocks);
939                 goto again;
940         }
941 #ifdef DDB
942         if (witness_ddb && n)
943                 Debugger(__func__);
944 #endif /* DDB */
945         return (n);
946 }
947
948 const char *
949 witness_file(struct lock_object *lock)
950 {
951         struct witness *w;
952
953         if (witness_cold || witness_dead || lock->lo_witness == NULL)
954                 return ("?");
955         w = lock->lo_witness;
956         return (w->w_file);
957 }
958
959 int
960 witness_line(struct lock_object *lock)
961 {
962         struct witness *w;
963
964         if (witness_cold || witness_dead || lock->lo_witness == NULL)
965                 return (0);
966         w = lock->lo_witness;
967         return (w->w_line);
968 }
969
970 static struct witness *
971 enroll(const char *description, struct lock_class *lock_class)
972 {
973         struct witness *w;
974
975         if (!witness_watch || witness_dead || panicstr != NULL)
976                 return (NULL);
977         if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin)
978                 return (NULL);
979         mtx_lock_spin(&w_mtx);
980         STAILQ_FOREACH(w, &w_all, w_list) {
981                 if (w->w_name == description || (w->w_refcount > 0 &&
982                     strcmp(description, w->w_name) == 0)) {
983                         w->w_refcount++;
984                         mtx_unlock_spin(&w_mtx);
985                         if (lock_class != w->w_class)
986                                 panic(
987                                 "lock (%s) %s does not match earlier (%s) lock",
988                                     description, lock_class->lc_name,
989                                     w->w_class->lc_name);
990                         return (w);
991                 }
992         }
993         /*
994          * This isn't quite right, as witness_cold is still 0 while we
995          * enroll all the locks initialized before witness_initialize().
996          */
997         if ((lock_class->lc_flags & LC_SPINLOCK) && !witness_cold) {
998                 mtx_unlock_spin(&w_mtx);
999                 panic("spin lock %s not in order list", description);
1000         }
1001         if ((w = witness_get()) == NULL)
1002                 return (NULL);
1003         w->w_name = description;
1004         w->w_class = lock_class;
1005         w->w_refcount = 1;
1006         STAILQ_INSERT_HEAD(&w_all, w, w_list);
1007         if (lock_class->lc_flags & LC_SPINLOCK)
1008                 STAILQ_INSERT_HEAD(&w_spin, w, w_typelist);
1009         else if (lock_class->lc_flags & LC_SLEEPLOCK)
1010                 STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist);
1011         else {
1012                 mtx_unlock_spin(&w_mtx);
1013                 panic("lock class %s is not sleep or spin",
1014                     lock_class->lc_name);
1015         }
1016         mtx_unlock_spin(&w_mtx);
1017         return (w);
1018 }
1019
1020 static int
1021 itismychild(struct witness *parent, struct witness *child)
1022 {
1023         static int recursed;
1024         struct witness_child_list_entry **wcl;
1025         struct witness_list *list;
1026
1027         MPASS(child != NULL && parent != NULL);
1028         if ((parent->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) !=
1029             (child->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)))
1030                 panic(
1031                 "%s: parent (%s) and child (%s) are not the same lock type",
1032                     __func__, parent->w_class->lc_name,
1033                     child->w_class->lc_name);
1034
1035         /*
1036          * Insert "child" after "parent"
1037          */
1038         wcl = &parent->w_children;
1039         while (*wcl != NULL && (*wcl)->wcl_count == WITNESS_NCHILDREN)
1040                 wcl = &(*wcl)->wcl_next;
1041         if (*wcl == NULL) {
1042                 *wcl = witness_child_get();
1043                 if (*wcl == NULL)
1044                         return (1);
1045         }
1046         (*wcl)->wcl_children[(*wcl)->wcl_count++] = child;
1047
1048         /*
1049          * Now prune whole tree.  We look for cases where a lock is now
1050          * both a descendant and a direct child of a given lock.  In that
1051          * case, we want to remove the direct child link from the tree.
1052          */
1053         if (recursed)
1054                 return (0);
1055         recursed = 1;
1056         if (parent->w_class->lc_flags & LC_SLEEPLOCK)
1057                 list = &w_sleep;
1058         else
1059                 list = &w_spin;
1060         STAILQ_FOREACH(child, list, w_typelist) {
1061                 STAILQ_FOREACH(parent, list, w_typelist) {
1062                         if (!isitmychild(parent, child))
1063                                 continue;
1064                         removechild(parent, child);
1065                         if (isitmydescendant(parent, child))
1066                                 continue;
1067                         itismychild(parent, child);
1068                 }
1069         }
1070         recursed = 0;
1071         witness_levelall();
1072         return (0);
1073 }
1074
1075 static void
1076 removechild(struct witness *parent, struct witness *child)
1077 {
1078         struct witness_child_list_entry **wcl, *wcl1;
1079         int i;
1080
1081         for (wcl = &parent->w_children; *wcl != NULL; wcl = &(*wcl)->wcl_next)
1082                 for (i = 0; i < (*wcl)->wcl_count; i++)
1083                         if ((*wcl)->wcl_children[i] == child)
1084                                 goto found;
1085         return;
1086 found:
1087         (*wcl)->wcl_count--;
1088         if ((*wcl)->wcl_count > i)
1089                 (*wcl)->wcl_children[i] =
1090                     (*wcl)->wcl_children[(*wcl)->wcl_count];
1091         MPASS((*wcl)->wcl_children[i] != NULL);
1092         if ((*wcl)->wcl_count != 0)
1093                 return;
1094         wcl1 = *wcl;
1095         *wcl = wcl1->wcl_next;
1096         witness_child_free(wcl1);
1097 }
1098
1099 static int
1100 isitmychild(struct witness *parent, struct witness *child)
1101 {
1102         struct witness_child_list_entry *wcl;
1103         int i;
1104
1105         for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) {
1106                 for (i = 0; i < wcl->wcl_count; i++) {
1107                         if (wcl->wcl_children[i] == child)
1108                                 return (1);
1109                 }
1110         }
1111         return (0);
1112 }
1113
1114 static int
1115 isitmydescendant(struct witness *parent, struct witness *child)
1116 {
1117         struct witness_child_list_entry *wcl;
1118         int i, j;
1119
1120         if (isitmychild(parent, child))
1121                 return (1);
1122         j = 0;
1123         for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) {
1124                 MPASS(j < 1000);
1125                 for (i = 0; i < wcl->wcl_count; i++) {
1126                         if (isitmydescendant(wcl->wcl_children[i], child))
1127                                 return (1);
1128                 }
1129                 j++;
1130         }
1131         return (0);
1132 }
1133
1134 static void
1135 witness_levelall (void)
1136 {
1137         struct witness_list *list;
1138         struct witness *w, *w1;
1139
1140         /*
1141          * First clear all levels.
1142          */
1143         STAILQ_FOREACH(w, &w_all, w_list) {
1144                 w->w_level = 0;
1145         }
1146
1147         /*
1148          * Look for locks with no parent and level all their descendants.
1149          */
1150         STAILQ_FOREACH(w, &w_all, w_list) {
1151                 /*
1152                  * This is just an optimization, technically we could get
1153                  * away just walking the all list each time.
1154                  */
1155                 if (w->w_class->lc_flags & LC_SLEEPLOCK)
1156                         list = &w_sleep;
1157                 else
1158                         list = &w_spin;
1159                 STAILQ_FOREACH(w1, list, w_typelist) {
1160                         if (isitmychild(w1, w))
1161                                 goto skip;
1162                 }
1163                 witness_leveldescendents(w, 0);
1164         skip:
1165                 ;       /* silence GCC 3.x */
1166         }
1167 }
1168
1169 static void
1170 witness_leveldescendents(struct witness *parent, int level)
1171 {
1172         struct witness_child_list_entry *wcl;
1173         int i;
1174
1175         if (parent->w_level < level)
1176                 parent->w_level = level;
1177         level++;
1178         for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next)
1179                 for (i = 0; i < wcl->wcl_count; i++)
1180                         witness_leveldescendents(wcl->wcl_children[i], level);
1181 }
1182
1183 static void
1184 witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1185                            struct witness *parent)
1186 {
1187         struct witness_child_list_entry *wcl;
1188         int i, level;
1189
1190         level = parent->w_level;
1191         prnt("%-2d", level);
1192         for (i = 0; i < level; i++)
1193                 prnt(" ");
1194         if (parent->w_refcount > 0) {
1195                 prnt("%s", parent->w_name);
1196                 if (parent->w_file != NULL)
1197                         prnt(" -- last acquired @ %s:%d\n", parent->w_file,
1198                             parent->w_line);
1199         } else
1200                 prnt("(dead)\n");
1201         for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next)
1202                 for (i = 0; i < wcl->wcl_count; i++)
1203                             witness_displaydescendants(prnt,
1204                                 wcl->wcl_children[i]);
1205 }
1206
1207 #ifdef BLESSING
1208 static int
1209 blessed(struct witness *w1, struct witness *w2)
1210 {
1211         int i;
1212         struct witness_blessed *b;
1213
1214         for (i = 0; i < blessed_count; i++) {
1215                 b = &blessed_list[i];
1216                 if (strcmp(w1->w_name, b->b_lock1) == 0) {
1217                         if (strcmp(w2->w_name, b->b_lock2) == 0)
1218                                 return (1);
1219                         continue;
1220                 }
1221                 if (strcmp(w1->w_name, b->b_lock2) == 0)
1222                         if (strcmp(w2->w_name, b->b_lock1) == 0)
1223                                 return (1);
1224         }
1225         return (0);
1226 }
1227 #endif
1228
1229 static struct witness *
1230 witness_get(void)
1231 {
1232         struct witness *w;
1233
1234         if (witness_dead) {
1235                 mtx_unlock_spin(&w_mtx);
1236                 return (NULL);
1237         }
1238         if (STAILQ_EMPTY(&w_free)) {
1239                 witness_dead = 1;
1240                 mtx_unlock_spin(&w_mtx);
1241                 printf("%s: witness exhausted\n", __func__);
1242                 return (NULL);
1243         }
1244         w = STAILQ_FIRST(&w_free);
1245         STAILQ_REMOVE_HEAD(&w_free, w_list);
1246         bzero(w, sizeof(*w));
1247         return (w);
1248 }
1249
1250 static void
1251 witness_free(struct witness *w)
1252 {
1253
1254         STAILQ_INSERT_HEAD(&w_free, w, w_list);
1255 }
1256
1257 static struct witness_child_list_entry *
1258 witness_child_get(void)
1259 {
1260         struct witness_child_list_entry *wcl;
1261
1262         if (witness_dead) {
1263                 mtx_unlock_spin(&w_mtx);
1264                 return (NULL);
1265         }
1266         wcl = w_child_free;
1267         if (wcl == NULL) {
1268                 witness_dead = 1;
1269                 mtx_unlock_spin(&w_mtx);
1270                 printf("%s: witness exhausted\n", __func__);
1271                 return (NULL);
1272         }
1273         w_child_free = wcl->wcl_next;
1274         bzero(wcl, sizeof(*wcl));
1275         return (wcl);
1276 }
1277
1278 static void
1279 witness_child_free(struct witness_child_list_entry *wcl)
1280 {
1281
1282         wcl->wcl_next = w_child_free;
1283         w_child_free = wcl;
1284 }
1285
1286 static struct lock_list_entry *
1287 witness_lock_list_get(void)
1288 {
1289         struct lock_list_entry *lle;
1290
1291         if (witness_dead)
1292                 return (NULL);
1293         mtx_lock_spin(&w_mtx);
1294         lle = w_lock_list_free;
1295         if (lle == NULL) {
1296                 witness_dead = 1;
1297                 mtx_unlock_spin(&w_mtx);
1298                 printf("%s: witness exhausted\n", __func__);
1299                 return (NULL);
1300         }
1301         w_lock_list_free = lle->ll_next;
1302         mtx_unlock_spin(&w_mtx);
1303         bzero(lle, sizeof(*lle));
1304         return (lle);
1305 }
1306                 
1307 static void
1308 witness_lock_list_free(struct lock_list_entry *lle)
1309 {
1310
1311         mtx_lock_spin(&w_mtx);
1312         lle->ll_next = w_lock_list_free;
1313         w_lock_list_free = lle;
1314         mtx_unlock_spin(&w_mtx);
1315 }
1316
1317 static struct lock_instance *
1318 find_instance(struct lock_list_entry *lock_list, struct lock_object *lock)
1319 {
1320         struct lock_list_entry *lle;
1321         struct lock_instance *instance;
1322         int i;
1323
1324         for (lle = lock_list; lle != NULL; lle = lle->ll_next)
1325                 for (i = lle->ll_count - 1; i >= 0; i--) {
1326                         instance = &lle->ll_children[i];
1327                         if (instance->li_lock == lock)
1328                                 return (instance);
1329                 }
1330         return (NULL);
1331 }
1332
1333 int
1334 witness_list_locks(struct lock_list_entry **lock_list)
1335 {
1336         struct lock_list_entry *lle;
1337         struct lock_instance *instance;
1338         struct lock_object *lock;
1339         int i, nheld;
1340
1341         nheld = 0;
1342         for (lle = *lock_list; lle != NULL; lle = lle->ll_next)
1343                 for (i = lle->ll_count - 1; i >= 0; i--) {
1344                         instance = &lle->ll_children[i];
1345                         lock = instance->li_lock;
1346                         printf("%s %s %s",
1347                             (instance->li_flags & LI_EXCLUSIVE) != 0 ?
1348                             "exclusive" : "shared",
1349                             lock->lo_class->lc_name, lock->lo_name);
1350                         if (lock->lo_type != lock->lo_name)
1351                                 printf(" (%s)", lock->lo_type);
1352                         printf(" r = %d (%p) locked @ %s:%d\n",
1353                             instance->li_flags & LI_RECURSEMASK, lock,
1354                             instance->li_file, instance->li_line);
1355                         nheld++;
1356                 }
1357         return (nheld);
1358 }
1359
1360 /*
1361  * Calling this on td != curthread is bad unless we are in ddb.
1362  */
1363 int
1364 witness_list(struct thread *td)
1365 {
1366         int nheld;
1367
1368         KASSERT(!witness_cold, ("%s: witness_cold", __func__));
1369 #ifdef DDB
1370         KASSERT(td == curthread || db_active,
1371             ("%s: td != curthread and we aren't in the debugger", __func__));
1372         if (!db_active && witness_dead)
1373                 return (0);
1374 #else
1375         KASSERT(td == curthread, ("%s: p != curthread", __func__));
1376         if (witness_dead)
1377                 return (0);
1378 #endif
1379         nheld = witness_list_locks(&td->td_sleeplocks);
1380
1381         /*
1382          * We only handle spinlocks if td == curthread.  This is somewhat broken
1383          * if td is currently executing on some other CPU and holds spin locks
1384          * as we won't display those locks.  If we had a MI way of getting
1385          * the per-cpu data for a given cpu then we could use
1386          * td->td_kse->ke_oncpu to get the list of spinlocks for this thread
1387          * and "fix" this.
1388          *
1389          * That still wouldn't really fix this unless we locked sched_lock
1390          * or stopped the other CPU to make sure it wasn't changing the list
1391          * out from under us.  It is probably best to just not try to handle
1392          * threads on other CPU's for now.
1393          */
1394         if (td == curthread && PCPU_GET(spinlocks) != NULL)
1395                 nheld += witness_list_locks(PCPU_PTR(spinlocks));
1396
1397         return (nheld);
1398 }
1399
1400 void
1401 witness_save(struct lock_object *lock, const char **filep, int *linep)
1402 {
1403         struct lock_instance *instance;
1404
1405         KASSERT(!witness_cold, ("%s: witness_cold", __func__));
1406         if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
1407                 return;
1408         if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
1409                 panic("%s: lock (%s) %s is not a sleep lock", __func__,
1410                     lock->lo_class->lc_name, lock->lo_name);
1411         instance = find_instance(curthread->td_sleeplocks, lock);
1412         if (instance == NULL)
1413                 panic("%s: lock (%s) %s not locked", __func__,
1414                     lock->lo_class->lc_name, lock->lo_name);
1415         *filep = instance->li_file;
1416         *linep = instance->li_line;
1417 }
1418
1419 void
1420 witness_restore(struct lock_object *lock, const char *file, int line)
1421 {
1422         struct lock_instance *instance;
1423
1424         KASSERT(!witness_cold, ("%s: witness_cold", __func__));
1425         if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
1426                 return;
1427         if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
1428                 panic("%s: lock (%s) %s is not a sleep lock", __func__,
1429                     lock->lo_class->lc_name, lock->lo_name);
1430         instance = find_instance(curthread->td_sleeplocks, lock);
1431         if (instance == NULL)
1432                 panic("%s: lock (%s) %s not locked", __func__,
1433                     lock->lo_class->lc_name, lock->lo_name);
1434         lock->lo_witness->w_file = file;
1435         lock->lo_witness->w_line = line;
1436         instance->li_file = file;
1437         instance->li_line = line;
1438 }
1439
1440 void
1441 witness_assert(struct lock_object *lock, int flags, const char *file, int line)
1442 {
1443 #ifdef INVARIANT_SUPPORT
1444         struct lock_instance *instance;
1445
1446         if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
1447                 return;
1448         if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) != 0)
1449                 instance = find_instance(curthread->td_sleeplocks, lock);
1450         else if ((lock->lo_class->lc_flags & LC_SPINLOCK) != 0)
1451                 instance = find_instance(PCPU_GET(spinlocks), lock);
1452         else {
1453                 panic("Lock (%s) %s is not sleep or spin!",
1454                     lock->lo_class->lc_name, lock->lo_name);
1455                 return;
1456         }
1457         switch (flags) {
1458         case LA_UNLOCKED:
1459                 if (instance != NULL)
1460                         panic("Lock (%s) %s locked @ %s:%d.",
1461                             lock->lo_class->lc_name, lock->lo_name, file, line);
1462                 break;
1463         case LA_LOCKED:
1464         case LA_LOCKED | LA_RECURSED:
1465         case LA_LOCKED | LA_NOTRECURSED:
1466         case LA_SLOCKED:
1467         case LA_SLOCKED | LA_RECURSED:
1468         case LA_SLOCKED | LA_NOTRECURSED:
1469         case LA_XLOCKED:
1470         case LA_XLOCKED | LA_RECURSED:
1471         case LA_XLOCKED | LA_NOTRECURSED:
1472                 if (instance == NULL) {
1473                         panic("Lock (%s) %s not locked @ %s:%d.",
1474                             lock->lo_class->lc_name, lock->lo_name, file, line);
1475                         break;
1476                 }
1477                 if ((flags & LA_XLOCKED) != 0 &&
1478                     (instance->li_flags & LI_EXCLUSIVE) == 0)
1479                         panic("Lock (%s) %s not exclusively locked @ %s:%d.",
1480                             lock->lo_class->lc_name, lock->lo_name, file, line);
1481                 if ((flags & LA_SLOCKED) != 0 &&
1482                     (instance->li_flags & LI_EXCLUSIVE) != 0)
1483                         panic("Lock (%s) %s exclusively locked @ %s:%d.",
1484                             lock->lo_class->lc_name, lock->lo_name, file, line);
1485                 if ((flags & LA_RECURSED) != 0 &&
1486                     (instance->li_flags & LI_RECURSEMASK) == 0)
1487                         panic("Lock (%s) %s not recursed @ %s:%d.",
1488                             lock->lo_class->lc_name, lock->lo_name, file, line);
1489                 if ((flags & LA_NOTRECURSED) != 0 &&
1490                     (instance->li_flags & LI_RECURSEMASK) != 0)
1491                         panic("Lock (%s) %s recursed @ %s:%d.",
1492                             lock->lo_class->lc_name, lock->lo_name, file, line);
1493                 break;
1494         default:
1495                 panic("Invalid lock assertion at %s:%d.", file, line);
1496
1497         }
1498 #endif  /* INVARIANT_SUPPORT */
1499 }
1500
1501 #ifdef DDB
1502
1503 DB_SHOW_COMMAND(locks, db_witness_list)
1504 {
1505         struct thread *td;
1506         pid_t pid;
1507         struct proc *p;
1508
1509         if (have_addr) {
1510                 pid = (addr % 16) + ((addr >> 4) % 16) * 10 +
1511                     ((addr >> 8) % 16) * 100 + ((addr >> 12) % 16) * 1000 +
1512                     ((addr >> 16) % 16) * 10000;
1513                 /* sx_slock(&allproc_lock); */
1514                 FOREACH_PROC_IN_SYSTEM(p) {
1515                         if (p->p_pid == pid)
1516                                 break;
1517                 }
1518                 /* sx_sunlock(&allproc_lock); */
1519                 if (p == NULL) {
1520                         db_printf("pid %d not found\n", pid);
1521                         return;
1522                 }
1523                 FOREACH_THREAD_IN_PROC(p, td) {
1524                         witness_list(td);
1525                 }
1526         } else {
1527                 td = curthread;
1528                 witness_list(td);
1529         }
1530 }
1531
1532 DB_SHOW_COMMAND(witness, db_witness_display)
1533 {
1534
1535         witness_display(db_printf);
1536 }
1537 #endif