]> CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - sys/kern/kern_lock.c
MFC r319414:
[FreeBSD/stable/9.git] / sys / kern / kern_lock.c
1 /*-
2  * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice(s), this list of conditions and the following disclaimer as
10  *    the first lines of this file unmodified other than the possible
11  *    addition of one or more copyright notices.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice(s), this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26  * DAMAGE.
27  */
28
29 #include "opt_adaptive_lockmgrs.h"
30 #include "opt_ddb.h"
31 #include "opt_hwpmc_hooks.h"
32 #include "opt_kdtrace.h"
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include <sys/param.h>
38 #include <sys/kdb.h>
39 #include <sys/ktr.h>
40 #include <sys/lock.h>
41 #include <sys/lock_profile.h>
42 #include <sys/lockmgr.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/sleepqueue.h>
46 #ifdef DEBUG_LOCKS
47 #include <sys/stack.h>
48 #endif
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51
52 #include <machine/cpu.h>
53
54 #ifdef DDB
55 #include <ddb/ddb.h>
56 #endif
57
58 #ifdef HWPMC_HOOKS
59 #include <sys/pmckern.h>
60 PMC_SOFT_DECLARE( , , lock, failed);
61 #endif
62
/*
 * Compile-time sanity checks on the flag layout:
 *  - LK_ADAPTIVE and LK_NOSHARE must both be contained in LO_CLASSFLAGS
 *    (lockinit() copies these two bits straight into the lock_object flags);
 *  - LK_UNLOCKED must not have any waiters bit or the exclusive-spinners bit
 *    set.
 */
63 CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
64     (LK_ADAPTIVE | LK_NOSHARE));
65 CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
66     ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
67
/*
 * Sleepqueue indices used as the 'queue' argument of the sleepq_*() calls:
 * one queue for threads waiting for an exclusive lock, one for threads
 * waiting for a shared lock.
 */
68 #define SQ_EXCLUSIVE_QUEUE      0
69 #define SQ_SHARED_QUEUE         1
70
/*
 * Limits for adaptive spinning on a lock held in shared mode:
 * ALK_RETRIES bounds the number of spinning rounds per acquisition attempt
 * and ALK_LOOPS bounds the number of iterations within a single round.
 */
71 #ifdef ADAPTIVE_LOCKMGRS
72 #define ALK_RETRIES             10
73 #define ALK_LOOPS               10000
74 #endif
75
/*
 * Per-thread lock accounting.
 * Without INVARIANTS, _lockmgr_assert() and the td_locks counter updates
 * expand to nothing.  The td_lk_slocks counter is maintained unconditionally
 * because LK_CAN_SHARE() reads it at run time.
 */
76 #ifndef INVARIANTS
77 #define _lockmgr_assert(lk, what, file, line)
78 #define TD_LOCKS_INC(td)
79 #define TD_LOCKS_DEC(td)
80 #else
81 #define TD_LOCKS_INC(td)        ((td)->td_locks++)
82 #define TD_LOCKS_DEC(td)        ((td)->td_locks--)
83 #endif
84 #define TD_SLOCKS_INC(td)       ((td)->td_lk_slocks++)
85 #define TD_SLOCKS_DEC(td)       ((td)->td_lk_slocks--)
86
/*
 * Helpers operating on the stack trace stored in the lock's lk_stack field.
 * They expand to nothing unless DEBUG_LOCKS is defined.
 */
87 #ifndef DEBUG_LOCKS
88 #define STACK_PRINT(lk)
89 #define STACK_SAVE(lk)
90 #define STACK_ZERO(lk)
91 #else
92 #define STACK_PRINT(lk) stack_print_ddb(&(lk)->lk_stack)
93 #define STACK_SAVE(lk)  stack_save(&(lk)->lk_stack)
94 #define STACK_ZERO(lk)  stack_zero(&(lk)->lk_stack)
95 #endif
96
/*
 * KTR logging helpers: emit a KTR_LOCK trace record with two (LOCK_LOG2) or
 * three (LOCK_LOG3) arguments when LOCK_LOG_TEST() reports that logging is
 * enabled for the lock.
 *
 * Each macro is wrapped in do { } while (0) so that it always expands to
 * exactly one statement.  A bare "if" expansion would silently capture an
 * "else" following the macro at the call site.
 */
#define LOCK_LOG2(lk, string, arg1, arg2) do {                          \
        if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
                CTR2(KTR_LOCK, (string), (arg1), (arg2));               \
} while (0)
#define LOCK_LOG3(lk, string, arg1, arg2, arg3) do {                    \
        if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
                CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3));       \
} while (0)
103
/*
 * Helpers to drop Giant before spinning or sleeping and to pick it up again
 * afterwards.
 *
 * GIANT_DECLARE  declares the recursion counter _i (and the WITNESS save
 *                area) in the enclosing scope.
 * GIANT_SAVE     releases Giant as many times as curthread holds it,
 *                counting the releases in _i.
 * GIANT_RESTORE  re-acquires Giant _i times and leaves _i at zero.
 *
 * _i must be back at zero after GIANT_RESTORE() because a function may run
 * several GIANT_SAVE()/GIANT_RESTORE() pairs in a loop.  A counter left at
 * -1 would make the next pair re-acquire Giant one time too few.
 */
#define GIANT_DECLARE                                                   \
        int _i = 0;                                                     \
        WITNESS_SAVE_DECL(Giant)
#define GIANT_RESTORE() do {                                            \
        if (_i > 0) {                                                   \
                while (_i > 0) {                                        \
                        mtx_lock(&Giant);                               \
                        _i--;                                           \
                }                                                       \
                WITNESS_RESTORE(&Giant.lock_object, Giant);             \
        }                                                               \
} while (0)
#define GIANT_SAVE() do {                                               \
        if (mtx_owned(&Giant)) {                                        \
                WITNESS_SAVE(&Giant.lock_object, Giant);                \
                while (mtx_owned(&Giant)) {                             \
                        _i++;                                           \
                        mtx_unlock(&Giant);                             \
                }                                                       \
        }                                                               \
} while (0)
123
/*
 * LK_CAN_SHARE(x): non-zero if lock word x permits curthread to take a
 * shared hold.  x must have LK_SHARE set and at least one of the following
 * must be true: the exclusive-waiters bit is clear, the exclusive-spinners
 * bit is clear, curthread already holds shared lockmgr locks (td_lk_slocks
 * is non-zero), or curthread has TDP_DEADLKTREAT set.
 *
 * LK_TRYOP(x): non-zero if flags x request a non-sleeping (LK_NOWAIT)
 * operation.
 */
124 #define LK_CAN_SHARE(x)                                                 \
125         (((x) & LK_SHARE) && (((x) & LK_EXCLUSIVE_WAITERS) == 0 ||      \
126         ((x) & LK_EXCLUSIVE_SPINNERS) == 0 ||                           \
127         curthread->td_lk_slocks || (curthread->td_pflags & TDP_DEADLKTREAT)))
128 #define LK_TRYOP(x)                                                     \
129         ((x) & LK_NOWAIT)
130
/*
 * LK_CAN_WITNESS(x): true if flags x neither disable WITNESS (LK_NOWITNESS)
 * nor describe a try operation.
 * LK_TRYWIT(x): LOP_TRYLOCK for try operations, 0 otherwise.
 */
131 #define LK_CAN_WITNESS(x)                                               \
132         (((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
133 #define LK_TRYWIT(x)                                                    \
134         (LK_TRYOP(x) ? LOP_TRYLOCK : 0)
135
/*
 * LK_CAN_ADAPT(lk, f): true if lock lk was flagged LK_ADAPTIVE and the
 * request flags f do not include LK_SLEEPFAIL.
 */
136 #define LK_CAN_ADAPT(lk, f)                                             \
137         (((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&             \
138         ((f) & LK_SLEEPFAIL) == 0)
139
/*
 * lockmgr_disowned(lk): true if the lock word, with every LK_FLAGMASK bit
 * except LK_SHARE masked off, equals LK_KERNPROC.
 */
140 #define lockmgr_disowned(lk)                                            \
141         (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
142
/*
 * lockmgr_xlocked(lk): true if the lock word, with every LK_FLAGMASK bit
 * except LK_SHARE masked off, equals curthread, i.e. curthread is the
 * exclusive owner.
 */
143 #define lockmgr_xlocked(lk)                                             \
144         (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
145
/*
 * Forward declarations of the lock_class methods for lockmgr locks.  The
 * DDB and DTrace related methods are only built when the respective kernel
 * options are enabled.
 */
146 static void      assert_lockmgr(struct lock_object *lock, int how);
147 #ifdef DDB
148 static void      db_show_lockmgr(struct lock_object *lock);
149 #endif
150 static void      lock_lockmgr(struct lock_object *lock, int how);
151 #ifdef KDTRACE_HOOKS
152 static int       owner_lockmgr(struct lock_object *lock, struct thread **owner);
153 #endif
154 static int       unlock_lockmgr(struct lock_object *lock);
155
/*
 * Lock class descriptor for lockmgr locks.  The assert, lock, unlock and
 * owner methods defined in this file all panic when called: lockmgr locks
 * support neither generic assertions, use as a sleep interlock, nor owner
 * queries.
 */
156 struct lock_class lock_class_lockmgr = {
157         .lc_name = "lockmgr",
158         .lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
159         .lc_assert = assert_lockmgr,
160 #ifdef DDB
161         .lc_ddb_show = db_show_lockmgr,
162 #endif
163         .lc_lock = lock_lockmgr,
164         .lc_unlock = unlock_lockmgr,
165 #ifdef KDTRACE_HOOKS
166         .lc_owner = owner_lockmgr,
167 #endif
168 };
169
170 static __inline struct thread *
171 lockmgr_xholder(struct lock *lk)
172 {
173         uintptr_t x;
174
175         x = lk->lk_lock;
176         return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
177 }
178
179 /*
180  * It assumes sleepq_lock held and returns with this one unheld.
181  * It also assumes the generic interlock is sane and previously checked.
182  * If LK_INTERLOCK is specified the interlock is not reacquired after the
183  * sleep.
184  */
185 static __inline int
186 sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
187     const char *wmesg, int pri, int timo, int queue)
188 {
189         GIANT_DECLARE;
190         struct lock_class *class;
191         int catch, error;
192
193         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
194         catch = pri & PCATCH;
195         pri &= PRIMASK;
196         error = 0;
197
198         LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
199             (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
200
201         if (flags & LK_INTERLOCK)
202                 class->lc_unlock(ilk);
203         if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
204                 lk->lk_exslpfail++;
205         GIANT_SAVE();
206         sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
207             SLEEPQ_INTERRUPTIBLE : 0), queue);
208         if ((flags & LK_TIMELOCK) && timo)
209                 sleepq_set_timeout(&lk->lock_object, timo);
210
211         /*
212          * Decisional switch for real sleeping.
213          */
214         if ((flags & LK_TIMELOCK) && timo && catch)
215                 error = sleepq_timedwait_sig(&lk->lock_object, pri);
216         else if ((flags & LK_TIMELOCK) && timo)
217                 error = sleepq_timedwait(&lk->lock_object, pri);
218         else if (catch)
219                 error = sleepq_wait_sig(&lk->lock_object, pri);
220         else
221                 sleepq_wait(&lk->lock_object, pri);
222         GIANT_RESTORE();
223         if ((flags & LK_SLEEPFAIL) && error == 0)
224                 error = ENOLCK;
225
226         return (error);
227 }
228
229 static __inline int
230 wakeupshlk(struct lock *lk, const char *file, int line)
231 {
232         uintptr_t v, x;
233         u_int realexslp;
234         int queue, wakeup_swapper;
235
236         TD_LOCKS_DEC(curthread);
237         TD_SLOCKS_DEC(curthread);
238         WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
239         LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
240
241         wakeup_swapper = 0;
242         for (;;) {
243                 x = lk->lk_lock;
244
245                 /*
246                  * If there is more than one shared lock held, just drop one
247                  * and return.
248                  */
249                 if (LK_SHARERS(x) > 1) {
250                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, x,
251                             x - LK_ONE_SHARER))
252                                 break;
253                         continue;
254                 }
255
256                 /*
257                  * If there are not waiters on the exclusive queue, drop the
258                  * lock quickly.
259                  */
260                 if ((x & LK_ALL_WAITERS) == 0) {
261                         MPASS((x & ~LK_EXCLUSIVE_SPINNERS) ==
262                             LK_SHARERS_LOCK(1));
263                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, x, LK_UNLOCKED))
264                                 break;
265                         continue;
266                 }
267
268                 /*
269                  * We should have a sharer with waiters, so enter the hard
270                  * path in order to handle wakeups correctly.
271                  */
272                 sleepq_lock(&lk->lock_object);
273                 x = lk->lk_lock & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
274                 v = LK_UNLOCKED;
275
276                 /*
277                  * If the lock has exclusive waiters, give them preference in
278                  * order to avoid deadlock with shared runners up.
279                  * If interruptible sleeps left the exclusive queue empty
280                  * avoid a starvation for the threads sleeping on the shared
281                  * queue by giving them precedence and cleaning up the
282                  * exclusive waiters bit anyway.
283                  * Please note that lk_exslpfail count may be lying about
284                  * the real number of waiters with the LK_SLEEPFAIL flag on
285                  * because they may be used in conjuction with interruptible
286                  * sleeps so lk_exslpfail might be considered an 'upper limit'
287                  * bound, including the edge cases.
288                  */
289                 realexslp = sleepq_sleepcnt(&lk->lock_object,
290                     SQ_EXCLUSIVE_QUEUE);
291                 if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
292                         if (lk->lk_exslpfail < realexslp) {
293                                 lk->lk_exslpfail = 0;
294                                 queue = SQ_EXCLUSIVE_QUEUE;
295                                 v |= (x & LK_SHARED_WAITERS);
296                         } else {
297                                 lk->lk_exslpfail = 0;
298                                 LOCK_LOG2(lk,
299                                     "%s: %p has only LK_SLEEPFAIL sleepers",
300                                     __func__, lk);
301                                 LOCK_LOG2(lk,
302                             "%s: %p waking up threads on the exclusive queue",
303                                     __func__, lk);
304                                 wakeup_swapper =
305                                     sleepq_broadcast(&lk->lock_object,
306                                     SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
307                                 queue = SQ_SHARED_QUEUE;
308                         }
309                                 
310                 } else {
311
312                         /*
313                          * Exclusive waiters sleeping with LK_SLEEPFAIL on
314                          * and using interruptible sleeps/timeout may have
315                          * left spourious lk_exslpfail counts on, so clean
316                          * it up anyway.
317                          */
318                         lk->lk_exslpfail = 0;
319                         queue = SQ_SHARED_QUEUE;
320                 }
321
322                 if (!atomic_cmpset_rel_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x,
323                     v)) {
324                         sleepq_release(&lk->lock_object);
325                         continue;
326                 }
327                 LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
328                     __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
329                     "exclusive");
330                 wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
331                     0, queue);
332                 sleepq_release(&lk->lock_object);
333                 break;
334         }
335
336         lock_profile_release_lock(&lk->lock_object);
337         return (wakeup_swapper);
338 }
339
/*
 * lc_assert method of the lockmgr lock class.
 * Generic lock assertions are not available for lockmgr locks, so any call
 * ends in a panic.
 */
static void
assert_lockmgr(struct lock_object *lock, int what)
{

        panic("lockmgr locks do not support assertions");
}
346
/*
 * lc_lock method of the lockmgr lock class.
 * A lockmgr lock cannot be used as a sleep interlock, so any call ends in a
 * panic.
 */
static void
lock_lockmgr(struct lock_object *lock, int how)
{

        panic("lockmgr locks do not support sleep interlocking");
}
353
/*
 * lc_unlock method of the lockmgr lock class.
 * A lockmgr lock cannot be used as a sleep interlock, so any call ends in a
 * panic and no value is ever returned.
 */
static int
unlock_lockmgr(struct lock_object *lock)
{

        panic("lockmgr locks do not support sleep interlocking");
}
360
#ifdef KDTRACE_HOOKS
/*
 * lc_owner method of the lockmgr lock class, built only with KDTRACE_HOOKS.
 * Owner queries are not available for lockmgr locks, so any call ends in a
 * panic and no value is ever returned.
 */
static int
owner_lockmgr(struct lock_object *lock, struct thread **owner)
{

        panic("lockmgr locks do not support owner inquiring");
}
#endif
369
370 void
371 lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
372 {
373         int iflags;
374
375         MPASS((flags & ~LK_INIT_MASK) == 0);
376         ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
377             ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
378             &lk->lk_lock));
379
380         iflags = LO_SLEEPABLE | LO_UPGRADABLE;
381         if (flags & LK_CANRECURSE)
382                 iflags |= LO_RECURSABLE;
383         if ((flags & LK_NODUP) == 0)
384                 iflags |= LO_DUPOK;
385         if (flags & LK_NOPROFILE)
386                 iflags |= LO_NOPROFILE;
387         if ((flags & LK_NOWITNESS) == 0)
388                 iflags |= LO_WITNESS;
389         if (flags & LK_QUIET)
390                 iflags |= LO_QUIET;
391         iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
392
393         lk->lk_lock = LK_UNLOCKED;
394         lk->lk_recurse = 0;
395         lk->lk_exslpfail = 0;
396         lk->lk_timo = timo;
397         lk->lk_pri = pri;
398         lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
399         STACK_ZERO(lk);
400 }
401
402 /*
403  * XXX: Gross hacks to manipulate external lock flags after
404  * initialization.  Used for certain vnode and buf locks.
405  */
406 void
407 lockallowshare(struct lock *lk)
408 {
409
410         lockmgr_assert(lk, KA_XLOCKED);
411         lk->lock_object.lo_flags &= ~LK_NOSHARE;
412 }
413
414 void
415 lockallowrecurse(struct lock *lk)
416 {
417
418         lockmgr_assert(lk, KA_XLOCKED);
419         lk->lock_object.lo_flags |= LO_RECURSABLE;
420 }
421
422 void
423 lockdisablerecurse(struct lock *lk)
424 {
425
426         lockmgr_assert(lk, KA_XLOCKED);
427         lk->lock_object.lo_flags &= ~LO_RECURSABLE;
428 }
429
430 void
431 lockdestroy(struct lock *lk)
432 {
433
434         KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
435         KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
436         KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
437         lock_destroy(&lk->lock_object);
438 }
439
440 int
441 __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
442     const char *wmesg, int pri, int timo, const char *file, int line)
443 {
444         GIANT_DECLARE;
445         struct lock_class *class;
446         const char *iwmesg;
447         uintptr_t tid, v, x;
448         u_int op, realexslp;
449         int error, ipri, itimo, queue, wakeup_swapper;
450 #ifdef LOCK_PROFILING
451         uint64_t waittime = 0;
452         int contested = 0;
453 #endif
454 #ifdef ADAPTIVE_LOCKMGRS
455         volatile struct thread *owner;
456         u_int i, spintries = 0;
457 #endif
458
459         error = 0;
460         tid = (uintptr_t)curthread;
461         op = (flags & LK_TYPE_MASK);
462         iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
463         ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
464         itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
465
466         MPASS((flags & ~LK_TOTAL_MASK) == 0);
467         KASSERT((op & (op - 1)) == 0,
468             ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
469         KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
470             (op != LK_DOWNGRADE && op != LK_RELEASE),
471             ("%s: Invalid flags in regard of the operation desired @ %s:%d",
472             __func__, file, line));
473         KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
474             ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
475             __func__, file, line));
476         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
477             ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
478             lk->lock_object.lo_name, file, line));
479
480         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
481         if (panicstr != NULL) {
482                 if (flags & LK_INTERLOCK)
483                         class->lc_unlock(ilk);
484                 return (0);
485         }
486
487         if (lk->lock_object.lo_flags & LK_NOSHARE) {
488                 switch (op) {
489                 case LK_SHARED:
490                         op = LK_EXCLUSIVE;
491                         break;
492                 case LK_UPGRADE:
493                 case LK_TRYUPGRADE:
494                 case LK_DOWNGRADE:
495                         _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
496                             file, line);
497                         if (flags & LK_INTERLOCK)
498                                 class->lc_unlock(ilk);
499                         return (0);
500                 }
501         }
502
503         wakeup_swapper = 0;
504         switch (op) {
505         case LK_SHARED:
506                 if (LK_CAN_WITNESS(flags))
507                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
508                             file, line, ilk);
509                 for (;;) {
510                         x = lk->lk_lock;
511
512                         /*
513                          * If no other thread has an exclusive lock, or
514                          * no exclusive waiter is present, bump the count of
515                          * sharers.  Since we have to preserve the state of
516                          * waiters, if we fail to acquire the shared lock
517                          * loop back and retry.
518                          */
519                         if (LK_CAN_SHARE(x)) {
520                                 if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
521                                     x + LK_ONE_SHARER))
522                                         break;
523                                 continue;
524                         }
525 #ifdef HWPMC_HOOKS
526                         PMC_SOFT_CALL( , , lock, failed);
527 #endif
528                         lock_profile_obtain_lock_failed(&lk->lock_object,
529                             &contested, &waittime);
530
531                         /*
532                          * If the lock is already held by curthread in
533                          * exclusive way avoid a deadlock.
534                          */
535                         if (LK_HOLDER(x) == tid) {
536                                 LOCK_LOG2(lk,
537                                     "%s: %p already held in exclusive mode",
538                                     __func__, lk);
539                                 error = EDEADLK;
540                                 break;
541                         }
542
543                         /*
544                          * If the lock is expected to not sleep just give up
545                          * and return.
546                          */
547                         if (LK_TRYOP(flags)) {
548                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
549                                     __func__, lk);
550                                 error = EBUSY;
551                                 break;
552                         }
553
554 #ifdef ADAPTIVE_LOCKMGRS
555                         /*
556                          * If the owner is running on another CPU, spin until
557                          * the owner stops running or the state of the lock
558                          * changes.  We need a double-state handle here
559                          * because for a failed acquisition the lock can be
560                          * either held in exclusive mode or shared mode
561                          * (for the writer starvation avoidance technique).
562                          */
563                         if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
564                             LK_HOLDER(x) != LK_KERNPROC) {
565                                 owner = (struct thread *)LK_HOLDER(x);
566                                 if (LOCK_LOG_TEST(&lk->lock_object, 0))
567                                         CTR3(KTR_LOCK,
568                                             "%s: spinning on %p held by %p",
569                                             __func__, lk, owner);
570
571                                 /*
572                                  * If we are holding also an interlock drop it
573                                  * in order to avoid a deadlock if the lockmgr
574                                  * owner is adaptively spinning on the
575                                  * interlock itself.
576                                  */
577                                 if (flags & LK_INTERLOCK) {
578                                         class->lc_unlock(ilk);
579                                         flags &= ~LK_INTERLOCK;
580                                 }
581                                 GIANT_SAVE();
582                                 while (LK_HOLDER(lk->lk_lock) ==
583                                     (uintptr_t)owner && TD_IS_RUNNING(owner))
584                                         cpu_spinwait();
585                                 GIANT_RESTORE();
586                                 continue;
587                         } else if (LK_CAN_ADAPT(lk, flags) &&
588                             (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
589                             spintries < ALK_RETRIES) {
590                                 if (flags & LK_INTERLOCK) {
591                                         class->lc_unlock(ilk);
592                                         flags &= ~LK_INTERLOCK;
593                                 }
594                                 GIANT_SAVE();
595                                 spintries++;
596                                 for (i = 0; i < ALK_LOOPS; i++) {
597                                         if (LOCK_LOG_TEST(&lk->lock_object, 0))
598                                                 CTR4(KTR_LOCK,
599                                     "%s: shared spinning on %p with %u and %u",
600                                                     __func__, lk, spintries, i);
601                                         x = lk->lk_lock;
602                                         if ((x & LK_SHARE) == 0 ||
603                                             LK_CAN_SHARE(x) != 0)
604                                                 break;
605                                         cpu_spinwait();
606                                 }
607                                 GIANT_RESTORE();
608                                 if (i != ALK_LOOPS)
609                                         continue;
610                         }
611 #endif
612
613                         /*
614                          * Acquire the sleepqueue chain lock because we
615                          * probably will need to manipulate waiters flags.
616                          */
617                         sleepq_lock(&lk->lock_object);
618                         x = lk->lk_lock;
619
620                         /*
621                          * if the lock can be acquired in shared mode, try
622                          * again.
623                          */
624                         if (LK_CAN_SHARE(x)) {
625                                 sleepq_release(&lk->lock_object);
626                                 continue;
627                         }
628
629 #ifdef ADAPTIVE_LOCKMGRS
630                         /*
631                          * The current lock owner might have started executing
632                          * on another CPU (or the lock could have changed
633                          * owner) while we were waiting on the sleepqueue
634                          * chain lock.  If so, drop the sleepqueue lock and
635                          * try again.
636                          */
637                         if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
638                             LK_HOLDER(x) != LK_KERNPROC) {
639                                 owner = (struct thread *)LK_HOLDER(x);
640                                 if (TD_IS_RUNNING(owner)) {
641                                         sleepq_release(&lk->lock_object);
642                                         continue;
643                                 }
644                         }
645 #endif
646
647                         /*
648                          * Try to set the LK_SHARED_WAITERS flag.  If we fail,
649                          * loop back and retry.
650                          */
651                         if ((x & LK_SHARED_WAITERS) == 0) {
652                                 if (!atomic_cmpset_acq_ptr(&lk->lk_lock, x,
653                                     x | LK_SHARED_WAITERS)) {
654                                         sleepq_release(&lk->lock_object);
655                                         continue;
656                                 }
657                                 LOCK_LOG2(lk, "%s: %p set shared waiters flag",
658                                     __func__, lk);
659                         }
660
661                         /*
662                          * As far as we have been unable to acquire the
663                          * shared lock and the shared waiters flag is set,
664                          * we will sleep.
665                          */
666                         error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
667                             SQ_SHARED_QUEUE);
668                         flags &= ~LK_INTERLOCK;
669                         if (error) {
670                                 LOCK_LOG3(lk,
671                                     "%s: interrupted sleep for %p with %d",
672                                     __func__, lk, error);
673                                 break;
674                         }
675                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
676                             __func__, lk);
677                 }
678                 if (error == 0) {
679                         lock_profile_obtain_lock_success(&lk->lock_object,
680                             contested, waittime, file, line);
681                         LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file,
682                             line);
683                         WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file,
684                             line);
685                         TD_LOCKS_INC(curthread);
686                         TD_SLOCKS_INC(curthread);
687                         STACK_SAVE(lk);
688                 }
689                 break;
690         case LK_UPGRADE:
691         case LK_TRYUPGRADE:
692                 _lockmgr_assert(lk, KA_SLOCKED, file, line);
693                 v = lk->lk_lock;
694                 x = v & LK_ALL_WAITERS;
695                 v &= LK_EXCLUSIVE_SPINNERS;
696
697                 /*
698                  * Try to switch from one shared lock to an exclusive one.
699                  * We need to preserve waiters flags during the operation.
700                  */
701                 if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
702                     tid | x)) {
703                         LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
704                             line);
705                         WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
706                             LK_TRYWIT(flags), file, line);
707                         TD_SLOCKS_DEC(curthread);
708                         break;
709                 }
710
711                 /*
712                  * In LK_TRYUPGRADE mode, do not drop the lock,
713                  * returning EBUSY instead.
714                  */
715                 if (op == LK_TRYUPGRADE) {
716                         LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
717                             __func__, lk);
718                         error = EBUSY;
719                         break;
720                 }
721
722                 /*
723                  * We have been unable to succeed in upgrading, so just
724                  * give up the shared lock.
725                  */
726                 wakeup_swapper |= wakeupshlk(lk, file, line);
727
728                 /* FALLTHROUGH */
729         case LK_EXCLUSIVE:
730                 if (LK_CAN_WITNESS(flags))
731                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
732                             LOP_EXCLUSIVE, file, line, ilk);
733
734                 /*
735                  * If curthread already holds the lock and this one is
736                  * allowed to recurse, simply recurse on it.
737                  */
738                 if (lockmgr_xlocked(lk)) {
739                         if ((flags & LK_CANRECURSE) == 0 &&
740                             (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
741
742                                 /*
743                                  * If the lock is expected to not panic just
744                                  * give up and return.
745                                  */
746                                 if (LK_TRYOP(flags)) {
747                                         LOCK_LOG2(lk,
748                                             "%s: %p fails the try operation",
749                                             __func__, lk);
750                                         error = EBUSY;
751                                         break;
752                                 }
753                                 if (flags & LK_INTERLOCK)
754                                         class->lc_unlock(ilk);
755                 panic("%s: recursing on non recursive lockmgr %s @ %s:%d\n",
756                                     __func__, iwmesg, file, line);
757                         }
758                         lk->lk_recurse++;
759                         LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
760                         LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
761                             lk->lk_recurse, file, line);
762                         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
763                             LK_TRYWIT(flags), file, line);
764                         TD_LOCKS_INC(curthread);
765                         break;
766                 }
767
768                 while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED,
769                     tid)) {
770 #ifdef HWPMC_HOOKS
771                         PMC_SOFT_CALL( , , lock, failed);
772 #endif
773                         lock_profile_obtain_lock_failed(&lk->lock_object,
774                             &contested, &waittime);
775
776                         /*
777                          * If the lock is expected to not sleep just give up
778                          * and return.
779                          */
780                         if (LK_TRYOP(flags)) {
781                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
782                                     __func__, lk);
783                                 error = EBUSY;
784                                 break;
785                         }
786
787 #ifdef ADAPTIVE_LOCKMGRS
788                         /*
789                          * If the owner is running on another CPU, spin until
790                          * the owner stops running or the state of the lock
791                          * changes.
792                          */
793                         x = lk->lk_lock;
794                         if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
795                             LK_HOLDER(x) != LK_KERNPROC) {
796                                 owner = (struct thread *)LK_HOLDER(x);
797                                 if (LOCK_LOG_TEST(&lk->lock_object, 0))
798                                         CTR3(KTR_LOCK,
799                                             "%s: spinning on %p held by %p",
800                                             __func__, lk, owner);
801
802                                 /*
803                                  * If we are holding also an interlock drop it
804                                  * in order to avoid a deadlock if the lockmgr
805                                  * owner is adaptively spinning on the
806                                  * interlock itself.
807                                  */
808                                 if (flags & LK_INTERLOCK) {
809                                         class->lc_unlock(ilk);
810                                         flags &= ~LK_INTERLOCK;
811                                 }
812                                 GIANT_SAVE();
813                                 while (LK_HOLDER(lk->lk_lock) ==
814                                     (uintptr_t)owner && TD_IS_RUNNING(owner))
815                                         cpu_spinwait();
816                                 GIANT_RESTORE();
817                                 continue;
818                         } else if (LK_CAN_ADAPT(lk, flags) &&
819                             (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
820                             spintries < ALK_RETRIES) {
821                                 if ((x & LK_EXCLUSIVE_SPINNERS) == 0 &&
822                                     !atomic_cmpset_ptr(&lk->lk_lock, x,
823                                     x | LK_EXCLUSIVE_SPINNERS))
824                                         continue;
825                                 if (flags & LK_INTERLOCK) {
826                                         class->lc_unlock(ilk);
827                                         flags &= ~LK_INTERLOCK;
828                                 }
829                                 GIANT_SAVE();
830                                 spintries++;
831                                 for (i = 0; i < ALK_LOOPS; i++) {
832                                         if (LOCK_LOG_TEST(&lk->lock_object, 0))
833                                                 CTR4(KTR_LOCK,
834                                     "%s: shared spinning on %p with %u and %u",
835                                                     __func__, lk, spintries, i);
836                                         if ((lk->lk_lock &
837                                             LK_EXCLUSIVE_SPINNERS) == 0)
838                                                 break;
839                                         cpu_spinwait();
840                                 }
841                                 GIANT_RESTORE();
842                                 if (i != ALK_LOOPS)
843                                         continue;
844                         }
845 #endif
846
847                         /*
848                          * Acquire the sleepqueue chain lock because we
849                          * probably will need to manipulate waiters flags.
850                          */
851                         sleepq_lock(&lk->lock_object);
852                         x = lk->lk_lock;
853
854                         /*
855                          * if the lock has been released while we spun on
856                          * the sleepqueue chain lock just try again.
857                          */
858                         if (x == LK_UNLOCKED) {
859                                 sleepq_release(&lk->lock_object);
860                                 continue;
861                         }
862
863 #ifdef ADAPTIVE_LOCKMGRS
864                         /*
865                          * The current lock owner might have started executing
866                          * on another CPU (or the lock could have changed
867                          * owner) while we were waiting on the turnstile
868                          * chain lock.  If so, drop the turnstile lock and try
869                          * again.
870                          */
871                         if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
872                             LK_HOLDER(x) != LK_KERNPROC) {
873                                 owner = (struct thread *)LK_HOLDER(x);
874                                 if (TD_IS_RUNNING(owner)) {
875                                         sleepq_release(&lk->lock_object);
876                                         continue;
877                                 }
878                         }
879 #endif
880
881                         /*
882                          * The lock can be in the state where there is a
883                          * pending queue of waiters, but still no owner.
884                          * This happens when the lock is contested and an
885                          * owner is going to claim the lock.
886                          * If curthread is the one successfully acquiring it
887                          * claim lock ownership and return, preserving waiters
888                          * flags.
889                          */
890                         v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
891                         if ((x & ~v) == LK_UNLOCKED) {
892                                 v &= ~LK_EXCLUSIVE_SPINNERS;
893                                 if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
894                                     tid | v)) {
895                                         sleepq_release(&lk->lock_object);
896                                         LOCK_LOG2(lk,
897                                             "%s: %p claimed by a new writer",
898                                             __func__, lk);
899                                         break;
900                                 }
901                                 sleepq_release(&lk->lock_object);
902                                 continue;
903                         }
904
905                         /*
906                          * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
907                          * fail, loop back and retry.
908                          */
909                         if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
910                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x,
911                                     x | LK_EXCLUSIVE_WAITERS)) {
912                                         sleepq_release(&lk->lock_object);
913                                         continue;
914                                 }
915                                 LOCK_LOG2(lk, "%s: %p set excl waiters flag",
916                                     __func__, lk);
917                         }
918
919                         /*
920                          * As far as we have been unable to acquire the
921                          * exclusive lock and the exclusive waiters flag
922                          * is set, we will sleep.
923                          */
924                         error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
925                             SQ_EXCLUSIVE_QUEUE);
926                         flags &= ~LK_INTERLOCK;
927                         if (error) {
928                                 LOCK_LOG3(lk,
929                                     "%s: interrupted sleep for %p with %d",
930                                     __func__, lk, error);
931                                 break;
932                         }
933                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
934                             __func__, lk);
935                 }
936                 if (error == 0) {
937                         lock_profile_obtain_lock_success(&lk->lock_object,
938                             contested, waittime, file, line);
939                         LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
940                             lk->lk_recurse, file, line);
941                         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
942                             LK_TRYWIT(flags), file, line);
943                         TD_LOCKS_INC(curthread);
944                         STACK_SAVE(lk);
945                 }
946                 break;
947         case LK_DOWNGRADE:
948                 _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
949                 LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
950                 WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
951                 TD_SLOCKS_INC(curthread);
952
953                 /*
954                  * In order to preserve waiters flags, just spin.
955                  */
956                 for (;;) {
957                         x = lk->lk_lock;
958                         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
959                         x &= LK_ALL_WAITERS;
960                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
961                             LK_SHARERS_LOCK(1) | x))
962                                 break;
963                         cpu_spinwait();
964                 }
965                 break;
966         case LK_RELEASE:
967                 _lockmgr_assert(lk, KA_LOCKED, file, line);
968                 x = lk->lk_lock;
969
970                 if ((x & LK_SHARE) == 0) {
971
972                         /*
973                          * As first option, treat the lock as if it has not
974                          * any waiter.
975                          * Fix-up the tid var if the lock has been disowned.
976                          */
977                         if (LK_HOLDER(x) == LK_KERNPROC)
978                                 tid = LK_KERNPROC;
979                         else {
980                                 WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE,
981                                     file, line);
982                                 TD_LOCKS_DEC(curthread);
983                         }
984                         LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0,
985                             lk->lk_recurse, file, line);
986
987                         /*
988                          * The lock is held in exclusive mode.
989                          * If the lock is recursed also, then unrecurse it.
990                          */
991                         if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
992                                 LOCK_LOG2(lk, "%s: %p unrecursing", __func__,
993                                     lk);
994                                 lk->lk_recurse--;
995                                 break;
996                         }
997                         if (tid != LK_KERNPROC)
998                                 lock_profile_release_lock(&lk->lock_object);
999
1000                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid,
1001                             LK_UNLOCKED))
1002                                 break;
1003
1004                         sleepq_lock(&lk->lock_object);
1005                         x = lk->lk_lock;
1006                         v = LK_UNLOCKED;
1007
1008                         /*
1009                          * If the lock has exclusive waiters, give them
1010                          * preference in order to avoid deadlock with
1011                          * shared runners up.
1012                          * If interruptible sleeps left the exclusive queue
1013                          * empty avoid a starvation for the threads sleeping
1014                          * on the shared queue by giving them precedence
1015                          * and cleaning up the exclusive waiters bit anyway.
1016                          * Please note that lk_exslpfail count may be lying
1017                          * about the real number of waiters with the
1018                          * LK_SLEEPFAIL flag on because they may be used in
1019                          * conjunction with interruptible sleeps so
1020                          * lk_exslpfail might be considered an 'upper limit'
1021                          * bound, including the edge cases.
1022                          */
1023                         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1024                         realexslp = sleepq_sleepcnt(&lk->lock_object,
1025                             SQ_EXCLUSIVE_QUEUE);
1026                         if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1027                                 if (lk->lk_exslpfail < realexslp) {
1028                                         lk->lk_exslpfail = 0;
1029                                         queue = SQ_EXCLUSIVE_QUEUE;
1030                                         v |= (x & LK_SHARED_WAITERS);
1031                                 } else {
1032                                         lk->lk_exslpfail = 0;
1033                                         LOCK_LOG2(lk,
1034                                         "%s: %p has only LK_SLEEPFAIL sleepers",
1035                                             __func__, lk);
1036                                         LOCK_LOG2(lk,
1037                         "%s: %p waking up threads on the exclusive queue",
1038                                             __func__, lk);
1039                                         wakeup_swapper =
1040                                             sleepq_broadcast(&lk->lock_object,
1041                                             SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1042                                         queue = SQ_SHARED_QUEUE;
1043                                 }
1044                         } else {
1045
1046                                 /*
1047                                  * Exclusive waiters sleeping with LK_SLEEPFAIL
1048                                  * on and using interruptible sleeps/timeout
1049                                  * may have left spurious lk_exslpfail counts
1050                                  * on, so clean it up anyway. 
1051                                  */
1052                                 lk->lk_exslpfail = 0;
1053                                 queue = SQ_SHARED_QUEUE;
1054                         }
1055
1056                         LOCK_LOG3(lk,
1057                             "%s: %p waking up threads on the %s queue",
1058                             __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1059                             "exclusive");
1060                         atomic_store_rel_ptr(&lk->lk_lock, v);
1061                         wakeup_swapper |= sleepq_broadcast(&lk->lock_object,
1062                             SLEEPQ_LK, 0, queue);
1063                         sleepq_release(&lk->lock_object);
1064                         break;
1065                 } else
1066                         wakeup_swapper = wakeupshlk(lk, file, line);
1067                 break;
1068         case LK_DRAIN:
1069                 if (LK_CAN_WITNESS(flags))
1070                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1071                             LOP_EXCLUSIVE, file, line, ilk);
1072
1073                 /*
1074                  * Trying to drain a lock we already own will result in a
1075                  * deadlock.
1076                  */
1077                 if (lockmgr_xlocked(lk)) {
1078                         if (flags & LK_INTERLOCK)
1079                                 class->lc_unlock(ilk);
1080                         panic("%s: draining %s with the lock held @ %s:%d\n",
1081                             __func__, iwmesg, file, line);
1082                 }
1083
1084                 while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
1085 #ifdef HWPMC_HOOKS
1086                         PMC_SOFT_CALL( , , lock, failed);
1087 #endif
1088                         lock_profile_obtain_lock_failed(&lk->lock_object,
1089                             &contested, &waittime);
1090
1091                         /*
1092                          * If the lock is expected to not sleep just give up
1093                          * and return.
1094                          */
1095                         if (LK_TRYOP(flags)) {
1096                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
1097                                     __func__, lk);
1098                                 error = EBUSY;
1099                                 break;
1100                         }
1101
1102                         /*
1103                          * Acquire the sleepqueue chain lock because we
1104                          * probably will need to manipulate waiters flags.
1105                          */
1106                         sleepq_lock(&lk->lock_object);
1107                         x = lk->lk_lock;
1108
1109                         /*
1110                          * if the lock has been released while we spun on
1111                          * the sleepqueue chain lock just try again.
1112                          */
1113                         if (x == LK_UNLOCKED) {
1114                                 sleepq_release(&lk->lock_object);
1115                                 continue;
1116                         }
1117
1118                         v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1119                         if ((x & ~v) == LK_UNLOCKED) {
1120                                 v = (x & ~LK_EXCLUSIVE_SPINNERS);
1121
1122                                 /*
1123                                  * If interruptible sleeps left the exclusive
1124                                  * queue empty avoid a starvation for the
1125                                  * threads sleeping on the shared queue by
1126                                  * giving them precedence and cleaning up the
1127                                  * exclusive waiters bit anyway.
1128                                  * Please note that lk_exslpfail count may be
1129                                  * lying about the real number of waiters with
1130                                  * the LK_SLEEPFAIL flag on because they may
1131                                  * be used in conjunction with interruptible
1132                                  * sleeps so lk_exslpfail might be considered
1133                                  * an 'upper limit' bound, including the edge
1134                                  * cases.
1135                                  */
1136                                 if (v & LK_EXCLUSIVE_WAITERS) {
1137                                         queue = SQ_EXCLUSIVE_QUEUE;
1138                                         v &= ~LK_EXCLUSIVE_WAITERS;
1139                                 } else {
1140
1141                                         /*
1142                                          * Exclusive waiters sleeping with
1143                                          * LK_SLEEPFAIL on and using
1144                                          * interruptible sleeps/timeout may
1145                                          * have left spurious lk_exslpfail
1146                                          * counts on, so clean it up anyway.
1147                                          */
1148                                         MPASS(v & LK_SHARED_WAITERS);
1149                                         lk->lk_exslpfail = 0;
1150                                         queue = SQ_SHARED_QUEUE;
1151                                         v &= ~LK_SHARED_WAITERS;
1152                                 }
1153                                 if (queue == SQ_EXCLUSIVE_QUEUE) {
1154                                         realexslp =
1155                                             sleepq_sleepcnt(&lk->lock_object,
1156                                             SQ_EXCLUSIVE_QUEUE);
1157                                         if (lk->lk_exslpfail >= realexslp) {
1158                                                 lk->lk_exslpfail = 0;
1159                                                 queue = SQ_SHARED_QUEUE;
1160                                                 v &= ~LK_SHARED_WAITERS;
1161                                                 if (realexslp != 0) {
1162                                                         LOCK_LOG2(lk,
1163                                         "%s: %p has only LK_SLEEPFAIL sleepers",
1164                                                             __func__, lk);
1165                                                         LOCK_LOG2(lk,
1166                         "%s: %p waking up threads on the exclusive queue",
1167                                                             __func__, lk);
1168                                                         wakeup_swapper =
1169                                                             sleepq_broadcast(
1170                                                             &lk->lock_object,
1171                                                             SLEEPQ_LK, 0,
1172                                                             SQ_EXCLUSIVE_QUEUE);
1173                                                 }
1174                                         } else
1175                                                 lk->lk_exslpfail = 0;
1176                                 }
1177                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1178                                         sleepq_release(&lk->lock_object);
1179                                         continue;
1180                                 }
1181                                 LOCK_LOG3(lk,
1182                                 "%s: %p waking up all threads on the %s queue",
1183                                     __func__, lk, queue == SQ_SHARED_QUEUE ?
1184                                     "shared" : "exclusive");
1185                                 wakeup_swapper |= sleepq_broadcast(
1186                                     &lk->lock_object, SLEEPQ_LK, 0, queue);
1187
1188                                 /*
1189                                  * If shared waiters have been woken up we need
1190                                  * to wait for one of them to acquire the lock
1191                                  * before to set the exclusive waiters in
1192                                  * order to avoid a deadlock.
1193                                  */
1194                                 if (queue == SQ_SHARED_QUEUE) {
1195                                         for (v = lk->lk_lock;
1196                                             (v & LK_SHARE) && !LK_SHARERS(v);
1197                                             v = lk->lk_lock)
1198                                                 cpu_spinwait();
1199                                 }
1200                         }
1201
1202                         /*
1203                          * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1204                          * fail, loop back and retry.
1205                          */
1206                         if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1207                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1208                                     x | LK_EXCLUSIVE_WAITERS)) {
1209                                         sleepq_release(&lk->lock_object);
1210                                         continue;
1211                                 }
1212                                 LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1213                                     __func__, lk);
1214                         }
1215
1216                         /*
1217                          * As far as we have been unable to acquire the
1218                          * exclusive lock and the exclusive waiters flag
1219                          * is set, we will sleep.
1220                          */
1221                         if (flags & LK_INTERLOCK) {
1222                                 class->lc_unlock(ilk);
1223                                 flags &= ~LK_INTERLOCK;
1224                         }
1225                         GIANT_SAVE();
1226                         sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1227                             SQ_EXCLUSIVE_QUEUE);
1228                         sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1229                         GIANT_RESTORE();
1230                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1231                             __func__, lk);
1232                 }
1233
1234                 if (error == 0) {
1235                         lock_profile_obtain_lock_success(&lk->lock_object,
1236                             contested, waittime, file, line);
1237                         LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1238                             lk->lk_recurse, file, line);
1239                         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1240                             LK_TRYWIT(flags), file, line);
1241                         TD_LOCKS_INC(curthread);
1242                         STACK_SAVE(lk);
1243                 }
1244                 break;
1245         default:
1246                 if (flags & LK_INTERLOCK)
1247                         class->lc_unlock(ilk);
1248                 panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1249         }
1250
1251         if (flags & LK_INTERLOCK)
1252                 class->lc_unlock(ilk);
1253         if (wakeup_swapper)
1254                 kick_proc0();
1255
1256         return (error);
1257 }
1258
1259 void
1260 _lockmgr_disown(struct lock *lk, const char *file, int line)
1261 {
1262         uintptr_t tid, x;
1263
1264         if (SCHEDULER_STOPPED())
1265                 return;
1266
1267         tid = (uintptr_t)curthread;
1268         _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
1269
1270         /*
1271          * If the owner is already LK_KERNPROC just skip the whole operation.
1272          */
1273         if (LK_HOLDER(lk->lk_lock) != tid)
1274                 return;
1275         lock_profile_release_lock(&lk->lock_object);
1276         LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1277         WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1278         TD_LOCKS_DEC(curthread);
1279         STACK_SAVE(lk);
1280
1281         /*
1282          * In order to preserve waiters flags, just spin.
1283          */
1284         for (;;) {
1285                 x = lk->lk_lock;
1286                 MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1287                 x &= LK_ALL_WAITERS;
1288                 if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1289                     LK_KERNPROC | x))
1290                         return;
1291                 cpu_spinwait();
1292         }
1293 }
1294
1295 void
1296 lockmgr_printinfo(struct lock *lk)
1297 {
1298         struct thread *td;
1299         uintptr_t x;
1300
1301         if (lk->lk_lock == LK_UNLOCKED)
1302                 printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1303         else if (lk->lk_lock & LK_SHARE)
1304                 printf("lock type %s: SHARED (count %ju)\n",
1305                     lk->lock_object.lo_name,
1306                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1307         else {
1308                 td = lockmgr_xholder(lk);
1309                 printf("lock type %s: EXCL by thread %p (pid %d)\n",
1310                     lk->lock_object.lo_name, td, td->td_proc->p_pid);
1311         }
1312
1313         x = lk->lk_lock;
1314         if (x & LK_EXCLUSIVE_WAITERS)
1315                 printf(" with exclusive waiters pending\n");
1316         if (x & LK_SHARED_WAITERS)
1317                 printf(" with shared waiters pending\n");
1318         if (x & LK_EXCLUSIVE_SPINNERS)
1319                 printf(" with exclusive spinners pending\n");
1320
1321         STACK_PRINT(lk);
1322 }
1323
1324 int
1325 lockstatus(struct lock *lk)
1326 {
1327         uintptr_t v, x;
1328         int ret;
1329
1330         ret = LK_SHARED;
1331         x = lk->lk_lock;
1332         v = LK_HOLDER(x);
1333
1334         if ((x & LK_SHARE) == 0) {
1335                 if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1336                         ret = LK_EXCLUSIVE;
1337                 else
1338                         ret = LK_EXCLOTHER;
1339         } else if (x == LK_UNLOCKED)
1340                 ret = 0;
1341
1342         return (ret);
1343 }
1344
1345 #ifdef INVARIANT_SUPPORT
1346
1347 FEATURE(invariant_support,
1348     "Support for modules compiled with INVARIANTS option");
1349
1350 #ifndef INVARIANTS
1351 #undef  _lockmgr_assert
1352 #endif
1353
1354 void
1355 _lockmgr_assert(struct lock *lk, int what, const char *file, int line)
1356 {
1357         int slocked = 0;
1358
1359         if (panicstr != NULL)
1360                 return;
1361         switch (what) {
1362         case KA_SLOCKED:
1363         case KA_SLOCKED | KA_NOTRECURSED:
1364         case KA_SLOCKED | KA_RECURSED:
1365                 slocked = 1;
1366         case KA_LOCKED:
1367         case KA_LOCKED | KA_NOTRECURSED:
1368         case KA_LOCKED | KA_RECURSED:
1369 #ifdef WITNESS
1370
1371                 /*
1372                  * We cannot trust WITNESS if the lock is held in exclusive
1373                  * mode and a call to lockmgr_disown() happened.
1374                  * Workaround this skipping the check if the lock is held in
1375                  * exclusive mode even for the KA_LOCKED case.
1376                  */
1377                 if (slocked || (lk->lk_lock & LK_SHARE)) {
1378                         witness_assert(&lk->lock_object, what, file, line);
1379                         break;
1380                 }
1381 #endif
1382                 if (lk->lk_lock == LK_UNLOCKED ||
1383                     ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1384                     (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1385                         panic("Lock %s not %slocked @ %s:%d\n",
1386                             lk->lock_object.lo_name, slocked ? "share" : "",
1387                             file, line);
1388
1389                 if ((lk->lk_lock & LK_SHARE) == 0) {
1390                         if (lockmgr_recursed(lk)) {
1391                                 if (what & KA_NOTRECURSED)
1392                                         panic("Lock %s recursed @ %s:%d\n",
1393                                             lk->lock_object.lo_name, file,
1394                                             line);
1395                         } else if (what & KA_RECURSED)
1396                                 panic("Lock %s not recursed @ %s:%d\n",
1397                                     lk->lock_object.lo_name, file, line);
1398                 }
1399                 break;
1400         case KA_XLOCKED:
1401         case KA_XLOCKED | KA_NOTRECURSED:
1402         case KA_XLOCKED | KA_RECURSED:
1403                 if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1404                         panic("Lock %s not exclusively locked @ %s:%d\n",
1405                             lk->lock_object.lo_name, file, line);
1406                 if (lockmgr_recursed(lk)) {
1407                         if (what & KA_NOTRECURSED)
1408                                 panic("Lock %s recursed @ %s:%d\n",
1409                                     lk->lock_object.lo_name, file, line);
1410                 } else if (what & KA_RECURSED)
1411                         panic("Lock %s not recursed @ %s:%d\n",
1412                             lk->lock_object.lo_name, file, line);
1413                 break;
1414         case KA_UNLOCKED:
1415                 if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1416                         panic("Lock %s exclusively locked @ %s:%d\n",
1417                             lk->lock_object.lo_name, file, line);
1418                 break;
1419         default:
1420                 panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1421                     line);
1422         }
1423 }
1424 #endif
1425
1426 #ifdef DDB
1427 int
1428 lockmgr_chain(struct thread *td, struct thread **ownerp)
1429 {
1430         struct lock *lk;
1431
1432         lk = td->td_wchan;
1433
1434         if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1435                 return (0);
1436         db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1437         if (lk->lk_lock & LK_SHARE)
1438                 db_printf("SHARED (count %ju)\n",
1439                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1440         else
1441                 db_printf("EXCL\n");
1442         *ownerp = lockmgr_xholder(lk);
1443
1444         return (1);
1445 }
1446
1447 static void
1448 db_show_lockmgr(struct lock_object *lock)
1449 {
1450         struct thread *td;
1451         struct lock *lk;
1452
1453         lk = (struct lock *)lock;
1454
1455         db_printf(" state: ");
1456         if (lk->lk_lock == LK_UNLOCKED)
1457                 db_printf("UNLOCKED\n");
1458         else if (lk->lk_lock & LK_SHARE)
1459                 db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1460         else {
1461                 td = lockmgr_xholder(lk);
1462                 if (td == (struct thread *)LK_KERNPROC)
1463                         db_printf("XLOCK: LK_KERNPROC\n");
1464                 else
1465                         db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1466                             td->td_tid, td->td_proc->p_pid,
1467                             td->td_proc->p_comm);
1468                 if (lockmgr_recursed(lk))
1469                         db_printf(" recursed: %d\n", lk->lk_recurse);
1470         }
1471         db_printf(" waiters: ");
1472         switch (lk->lk_lock & LK_ALL_WAITERS) {
1473         case LK_SHARED_WAITERS:
1474                 db_printf("shared\n");
1475                 break;
1476         case LK_EXCLUSIVE_WAITERS:
1477                 db_printf("exclusive\n");
1478                 break;
1479         case LK_ALL_WAITERS:
1480                 db_printf("shared and exclusive\n");
1481                 break;
1482         default:
1483                 db_printf("none\n");
1484         }
1485         db_printf(" spinners: ");
1486         if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1487                 db_printf("exclusive\n");
1488         else
1489                 db_printf("none\n");
1490 }
1491 #endif