sys/kern/kern_lock.c
1 /*-
2  * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice(s), this list of conditions and the following disclaimer as
10  *    the first lines of this file unmodified other than the possible
11  *    addition of one or more copyright notices.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice(s), this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26  * DAMAGE.
27  */
28
29 #include "opt_adaptive_lockmgrs.h"
30 #include "opt_ddb.h"
31 #include "opt_hwpmc_hooks.h"
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #include <sys/param.h>
37 #include <sys/kdb.h>
38 #include <sys/ktr.h>
39 #include <sys/lock.h>
40 #include <sys/lock_profile.h>
41 #include <sys/lockmgr.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/sleepqueue.h>
45 #ifdef DEBUG_LOCKS
46 #include <sys/stack.h>
47 #endif
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50
51 #include <machine/cpu.h>
52
53 #ifdef DDB
54 #include <ddb/ddb.h>
55 #endif
56
57 #ifdef HWPMC_HOOKS
58 #include <sys/pmckern.h>
59 PMC_SOFT_DECLARE( , , lock, failed);
60 #endif
61
62 CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
63     (LK_ADAPTIVE | LK_NOSHARE));
64 CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
65     ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
66
67 #define SQ_EXCLUSIVE_QUEUE      0
68 #define SQ_SHARED_QUEUE         1
69
70 #ifndef INVARIANTS
71 #define _lockmgr_assert(lk, what, file, line)
72 #define TD_LOCKS_INC(td)
73 #define TD_LOCKS_DEC(td)
74 #else
75 #define TD_LOCKS_INC(td)        ((td)->td_locks++)
76 #define TD_LOCKS_DEC(td)        ((td)->td_locks--)
77 #endif
78 #define TD_SLOCKS_INC(td)       ((td)->td_lk_slocks++)
79 #define TD_SLOCKS_DEC(td)       ((td)->td_lk_slocks--)
80
81 #ifndef DEBUG_LOCKS
82 #define STACK_PRINT(lk)
83 #define STACK_SAVE(lk)
84 #define STACK_ZERO(lk)
85 #else
86 #define STACK_PRINT(lk) stack_print_ddb(&(lk)->lk_stack)
87 #define STACK_SAVE(lk)  stack_save(&(lk)->lk_stack)
88 #define STACK_ZERO(lk)  stack_zero(&(lk)->lk_stack)
89 #endif
90
91 #define LOCK_LOG2(lk, string, arg1, arg2)                               \
92         if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
93                 CTR2(KTR_LOCK, (string), (arg1), (arg2))
94 #define LOCK_LOG3(lk, string, arg1, arg2, arg3)                         \
95         if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
96                 CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
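/*
 * Note: LOCK_LOG2() and LOCK_LOG3() emit KTR_LOCK trace records only when
 * lock logging has been enabled for the given lock object (LOCK_LOG_TEST()),
 * so the contended paths below can trace verbosely at little cost otherwise.
 */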
97
98 #define GIANT_DECLARE                                                   \
99         int _i = 0;                                                     \
100         WITNESS_SAVE_DECL(Giant)
101 #define GIANT_RESTORE() do {                                            \
102         if (_i > 0) {                                                   \
103                 while (_i--)                                            \
104                         mtx_lock(&Giant);                               \
105                 WITNESS_RESTORE(&Giant.lock_object, Giant);             \
106         }                                                               \
107 } while (0)
108 #define GIANT_SAVE() do {                                               \
109         if (mtx_owned(&Giant)) {                                        \
110                 WITNESS_SAVE(&Giant.lock_object, Giant);                \
111                 while (mtx_owned(&Giant)) {                             \
112                         _i++;                                           \
113                         mtx_unlock(&Giant);                             \
114                 }                                                       \
115         }                                                               \
116 } while (0)
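/*
 * Note: GIANT_SAVE() fully drops Giant (recording the recursion depth in the
 * local _i declared by GIANT_DECLARE) before a thread spins or sleeps, and
 * GIANT_RESTORE() reacquires it the same number of times afterwards, so a
 * thread never sleeps on a lockmgr lock while holding Giant.
 */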
117
118 #define LK_CAN_SHARE(x)                                                 \
119         (((x) & LK_SHARE) && (((x) & LK_EXCLUSIVE_WAITERS) == 0 ||      \
120         ((x) & LK_EXCLUSIVE_SPINNERS) == 0 ||                           \
121         curthread->td_lk_slocks || (curthread->td_pflags & TDP_DEADLKTREAT)))
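/*
 * Note: LK_CAN_SHARE() above grants a shared request only while LK_SHARE is
 * set (the lock is unlocked or share-locked) and, to give writers some
 * preference, only if exclusive waiters and exclusive spinners are not both
 * present; threads that already hold shared locks or run with
 * TDP_DEADLKTREAT bypass the writer-preference check.
 */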
122 #define LK_TRYOP(x)                                                     \
123         ((x) & LK_NOWAIT)
124
125 #define LK_CAN_WITNESS(x)                                               \
126         (((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
127 #define LK_TRYWIT(x)                                                    \
128         (LK_TRYOP(x) ? LOP_TRYLOCK : 0)
129
130 #define LK_CAN_ADAPT(lk, f)                                             \
131         (((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&             \
132         ((f) & LK_SLEEPFAIL) == 0)
133
134 #define lockmgr_disowned(lk)                                            \
135         (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
136
137 #define lockmgr_xlocked(lk)                                             \
138         (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
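/*
 * Note: the two tests above mask off every flag bit except LK_SHARE, so
 * lockmgr_disowned() matches an exclusively held lock that has been handed
 * off to LK_KERNPROC and lockmgr_xlocked() matches a lock whose exclusive
 * holder is curthread.
 */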
139
140 static void     assert_lockmgr(const struct lock_object *lock, int how);
141 #ifdef DDB
142 static void     db_show_lockmgr(const struct lock_object *lock);
143 #endif
144 static void     lock_lockmgr(struct lock_object *lock, uintptr_t how);
145 #ifdef KDTRACE_HOOKS
146 static int      owner_lockmgr(const struct lock_object *lock,
147                     struct thread **owner);
148 #endif
149 static uintptr_t unlock_lockmgr(struct lock_object *lock);
150
151 struct lock_class lock_class_lockmgr = {
152         .lc_name = "lockmgr",
153         .lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
154         .lc_assert = assert_lockmgr,
155 #ifdef DDB
156         .lc_ddb_show = db_show_lockmgr,
157 #endif
158         .lc_lock = lock_lockmgr,
159         .lc_unlock = unlock_lockmgr,
160 #ifdef KDTRACE_HOOKS
161         .lc_owner = owner_lockmgr,
162 #endif
163 };
164
165 #ifdef ADAPTIVE_LOCKMGRS
166 static u_int alk_retries = 10;
167 static u_int alk_loops = 10000;
168 static SYSCTL_NODE(_debug, OID_AUTO, lockmgr, CTLFLAG_RD, NULL,
169     "lockmgr debugging");
170 SYSCTL_UINT(_debug_lockmgr, OID_AUTO, retries, CTLFLAG_RW, &alk_retries, 0, "");
171 SYSCTL_UINT(_debug_lockmgr, OID_AUTO, loops, CTLFLAG_RW, &alk_loops, 0, "");
172 #endif
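/*
 * Note: with ADAPTIVE_LOCKMGRS the knobs above are exported as
 * debug.lockmgr.retries and debug.lockmgr.loops and bound the adaptive
 * spinning performed before a contended request falls back to the
 * sleepqueue.
 */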
173
174 static __inline struct thread *
175 lockmgr_xholder(const struct lock *lk)
176 {
177         uintptr_t x;
178
179         x = lk->lk_lock;
180         return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
181 }
182
183 /*
184  * This function assumes the sleepqueue chain lock is held on entry and
185  * returns with it released.  It also assumes the generic interlock is
186  * sane and has already been checked.  If LK_INTERLOCK is specified, the
187  * interlock is not reacquired after the sleep.
188  */
189 static __inline int
190 sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
191     const char *wmesg, int pri, int timo, int queue)
192 {
193         GIANT_DECLARE;
194         struct lock_class *class;
195         int catch, error;
196
197         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
198         catch = pri & PCATCH;
199         pri &= PRIMASK;
200         error = 0;
201
202         LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
203             (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
204
205         if (flags & LK_INTERLOCK)
206                 class->lc_unlock(ilk);
207         if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
208                 lk->lk_exslpfail++;
209         GIANT_SAVE();
210         sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
211             SLEEPQ_INTERRUPTIBLE : 0), queue);
212         if ((flags & LK_TIMELOCK) && timo)
213                 sleepq_set_timeout(&lk->lock_object, timo);
214
215         /*
216          * Decide which form of sleep to perform.
217          */
218         if ((flags & LK_TIMELOCK) && timo && catch)
219                 error = sleepq_timedwait_sig(&lk->lock_object, pri);
220         else if ((flags & LK_TIMELOCK) && timo)
221                 error = sleepq_timedwait(&lk->lock_object, pri);
222         else if (catch)
223                 error = sleepq_wait_sig(&lk->lock_object, pri);
224         else
225                 sleepq_wait(&lk->lock_object, pri);
226         GIANT_RESTORE();
227         if ((flags & LK_SLEEPFAIL) && error == 0)
228                 error = ENOLCK;
229
230         return (error);
231 }
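/*
 * Note: sleeplk() returns 0 after an ordinary wakeup, ENOLCK when
 * LK_SLEEPFAIL was requested and the sleep otherwise succeeded, or the error
 * produced by an interruptible or timed-out sleepqueue wait; callers treat
 * any non-zero value as a failed acquisition.
 */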
232
233 static __inline int
234 wakeupshlk(struct lock *lk, const char *file, int line)
235 {
236         uintptr_t v, x;
237         u_int realexslp;
238         int queue, wakeup_swapper;
239
240         WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
241         LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
242
243         wakeup_swapper = 0;
244         for (;;) {
245                 x = lk->lk_lock;
246
247                 /*
248                  * If there is more than one shared lock held, just drop one
249                  * and return.
250                  */
251                 if (LK_SHARERS(x) > 1) {
252                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, x,
253                             x - LK_ONE_SHARER))
254                                 break;
255                         continue;
256                 }
257
258                 /*
259                  * If there are no waiters on the exclusive queue, drop the
260                  * lock quickly.
261                  */
262                 if ((x & LK_ALL_WAITERS) == 0) {
263                         MPASS((x & ~LK_EXCLUSIVE_SPINNERS) ==
264                             LK_SHARERS_LOCK(1));
265                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, x, LK_UNLOCKED))
266                                 break;
267                         continue;
268                 }
269
270                 /*
271                  * We should have a sharer with waiters, so enter the hard
272                  * path in order to handle wakeups correctly.
273                  */
274                 sleepq_lock(&lk->lock_object);
275                 x = lk->lk_lock & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
276                 v = LK_UNLOCKED;
277
278                 /*
279                  * If the lock has exclusive waiters, give them preference in
280                  * order to avoid deadlock with shared runners-up.
281                  * If interruptible sleeps left the exclusive queue empty,
282                  * avoid starvation of the threads sleeping on the shared
283                  * queue by giving them precedence and cleaning up the
284                  * exclusive waiters bit anyway.
285                  * Please note that the lk_exslpfail count may be lying about
286                  * the real number of waiters with the LK_SLEEPFAIL flag on
287                  * because they may be used in conjunction with interruptible
288                  * sleeps, so lk_exslpfail should be considered an upper
289                  * bound, including the edge cases.
290                  */
291                 realexslp = sleepq_sleepcnt(&lk->lock_object,
292                     SQ_EXCLUSIVE_QUEUE);
293                 if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
294                         if (lk->lk_exslpfail < realexslp) {
295                                 lk->lk_exslpfail = 0;
296                                 queue = SQ_EXCLUSIVE_QUEUE;
297                                 v |= (x & LK_SHARED_WAITERS);
298                         } else {
299                                 lk->lk_exslpfail = 0;
300                                 LOCK_LOG2(lk,
301                                     "%s: %p has only LK_SLEEPFAIL sleepers",
302                                     __func__, lk);
303                                 LOCK_LOG2(lk,
304                             "%s: %p waking up threads on the exclusive queue",
305                                     __func__, lk);
306                                 wakeup_swapper =
307                                     sleepq_broadcast(&lk->lock_object,
308                                     SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
309                                 queue = SQ_SHARED_QUEUE;
310                         }
311                                 
312                 } else {
313
314                         /*
315                          * Exclusive waiters sleeping with LK_SLEEPFAIL on
316                          * and using interruptible sleeps/timeouts may have
317                          * left spurious lk_exslpfail counts behind, so
318                          * clean them up anyway.
319                          */
320                         lk->lk_exslpfail = 0;
321                         queue = SQ_SHARED_QUEUE;
322                 }
323
324                 if (!atomic_cmpset_rel_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x,
325                     v)) {
326                         sleepq_release(&lk->lock_object);
327                         continue;
328                 }
329                 LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
330                     __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
331                     "exclusive");
332                 wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
333                     0, queue);
334                 sleepq_release(&lk->lock_object);
335                 break;
336         }
337
338         lock_profile_release_lock(&lk->lock_object);
339         TD_LOCKS_DEC(curthread);
340         TD_SLOCKS_DEC(curthread);
341         return (wakeup_swapper);
342 }
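/*
 * Note: wakeupshlk() returns non-zero when sleepq_broadcast() reports that
 * the swapper process must be woken up; the caller is expected to do so
 * (typically via kick_proc0()) once any interlock has been released.
 */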
343
344 static void
345 assert_lockmgr(const struct lock_object *lock, int what)
346 {
347
348         panic("lockmgr locks do not support assertions");
349 }
350
351 static void
352 lock_lockmgr(struct lock_object *lock, uintptr_t how)
353 {
354
355         panic("lockmgr locks do not support sleep interlocking");
356 }
357
358 static uintptr_t
359 unlock_lockmgr(struct lock_object *lock)
360 {
361
362         panic("lockmgr locks do not support sleep interlocking");
363 }
364
365 #ifdef KDTRACE_HOOKS
366 static int
367 owner_lockmgr(const struct lock_object *lock, struct thread **owner)
368 {
369
370         panic("lockmgr locks do not support owner inquiring");
371 }
372 #endif
373
374 void
375 lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
376 {
377         int iflags;
378
379         MPASS((flags & ~LK_INIT_MASK) == 0);
380         ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
381             ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
382             &lk->lk_lock));
383
384         iflags = LO_SLEEPABLE | LO_UPGRADABLE;
385         if (flags & LK_CANRECURSE)
386                 iflags |= LO_RECURSABLE;
387         if ((flags & LK_NODUP) == 0)
388                 iflags |= LO_DUPOK;
389         if (flags & LK_NOPROFILE)
390                 iflags |= LO_NOPROFILE;
391         if ((flags & LK_NOWITNESS) == 0)
392                 iflags |= LO_WITNESS;
393         if (flags & LK_QUIET)
394                 iflags |= LO_QUIET;
395         if (flags & LK_IS_VNODE)
396                 iflags |= LO_IS_VNODE;
397         iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
398
399         lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
400         lk->lk_lock = LK_UNLOCKED;
401         lk->lk_recurse = 0;
402         lk->lk_exslpfail = 0;
403         lk->lk_timo = timo;
404         lk->lk_pri = pri;
405         STACK_ZERO(lk);
406 }
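/*
 * Illustrative sketch (not part of this file): a typical consumer pairs
 * lockinit() with lockmgr() requests and a final lockdestroy().  The names
 * below are hypothetical:
 *
 *	struct lock examplelock;
 *
 *	lockinit(&examplelock, PRIBIO, "example", 0, 0);
 *	lockmgr(&examplelock, LK_EXCLUSIVE, NULL);
 *	... critical section ...
 *	lockmgr(&examplelock, LK_RELEASE, NULL);
 *	lockdestroy(&examplelock);
 */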
407
408 /*
409  * XXX: Gross hacks to manipulate external lock flags after
410  * initialization.  Used for certain vnode and buf locks.
411  */
412 void
413 lockallowshare(struct lock *lk)
414 {
415
416         lockmgr_assert(lk, KA_XLOCKED);
417         lk->lock_object.lo_flags &= ~LK_NOSHARE;
418 }
419
420 void
421 lockallowrecurse(struct lock *lk)
422 {
423
424         lockmgr_assert(lk, KA_XLOCKED);
425         lk->lock_object.lo_flags |= LO_RECURSABLE;
426 }
427
428 void
429 lockdisablerecurse(struct lock *lk)
430 {
431
432         lockmgr_assert(lk, KA_XLOCKED);
433         lk->lock_object.lo_flags &= ~LO_RECURSABLE;
434 }
435
436 void
437 lockdestroy(struct lock *lk)
438 {
439
440         KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
441         KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
442         KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
443         lock_destroy(&lk->lock_object);
444 }
445
446 int
447 __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
448     const char *wmesg, int pri, int timo, const char *file, int line)
449 {
450         GIANT_DECLARE;
451         struct lock_class *class;
452         const char *iwmesg;
453         uintptr_t tid, v, x;
454         u_int op, realexslp;
455         int error, ipri, itimo, queue, wakeup_swapper;
456 #ifdef LOCK_PROFILING
457         uint64_t waittime = 0;
458         int contested = 0;
459 #endif
460 #ifdef ADAPTIVE_LOCKMGRS
461         volatile struct thread *owner;
462         u_int i, spintries = 0;
463 #endif
464
465         error = 0;
466         tid = (uintptr_t)curthread;
467         op = (flags & LK_TYPE_MASK);
468         iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
469         ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
470         itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
471
472         MPASS((flags & ~LK_TOTAL_MASK) == 0);
473         KASSERT((op & (op - 1)) == 0,
474             ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
475         KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
476             (op != LK_DOWNGRADE && op != LK_RELEASE),
477             ("%s: Invalid flags in regard of the operation desired @ %s:%d",
478             __func__, file, line));
479         KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
480             ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
481             __func__, file, line));
482         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
483             ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
484             lk->lock_object.lo_name, file, line));
485
486         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
487         if (panicstr != NULL) {
488                 if (flags & LK_INTERLOCK)
489                         class->lc_unlock(ilk);
490                 return (0);
491         }
492
493         if (lk->lock_object.lo_flags & LK_NOSHARE) {
494                 switch (op) {
495                 case LK_SHARED:
496                         op = LK_EXCLUSIVE;
497                         break;
498                 case LK_UPGRADE:
499                 case LK_TRYUPGRADE:
500                 case LK_DOWNGRADE:
501                         _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
502                             file, line);
503                         if (flags & LK_INTERLOCK)
504                                 class->lc_unlock(ilk);
505                         return (0);
506                 }
507         }
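        /*
         * Note: from here on a LK_NOSHARE lock only sees exclusive-style
         * requests: shared acquisitions were remapped to LK_EXCLUSIVE above
         * and upgrade/downgrade requests have already returned.
         */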
508
509         wakeup_swapper = 0;
510         switch (op) {
511         case LK_SHARED:
512                 if (LK_CAN_WITNESS(flags))
513                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
514                             file, line, flags & LK_INTERLOCK ? ilk : NULL);
515                 for (;;) {
516                         x = lk->lk_lock;
517
518                         /*
519                          * If no other thread has an exclusive lock, or
520                          * no exclusive waiter is present, bump the count of
521                          * sharers.  Since we have to preserve the state of
522                          * waiters, if we fail to acquire the shared lock
523                          * loop back and retry.
524                          */
525                         if (LK_CAN_SHARE(x)) {
526                                 if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
527                                     x + LK_ONE_SHARER))
528                                         break;
529                                 continue;
530                         }
531 #ifdef HWPMC_HOOKS
532                         PMC_SOFT_CALL( , , lock, failed);
533 #endif
534                         lock_profile_obtain_lock_failed(&lk->lock_object,
535                             &contested, &waittime);
536
537                         /*
538                          * If the lock is already held by curthread in
539                          * exclusive mode, avoid a self-deadlock.
540                          */
541                         if (LK_HOLDER(x) == tid) {
542                                 LOCK_LOG2(lk,
543                                     "%s: %p already held in exclusive mode",
544                                     __func__, lk);
545                                 error = EDEADLK;
546                                 break;
547                         }
548
549                         /*
550                          * If the operation is not expected to sleep, just
551                          * give up and return.
552                          */
553                         if (LK_TRYOP(flags)) {
554                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
555                                     __func__, lk);
556                                 error = EBUSY;
557                                 break;
558                         }
559
560 #ifdef ADAPTIVE_LOCKMGRS
561                         /*
562                          * If the owner is running on another CPU, spin until
563                          * the owner stops running or the state of the lock
564                          * changes.  We need to handle both possible states
565                          * here because, after a failed acquisition, the lock
566                          * can be held either in exclusive mode or shared mode
567                          * (for the writer starvation avoidance technique).
568                          */
569                         if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
570                             LK_HOLDER(x) != LK_KERNPROC) {
571                                 owner = (struct thread *)LK_HOLDER(x);
572                                 if (LOCK_LOG_TEST(&lk->lock_object, 0))
573                                         CTR3(KTR_LOCK,
574                                             "%s: spinning on %p held by %p",
575                                             __func__, lk, owner);
576
577                                 /*
578                                  * If we are also holding an interlock, drop it
579                                  * in order to avoid a deadlock if the lockmgr
580                                  * owner is adaptively spinning on the
581                                  * interlock itself.
582                                  */
583                                 if (flags & LK_INTERLOCK) {
584                                         class->lc_unlock(ilk);
585                                         flags &= ~LK_INTERLOCK;
586                                 }
587                                 GIANT_SAVE();
588                                 while (LK_HOLDER(lk->lk_lock) ==
589                                     (uintptr_t)owner && TD_IS_RUNNING(owner))
590                                         cpu_spinwait();
591                                 GIANT_RESTORE();
592                                 continue;
593                         } else if (LK_CAN_ADAPT(lk, flags) &&
594                             (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
595                             spintries < alk_retries) {
596                                 if (flags & LK_INTERLOCK) {
597                                         class->lc_unlock(ilk);
598                                         flags &= ~LK_INTERLOCK;
599                                 }
600                                 GIANT_SAVE();
601                                 spintries++;
602                                 for (i = 0; i < alk_loops; i++) {
603                                         if (LOCK_LOG_TEST(&lk->lock_object, 0))
604                                                 CTR4(KTR_LOCK,
605                                     "%s: shared spinning on %p with %u and %u",
606                                                     __func__, lk, spintries, i);
607                                         x = lk->lk_lock;
608                                         if ((x & LK_SHARE) == 0 ||
609                                             LK_CAN_SHARE(x) != 0)
610                                                 break;
611                                         cpu_spinwait();
612                                 }
613                                 GIANT_RESTORE();
614                                 if (i != alk_loops)
615                                         continue;
616                         }
617 #endif
618
619                         /*
620                          * Acquire the sleepqueue chain lock because we
621                          * probably will need to manipulate waiters flags.
622                          */
623                         sleepq_lock(&lk->lock_object);
624                         x = lk->lk_lock;
625
626                         /*
627                          * If the lock can be acquired in shared mode, try
628                          * again.
629                          */
630                         if (LK_CAN_SHARE(x)) {
631                                 sleepq_release(&lk->lock_object);
632                                 continue;
633                         }
634
635 #ifdef ADAPTIVE_LOCKMGRS
636                         /*
637                          * The current lock owner might have started executing
638                          * on another CPU (or the lock could have changed
639                          * owner) while we were waiting on the sleepqueue
640                          * chain lock.  If so, drop the sleepqueue chain lock
641                          * and try again.
642                          */
643                         if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
644                             LK_HOLDER(x) != LK_KERNPROC) {
645                                 owner = (struct thread *)LK_HOLDER(x);
646                                 if (TD_IS_RUNNING(owner)) {
647                                         sleepq_release(&lk->lock_object);
648                                         continue;
649                                 }
650                         }
651 #endif
652
653                         /*
654                          * Try to set the LK_SHARED_WAITERS flag.  If we fail,
655                          * loop back and retry.
656                          */
657                         if ((x & LK_SHARED_WAITERS) == 0) {
658                                 if (!atomic_cmpset_acq_ptr(&lk->lk_lock, x,
659                                     x | LK_SHARED_WAITERS)) {
660                                         sleepq_release(&lk->lock_object);
661                                         continue;
662                                 }
663                                 LOCK_LOG2(lk, "%s: %p set shared waiters flag",
664                                     __func__, lk);
665                         }
666
667                         /*
668                          * Since we have been unable to acquire the
669                          * shared lock and the shared waiters flag is set,
670                          * we will sleep.
671                          */
672                         error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
673                             SQ_SHARED_QUEUE);
674                         flags &= ~LK_INTERLOCK;
675                         if (error) {
676                                 LOCK_LOG3(lk,
677                                     "%s: interrupted sleep for %p with %d",
678                                     __func__, lk, error);
679                                 break;
680                         }
681                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
682                             __func__, lk);
683                 }
684                 if (error == 0) {
685                         lock_profile_obtain_lock_success(&lk->lock_object,
686                             contested, waittime, file, line);
687                         LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file,
688                             line);
689                         WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file,
690                             line);
691                         TD_LOCKS_INC(curthread);
692                         TD_SLOCKS_INC(curthread);
693                         STACK_SAVE(lk);
694                 }
695                 break;
696         case LK_UPGRADE:
697         case LK_TRYUPGRADE:
698                 _lockmgr_assert(lk, KA_SLOCKED, file, line);
699                 v = lk->lk_lock;
700                 x = v & LK_ALL_WAITERS;
701                 v &= LK_EXCLUSIVE_SPINNERS;
702
703                 /*
704                  * Try to switch from one shared lock to an exclusive one.
705                  * We need to preserve waiters flags during the operation.
706                  */
707                 if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
708                     tid | x)) {
709                         LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
710                             line);
711                         WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
712                             LK_TRYWIT(flags), file, line);
713                         TD_SLOCKS_DEC(curthread);
714                         break;
715                 }
716
717                 /*
718                  * In LK_TRYUPGRADE mode, do not drop the lock,
719                  * returning EBUSY instead.
720                  */
721                 if (op == LK_TRYUPGRADE) {
722                         LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
723                             __func__, lk);
724                         error = EBUSY;
725                         break;
726                 }
727
728                 /*
729                  * We have been unable to upgrade, so just
730                  * give up the shared lock.
731                  */
732                 wakeup_swapper |= wakeupshlk(lk, file, line);
733
734                 /* FALLTHROUGH */
735         case LK_EXCLUSIVE:
736                 if (LK_CAN_WITNESS(flags))
737                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
738                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
739                             ilk : NULL);
740
741                 /*
742                  * If curthread already holds the lock and this one is
743                  * allowed to recurse, simply recurse on it.
744                  */
745                 if (lockmgr_xlocked(lk)) {
746                         if ((flags & LK_CANRECURSE) == 0 &&
747                             (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
748
749                                 /*
750                                  * If this is a try operation, just give up
751                                  * and return instead of panicking.
752                                  */
753                                 if (LK_TRYOP(flags)) {
754                                         LOCK_LOG2(lk,
755                                             "%s: %p fails the try operation",
756                                             __func__, lk);
757                                         error = EBUSY;
758                                         break;
759                                 }
760                                 if (flags & LK_INTERLOCK)
761                                         class->lc_unlock(ilk);
762                 panic("%s: recursing on non recursive lockmgr %s @ %s:%d\n",
763                                     __func__, iwmesg, file, line);
764                         }
765                         lk->lk_recurse++;
766                         LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
767                         LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
768                             lk->lk_recurse, file, line);
769                         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
770                             LK_TRYWIT(flags), file, line);
771                         TD_LOCKS_INC(curthread);
772                         break;
773                 }
774
775                 while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED,
776                     tid)) {
777 #ifdef HWPMC_HOOKS
778                         PMC_SOFT_CALL( , , lock, failed);
779 #endif
780                         lock_profile_obtain_lock_failed(&lk->lock_object,
781                             &contested, &waittime);
782
783                         /*
784                          * If the operation is not expected to sleep, just
785                          * give up and return.
786                          */
787                         if (LK_TRYOP(flags)) {
788                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
789                                     __func__, lk);
790                                 error = EBUSY;
791                                 break;
792                         }
793
794 #ifdef ADAPTIVE_LOCKMGRS
795                         /*
796                          * If the owner is running on another CPU, spin until
797                          * the owner stops running or the state of the lock
798                          * changes.
799                          */
800                         x = lk->lk_lock;
801                         if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
802                             LK_HOLDER(x) != LK_KERNPROC) {
803                                 owner = (struct thread *)LK_HOLDER(x);
804                                 if (LOCK_LOG_TEST(&lk->lock_object, 0))
805                                         CTR3(KTR_LOCK,
806                                             "%s: spinning on %p held by %p",
807                                             __func__, lk, owner);
808
809                                 /*
810                                  * If we are also holding an interlock, drop it
811                                  * in order to avoid a deadlock if the lockmgr
812                                  * owner is adaptively spinning on the
813                                  * interlock itself.
814                                  */
815                                 if (flags & LK_INTERLOCK) {
816                                         class->lc_unlock(ilk);
817                                         flags &= ~LK_INTERLOCK;
818                                 }
819                                 GIANT_SAVE();
820                                 while (LK_HOLDER(lk->lk_lock) ==
821                                     (uintptr_t)owner && TD_IS_RUNNING(owner))
822                                         cpu_spinwait();
823                                 GIANT_RESTORE();
824                                 continue;
825                         } else if (LK_CAN_ADAPT(lk, flags) &&
826                             (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
827                             spintries < alk_retries) {
828                                 if ((x & LK_EXCLUSIVE_SPINNERS) == 0 &&
829                                     !atomic_cmpset_ptr(&lk->lk_lock, x,
830                                     x | LK_EXCLUSIVE_SPINNERS))
831                                         continue;
832                                 if (flags & LK_INTERLOCK) {
833                                         class->lc_unlock(ilk);
834                                         flags &= ~LK_INTERLOCK;
835                                 }
836                                 GIANT_SAVE();
837                                 spintries++;
838                                 for (i = 0; i < alk_loops; i++) {
839                                         if (LOCK_LOG_TEST(&lk->lock_object, 0))
840                                                 CTR4(KTR_LOCK,
841                                     "%s: shared spinning on %p with %u and %u",
842                                                     __func__, lk, spintries, i);
843                                         if ((lk->lk_lock &
844                                             LK_EXCLUSIVE_SPINNERS) == 0)
845                                                 break;
846                                         cpu_spinwait();
847                                 }
848                                 GIANT_RESTORE();
849                                 if (i != alk_loops)
850                                         continue;
851                         }
852 #endif
853
854                         /*
855                          * Acquire the sleepqueue chain lock because we
856                          * probably will need to manipulate waiters flags.
857                          */
858                         sleepq_lock(&lk->lock_object);
859                         x = lk->lk_lock;
860
861                         /*
862                          * If the lock has been released while we spun on
863                          * the sleepqueue chain lock, just try again.
864                          */
865                         if (x == LK_UNLOCKED) {
866                                 sleepq_release(&lk->lock_object);
867                                 continue;
868                         }
869
870 #ifdef ADAPTIVE_LOCKMGRS
871                         /*
872                          * The current lock owner might have started executing
873                          * on another CPU (or the lock could have changed
874                          * owner) while we were waiting on the sleepqueue
875                          * chain lock.  If so, drop the sleepqueue chain lock
876                          * and try again.
877                          */
878                         if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
879                             LK_HOLDER(x) != LK_KERNPROC) {
880                                 owner = (struct thread *)LK_HOLDER(x);
881                                 if (TD_IS_RUNNING(owner)) {
882                                         sleepq_release(&lk->lock_object);
883                                         continue;
884                                 }
885                         }
886 #endif
887
888                         /*
889                          * The lock can be in a state where there is a
890                          * pending queue of waiters, but still no owner.
891                          * This happens when the lock is contested and an
892                          * owner is going to claim the lock.
893                          * If curthread is the one that successfully
894                          * acquires it, claim lock ownership and return,
895                          * preserving the waiters flags.
896                          */
897                         v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
898                         if ((x & ~v) == LK_UNLOCKED) {
899                                 v &= ~LK_EXCLUSIVE_SPINNERS;
900                                 if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
901                                     tid | v)) {
902                                         sleepq_release(&lk->lock_object);
903                                         LOCK_LOG2(lk,
904                                             "%s: %p claimed by a new writer",
905                                             __func__, lk);
906                                         break;
907                                 }
908                                 sleepq_release(&lk->lock_object);
909                                 continue;
910                         }
911
912                         /*
913                          * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
914                          * fail, loop back and retry.
915                          */
916                         if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
917                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x,
918                                     x | LK_EXCLUSIVE_WAITERS)) {
919                                         sleepq_release(&lk->lock_object);
920                                         continue;
921                                 }
922                                 LOCK_LOG2(lk, "%s: %p set excl waiters flag",
923                                     __func__, lk);
924                         }
925
926                         /*
927                          * Since we have been unable to acquire the
928                          * exclusive lock and the exclusive waiters flag
929                          * is set, we will sleep.
930                          */
931                         error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
932                             SQ_EXCLUSIVE_QUEUE);
933                         flags &= ~LK_INTERLOCK;
934                         if (error) {
935                                 LOCK_LOG3(lk,
936                                     "%s: interrupted sleep for %p with %d",
937                                     __func__, lk, error);
938                                 break;
939                         }
940                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
941                             __func__, lk);
942                 }
943                 if (error == 0) {
944                         lock_profile_obtain_lock_success(&lk->lock_object,
945                             contested, waittime, file, line);
946                         LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
947                             lk->lk_recurse, file, line);
948                         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
949                             LK_TRYWIT(flags), file, line);
950                         TD_LOCKS_INC(curthread);
951                         STACK_SAVE(lk);
952                 }
953                 break;
954         case LK_DOWNGRADE:
955                 _lockmgr_assert(lk, KA_XLOCKED, file, line);
956                 LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
957                 WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
958
959                 /*
960                  * Panic if the lock is recursed.
961                  */
962                 if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
963                         if (flags & LK_INTERLOCK)
964                                 class->lc_unlock(ilk);
965                         panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
966                             __func__, iwmesg, file, line);
967                 }
968                 TD_SLOCKS_INC(curthread);
969
970                 /*
971                  * In order to preserve waiters flags, just spin.
972                  */
973                 for (;;) {
974                         x = lk->lk_lock;
975                         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
976                         x &= LK_ALL_WAITERS;
977                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
978                             LK_SHARERS_LOCK(1) | x))
979                                 break;
980                         cpu_spinwait();
981                 }
982                 break;
983         case LK_RELEASE:
984                 _lockmgr_assert(lk, KA_LOCKED, file, line);
985                 x = lk->lk_lock;
986
987                 if ((x & LK_SHARE) == 0) {
988
989                         /*
990                          * As a first pass, treat the lock as if it had
991                          * no waiters.
992                          * Fix up the tid variable if the lock has been disowned.
993                          */
994                         if (LK_HOLDER(x) == LK_KERNPROC)
995                                 tid = LK_KERNPROC;
996                         else {
997                                 WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE,
998                                     file, line);
999                                 TD_LOCKS_DEC(curthread);
1000                         }
1001                         LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0,
1002                             lk->lk_recurse, file, line);
1003
1004                         /*
1005                          * The lock is held in exclusive mode.
1006                          * If the lock is recursed also, then unrecurse it.
1007                          */
1008                         if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1009                                 LOCK_LOG2(lk, "%s: %p unrecursing", __func__,
1010                                     lk);
1011                                 lk->lk_recurse--;
1012                                 break;
1013                         }
1014                         if (tid != LK_KERNPROC)
1015                                 lock_profile_release_lock(&lk->lock_object);
1016
1017                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid,
1018                             LK_UNLOCKED))
1019                                 break;
1020
1021                         sleepq_lock(&lk->lock_object);
1022                         x = lk->lk_lock;
1023                         v = LK_UNLOCKED;
1024
1025                         /*
1026                          * If the lock has exclusive waiters, give them
1027                          * preference in order to avoid deadlock with
1028                          * shared runners-up.
1029                          * If interruptible sleeps left the exclusive queue
1030                          * empty, avoid starvation of the threads sleeping
1031                          * on the shared queue by giving them precedence
1032                          * and cleaning up the exclusive waiters bit anyway.
1033                          * Please note that the lk_exslpfail count may be
1034                          * lying about the real number of waiters with the
1035                          * LK_SLEEPFAIL flag on because they may be used in
1036                          * conjunction with interruptible sleeps, so
1037                          * lk_exslpfail should be considered an upper
1038                          * bound, including the edge cases.
1039                          */
1040                         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1041                         realexslp = sleepq_sleepcnt(&lk->lock_object,
1042                             SQ_EXCLUSIVE_QUEUE);
1043                         if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1044                                 if (lk->lk_exslpfail < realexslp) {
1045                                         lk->lk_exslpfail = 0;
1046                                         queue = SQ_EXCLUSIVE_QUEUE;
1047                                         v |= (x & LK_SHARED_WAITERS);
1048                                 } else {
1049                                         lk->lk_exslpfail = 0;
1050                                         LOCK_LOG2(lk,
1051                                         "%s: %p has only LK_SLEEPFAIL sleepers",
1052                                             __func__, lk);
1053                                         LOCK_LOG2(lk,
1054                         "%s: %p waking up threads on the exclusive queue",
1055                                             __func__, lk);
1056                                         wakeup_swapper =
1057                                             sleepq_broadcast(&lk->lock_object,
1058                                             SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1059                                         queue = SQ_SHARED_QUEUE;
1060                                 }
1061                         } else {
1062
1063                                 /*
1064                                  * Exclusive waiters sleeping with LK_SLEEPFAIL
1065                                  * on and using interruptible sleeps/timeouts
1066                                  * may have left spurious lk_exslpfail counts
1067                                  * behind, so clean them up anyway.
1068                                  */
1069                                 lk->lk_exslpfail = 0;
1070                                 queue = SQ_SHARED_QUEUE;
1071                         }
1072
1073                         LOCK_LOG3(lk,
1074                             "%s: %p waking up threads on the %s queue",
1075                             __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1076                             "exclusive");
1077                         atomic_store_rel_ptr(&lk->lk_lock, v);
1078                         wakeup_swapper |= sleepq_broadcast(&lk->lock_object,
1079                             SLEEPQ_LK, 0, queue);
1080                         sleepq_release(&lk->lock_object);
1081                         break;
1082                 } else
1083                         wakeup_swapper = wakeupshlk(lk, file, line);
1084                 break;
1085         case LK_DRAIN:
1086                 if (LK_CAN_WITNESS(flags))
1087                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1088                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1089                             ilk : NULL);
1090
1091                 /*
1092                  * Trying to drain a lock we already own will result in a
1093                  * deadlock.
1094                  */
1095                 if (lockmgr_xlocked(lk)) {
1096                         if (flags & LK_INTERLOCK)
1097                                 class->lc_unlock(ilk);
1098                         panic("%s: draining %s with the lock held @ %s:%d\n",
1099                             __func__, iwmesg, file, line);
1100                 }
1101
1102                 while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
1103 #ifdef HWPMC_HOOKS
1104                         PMC_SOFT_CALL( , , lock, failed);
1105 #endif
1106                         lock_profile_obtain_lock_failed(&lk->lock_object,
1107                             &contested, &waittime);
1108
1109                         /*
1110                          * If the operation is not expected to sleep, just
1111                          * give up and return.
1112                          */
1113                         if (LK_TRYOP(flags)) {
1114                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
1115                                     __func__, lk);
1116                                 error = EBUSY;
1117                                 break;
1118                         }
1119
1120                         /*
1121                          * Acquire the sleepqueue chain lock because we
1122                          * probably will need to manipulate waiters flags.
1123                          */
1124                         sleepq_lock(&lk->lock_object);
1125                         x = lk->lk_lock;
1126
1127                         /*
1128                          * If the lock has been released while we spun on
1129                          * the sleepqueue chain lock, just try again.
1130                          */
1131                         if (x == LK_UNLOCKED) {
1132                                 sleepq_release(&lk->lock_object);
1133                                 continue;
1134                         }
1135
1136                         v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1137                         if ((x & ~v) == LK_UNLOCKED) {
1138                                 v = (x & ~LK_EXCLUSIVE_SPINNERS);
1139
1140                                 /*
1141                                  * If interruptible sleeps left the exclusive
1142                                  * queue empty, avoid starvation of the
1143                                  * threads sleeping on the shared queue by
1144                                  * giving them precedence and cleaning up
1145                                  * the exclusive waiters bit anyway.
1146                                  * Please note that the lk_exslpfail count
1147                                  * may be lying about the real number of
1148                                  * waiters with the LK_SLEEPFAIL flag on
1149                                  * because they may be used in conjunction
1150                                  * with interruptible sleeps, so lk_exslpfail
1151                                  * should be considered an upper bound,
1152                                  * including the edge cases.
1153                                  */
1154                                 if (v & LK_EXCLUSIVE_WAITERS) {
1155                                         queue = SQ_EXCLUSIVE_QUEUE;
1156                                         v &= ~LK_EXCLUSIVE_WAITERS;
1157                                 } else {
1158
1159                                         /*
1160                                          * Exclusive waiters sleeping with
1161                                          * LK_SLEEPFAIL on and using
1162                                          * interruptible sleeps/timeouts may
1163                                          * have left spurious lk_exslpfail
1164                                          * counts behind, so clean up anyway.
1165                                          */
1166                                         MPASS(v & LK_SHARED_WAITERS);
1167                                         lk->lk_exslpfail = 0;
1168                                         queue = SQ_SHARED_QUEUE;
1169                                         v &= ~LK_SHARED_WAITERS;
1170                                 }
1171                                 if (queue == SQ_EXCLUSIVE_QUEUE) {
1172                                         realexslp =
1173                                             sleepq_sleepcnt(&lk->lock_object,
1174                                             SQ_EXCLUSIVE_QUEUE);
1175                                         if (lk->lk_exslpfail >= realexslp) {
1176                                                 lk->lk_exslpfail = 0;
1177                                                 queue = SQ_SHARED_QUEUE;
1178                                                 v &= ~LK_SHARED_WAITERS;
1179                                                 if (realexslp != 0) {
1180                                                         LOCK_LOG2(lk,
1181                                         "%s: %p has only LK_SLEEPFAIL sleepers",
1182                                                             __func__, lk);
1183                                                         LOCK_LOG2(lk,
1184                         "%s: %p waking up threads on the exclusive queue",
1185                                                             __func__, lk);
1186                                                         wakeup_swapper =
1187                                                             sleepq_broadcast(
1188                                                             &lk->lock_object,
1189                                                             SLEEPQ_LK, 0,
1190                                                             SQ_EXCLUSIVE_QUEUE);
1191                                                 }
1192                                         } else
1193                                                 lk->lk_exslpfail = 0;
1194                                 }
1195                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1196                                         sleepq_release(&lk->lock_object);
1197                                         continue;
1198                                 }
1199                                 LOCK_LOG3(lk,
1200                                 "%s: %p waking up all threads on the %s queue",
1201                                     __func__, lk, queue == SQ_SHARED_QUEUE ?
1202                                     "shared" : "exclusive");
1203                                 wakeup_swapper |= sleepq_broadcast(
1204                                     &lk->lock_object, SLEEPQ_LK, 0, queue);
1205
1206                                 /*
1207                                  * If shared waiters have been woken up, we need
1208                                  * to wait for one of them to acquire the lock
1209                                  * before setting the exclusive waiters flag, in
1210                                  * order to avoid a deadlock.
1211                                  */
1212                                 if (queue == SQ_SHARED_QUEUE) {
1213                                         for (v = lk->lk_lock;
1214                                             (v & LK_SHARE) && !LK_SHARERS(v);
1215                                             v = lk->lk_lock)
1216                                                 cpu_spinwait();
1217                                 }
1218                         }
1219
1220                         /*
1221                          * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1222                          * fail, loop back and retry.
1223                          */
1224                         if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1225                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1226                                     x | LK_EXCLUSIVE_WAITERS)) {
1227                                         sleepq_release(&lk->lock_object);
1228                                         continue;
1229                                 }
1230                                 LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1231                                     __func__, lk);
1232                         }
1233
1234                         /*
1235                          * Since we have been unable to acquire the
1236                          * exclusive lock and the exclusive waiters flag
1237                          * is set, we will sleep.
1238                          */
1239                         if (flags & LK_INTERLOCK) {
1240                                 class->lc_unlock(ilk);
1241                                 flags &= ~LK_INTERLOCK;
1242                         }
1243                         GIANT_SAVE();
1244                         sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1245                             SQ_EXCLUSIVE_QUEUE);
1246                         sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1247                         GIANT_RESTORE();
1248                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1249                             __func__, lk);
1250                 }
1251
1252                 if (error == 0) {
1253                         lock_profile_obtain_lock_success(&lk->lock_object,
1254                             contested, waittime, file, line);
1255                         LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1256                             lk->lk_recurse, file, line);
1257                         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1258                             LK_TRYWIT(flags), file, line);
1259                         TD_LOCKS_INC(curthread);
1260                         STACK_SAVE(lk);
1261                 }
1262                 break;
1263         default:
1264                 if (flags & LK_INTERLOCK)
1265                         class->lc_unlock(ilk);
1266                 panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1267         }
1268
1269         if (flags & LK_INTERLOCK)
1270                 class->lc_unlock(ilk);
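             /*
              * sleepq_broadcast() may have made a swapped-out thread runnable;
              * wake up the swapper (proc0) only now that the sleep queue and
              * the interlock have been dropped.
              */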
1271         if (wakeup_swapper)
1272                 kick_proc0();
1273
1274         return (error);
1275 }
1276
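     /*
      * Disown the lock: hand exclusive ownership from the current thread over
      * to the fictitious LK_KERNPROC owner, preserving any waiters bits, so
      * that the lock can later be released by a different thread or context
      * than the one that acquired it.
      */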
1277 void
1278 _lockmgr_disown(struct lock *lk, const char *file, int line)
1279 {
1280         uintptr_t tid, x;
1281
1282         if (SCHEDULER_STOPPED())
1283                 return;
1284
1285         tid = (uintptr_t)curthread;
1286         _lockmgr_assert(lk, KA_XLOCKED, file, line);
1287
1288         /*
1289          * Panic if the lock is recursed.
1290          */
1291         if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1292                 panic("%s: disown a recursed lockmgr @ %s:%d\n",
1293                     __func__,  file, line);
1294
1295         /*
1296          * If the owner is already LK_KERNPROC, just skip the whole operation.
1297          */
1298         if (LK_HOLDER(lk->lk_lock) != tid)
1299                 return;
1300         lock_profile_release_lock(&lk->lock_object);
1301         LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1302         WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1303         TD_LOCKS_DEC(curthread);
1304         STACK_SAVE(lk);
1305
1306         /*
1307          * In order to preserve the waiters flags, just spin.
1308          */
1309         for (;;) {
1310                 x = lk->lk_lock;
1311                 MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1312                 x &= LK_ALL_WAITERS;
1313                 if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1314                     LK_KERNPROC | x))
1315                         return;
1316                 cpu_spinwait();
1317         }
1318 }
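
     /*
      * Illustrative sketch only (the 'obj' structure and its lock are
      * hypothetical): a typical disown pattern hands a locked object to
      * another context that will release the lock later:
      *
      *	lockmgr(&obj->lock, LK_EXCLUSIVE, NULL);
      *	... queue obj for asynchronous completion ...
      *	lockmgr_disown(&obj->lock);
      *
      * The completion path may then call lockmgr(&obj->lock, LK_RELEASE, NULL)
      * even though it never acquired the lock itself.
      */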
1319
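     /*
      * Print the current state of the lock (unlocked, shared count or
      * exclusive owner, plus any pending waiters and spinners) for debugging
      * purposes.  The lock word is read without synchronization, so the
      * output is only advisory.
      */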
1320 void
1321 lockmgr_printinfo(const struct lock *lk)
1322 {
1323         struct thread *td;
1324         uintptr_t x;
1325
1326         if (lk->lk_lock == LK_UNLOCKED)
1327                 printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1328         else if (lk->lk_lock & LK_SHARE)
1329                 printf("lock type %s: SHARED (count %ju)\n",
1330                     lk->lock_object.lo_name,
1331                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1332         else {
1333                 td = lockmgr_xholder(lk);
1334                 printf("lock type %s: EXCL by thread %p "
1335                     "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name, td,
1336                     td->td_proc->p_pid, td->td_proc->p_comm, td->td_tid);
1337         }
1338
1339         x = lk->lk_lock;
1340         if (x & LK_EXCLUSIVE_WAITERS)
1341                 printf(" with exclusive waiters pending\n");
1342         if (x & LK_SHARED_WAITERS)
1343                 printf(" with shared waiters pending\n");
1344         if (x & LK_EXCLUSIVE_SPINNERS)
1345                 printf(" with exclusive spinners pending\n");
1346
1347         STACK_PRINT(lk);
1348 }
1349
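     /*
      * Report the status of the lock with respect to the current thread:
      * return LK_EXCLUSIVE if the lock is held exclusively by curthread or
      * has been disowned to LK_KERNPROC, LK_EXCLOTHER if it is held
      * exclusively by another thread, LK_SHARED if it is held in shared mode
      * and 0 if it is unlocked.
      *
      * Illustrative sketch only ('sc' and its embedded lock are
      * hypothetical): a caller may use this to assert ownership:
      *
      *	KASSERT(lockstatus(&sc->sc_lock) == LK_EXCLUSIVE,
      *	    ("sc_lock not exclusively held"));
      */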
1350 int
1351 lockstatus(const struct lock *lk)
1352 {
1353         uintptr_t v, x;
1354         int ret;
1355
1356         ret = LK_SHARED;
1357         x = lk->lk_lock;
1358         v = LK_HOLDER(x);
1359
1360         if ((x & LK_SHARE) == 0) {
1361                 if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1362                         ret = LK_EXCLUSIVE;
1363                 else
1364                         ret = LK_EXCLOTHER;
1365         } else if (x == LK_UNLOCKED)
1366                 ret = 0;
1367
1368         return (ret);
1369 }
1370
1371 #ifdef INVARIANT_SUPPORT
1372
1373 FEATURE(invariant_support,
1374     "Support for modules compiled with INVARIANTS option");
1375
1376 #ifndef INVARIANTS
1377 #undef  _lockmgr_assert
1378 #endif
1379
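     /*
      * Assert that the lock is in the state described by 'what' (shared,
      * exclusive, either, or unlocked, optionally qualified by KA_RECURSED or
      * KA_NOTRECURSED) and panic if it is not.  This is a no-op once the
      * system has panicked.
      */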
1380 void
1381 _lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
1382 {
1383         int slocked = 0;
1384
1385         if (panicstr != NULL)
1386                 return;
1387         switch (what) {
1388         case KA_SLOCKED:
1389         case KA_SLOCKED | KA_NOTRECURSED:
1390         case KA_SLOCKED | KA_RECURSED:
1391                 slocked = 1;
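                     /* FALLTHROUGH: the KA_LOCKED checks apply to both cases. */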
1392         case KA_LOCKED:
1393         case KA_LOCKED | KA_NOTRECURSED:
1394         case KA_LOCKED | KA_RECURSED:
1395 #ifdef WITNESS
1396
1397                 /*
1398                  * We cannot trust WITNESS if the lock is held in exclusive
1399                  * mode and a call to lockmgr_disown() happened.
1400                  * Work around this by skipping the check if the lock is held
1401                  * in exclusive mode, even for the KA_LOCKED case.
1402                  */
1403                 if (slocked || (lk->lk_lock & LK_SHARE)) {
1404                         witness_assert(&lk->lock_object, what, file, line);
1405                         break;
1406                 }
1407 #endif
1408                 if (lk->lk_lock == LK_UNLOCKED ||
1409                     ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1410                     (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1411                         panic("Lock %s not %slocked @ %s:%d\n",
1412                             lk->lock_object.lo_name, slocked ? "share" : "",
1413                             file, line);
1414
1415                 if ((lk->lk_lock & LK_SHARE) == 0) {
1416                         if (lockmgr_recursed(lk)) {
1417                                 if (what & KA_NOTRECURSED)
1418                                         panic("Lock %s recursed @ %s:%d\n",
1419                                             lk->lock_object.lo_name, file,
1420                                             line);
1421                         } else if (what & KA_RECURSED)
1422                                 panic("Lock %s not recursed @ %s:%d\n",
1423                                     lk->lock_object.lo_name, file, line);
1424                 }
1425                 break;
1426         case KA_XLOCKED:
1427         case KA_XLOCKED | KA_NOTRECURSED:
1428         case KA_XLOCKED | KA_RECURSED:
1429                 if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1430                         panic("Lock %s not exclusively locked @ %s:%d\n",
1431                             lk->lock_object.lo_name, file, line);
1432                 if (lockmgr_recursed(lk)) {
1433                         if (what & KA_NOTRECURSED)
1434                                 panic("Lock %s recursed @ %s:%d\n",
1435                                     lk->lock_object.lo_name, file, line);
1436                 } else if (what & KA_RECURSED)
1437                         panic("Lock %s not recursed @ %s:%d\n",
1438                             lk->lock_object.lo_name, file, line);
1439                 break;
1440         case KA_UNLOCKED:
1441                 if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1442                         panic("Lock %s exclusively locked @ %s:%d\n",
1443                             lk->lock_object.lo_name, file, line);
1444                 break;
1445         default:
1446                 panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1447                     line);
1448         }
1449 }
1450 #endif
1451
1452 #ifdef DDB
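     /*
      * If the thread is blocked on a lockmgr lock, print the lock's name and
      * state, store the exclusive owner (if any) in *ownerp and return
      * non-zero; return zero if the thread's wait channel is not a lockmgr
      * lock.  Used by DDB's lock-chain commands.
      */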
1453 int
1454 lockmgr_chain(struct thread *td, struct thread **ownerp)
1455 {
1456         struct lock *lk;
1457
1458         lk = td->td_wchan;
1459
1460         if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1461                 return (0);
1462         db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1463         if (lk->lk_lock & LK_SHARE)
1464                 db_printf("SHARED (count %ju)\n",
1465                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1466         else
1467                 db_printf("EXCL\n");
1468         *ownerp = lockmgr_xholder(lk);
1469
1470         return (1);
1471 }
1472
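     /*
      * DDB "show lock" handler for lockmgr locks: print the lock state along
      * with any pending waiters and spinners.
      */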
1473 static void
1474 db_show_lockmgr(const struct lock_object *lock)
1475 {
1476         struct thread *td;
1477         const struct lock *lk;
1478
1479         lk = (const struct lock *)lock;
1480
1481         db_printf(" state: ");
1482         if (lk->lk_lock == LK_UNLOCKED)
1483                 db_printf("UNLOCKED\n");
1484         else if (lk->lk_lock & LK_SHARE)
1485                 db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1486         else {
1487                 td = lockmgr_xholder(lk);
1488                 if (td == (struct thread *)LK_KERNPROC)
1489                         db_printf("XLOCK: LK_KERNPROC\n");
1490                 else
1491                         db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1492                             td->td_tid, td->td_proc->p_pid,
1493                             td->td_proc->p_comm);
1494                 if (lockmgr_recursed(lk))
1495                         db_printf(" recursed: %d\n", lk->lk_recurse);
1496         }
1497         db_printf(" waiters: ");
1498         switch (lk->lk_lock & LK_ALL_WAITERS) {
1499         case LK_SHARED_WAITERS:
1500                 db_printf("shared\n");
1501                 break;
1502         case LK_EXCLUSIVE_WAITERS:
1503                 db_printf("exclusive\n");
1504                 break;
1505         case LK_ALL_WAITERS:
1506                 db_printf("shared and exclusive\n");
1507                 break;
1508         default:
1509                 db_printf("none\n");
1510         }
1511         db_printf(" spinners: ");
1512         if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1513                 db_printf("exclusive\n");
1514         else
1515                 db_printf("none\n");
1516 }
1517 #endif