1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
28  * DAMAGE.
29  */
30
31 #include "opt_ddb.h"
32 #include "opt_hwpmc_hooks.h"
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include <sys/param.h>
38 #include <sys/kdb.h>
39 #include <sys/ktr.h>
40 #include <sys/lock.h>
41 #include <sys/lock_profile.h>
42 #include <sys/lockmgr.h>
43 #include <sys/lockstat.h>
44 #include <sys/mutex.h>
45 #include <sys/proc.h>
46 #include <sys/sleepqueue.h>
47 #ifdef DEBUG_LOCKS
48 #include <sys/stack.h>
49 #endif
50 #include <sys/sysctl.h>
51 #include <sys/systm.h>
52
53 #include <machine/cpu.h>
54
55 #ifdef DDB
56 #include <ddb/ddb.h>
57 #endif
58
59 #ifdef HWPMC_HOOKS
60 #include <sys/pmckern.h>
61 PMC_SOFT_DECLARE( , , lock, failed);
62 #endif
63
64 CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
65     (LK_ADAPTIVE | LK_NOSHARE));
66 CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
67     ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
68
69 #define SQ_EXCLUSIVE_QUEUE      0
70 #define SQ_SHARED_QUEUE         1
71
72 #ifndef INVARIANTS
73 #define _lockmgr_assert(lk, what, file, line)
74 #endif
75
76 #define TD_SLOCKS_INC(td)       ((td)->td_lk_slocks++)
77 #define TD_SLOCKS_DEC(td)       ((td)->td_lk_slocks--)
78
79 #ifndef DEBUG_LOCKS
80 #define STACK_PRINT(lk)
81 #define STACK_SAVE(lk)
82 #define STACK_ZERO(lk)
83 #else
84 #define STACK_PRINT(lk) stack_print_ddb(&(lk)->lk_stack)
85 #define STACK_SAVE(lk)  stack_save(&(lk)->lk_stack)
86 #define STACK_ZERO(lk)  stack_zero(&(lk)->lk_stack)
87 #endif
88
89 #define LOCK_LOG2(lk, string, arg1, arg2)                               \
90         if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
91                 CTR2(KTR_LOCK, (string), (arg1), (arg2))
92 #define LOCK_LOG3(lk, string, arg1, arg2, arg3)                         \
93         if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
94                 CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
95
96 #define GIANT_DECLARE                                                   \
97         int _i = 0;                                                     \
98         WITNESS_SAVE_DECL(Giant)
99 #define GIANT_RESTORE() do {                                            \
100         if (_i > 0) {                                                   \
101                 while (_i--)                                            \
102                         mtx_lock(&Giant);                               \
103                 WITNESS_RESTORE(&Giant.lock_object, Giant);             \
104         }                                                               \
105 } while (0)
106 #define GIANT_SAVE() do {                                               \
107         if (mtx_owned(&Giant)) {                                        \
108                 WITNESS_SAVE(&Giant.lock_object, Giant);                \
109                 while (mtx_owned(&Giant)) {                             \
110                         _i++;                                           \
111                         mtx_unlock(&Giant);                             \
112                 }                                                       \
113         }                                                               \
114 } while (0)
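/*
 * A lockmgr lock may sleep, and sleeping while holding Giant (possibly
 * recursively) would stall the rest of the system.  GIANT_SAVE() records
 * the recursion depth in _i and fully releases Giant before sleeping;
 * GIANT_RESTORE() reacquires it the same number of times afterwards.
 */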
115
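/*
 * Decide whether a shared acquisition may proceed for lock state 'x'.
 * Sharing is normally allowed only when the lock word is shared (an
 * unlocked lockmgr is encoded as shared with zero sharers) and no
 * exclusive waiters or spinners are queued.  The deadlock-treatment
 * exceptions below let a thread that already holds shared locks, or has
 * TDP_DEADLKTREAT set, jump ahead of exclusive waiters; 'fp' marks the
 * fast path, which never grants those exceptions.
 */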
116 static bool __always_inline
117 LK_CAN_SHARE(uintptr_t x, int flags, bool fp)
118 {
119
120         if ((x & (LK_SHARE | LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) ==
121             LK_SHARE)
122                 return (true);
123         if (fp || (!(x & LK_SHARE)))
124                 return (false);
125         if ((curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) ||
126             (curthread->td_pflags & TDP_DEADLKTREAT))
127                 return (true);
128         return (false);
129 }
130
131 #define LK_TRYOP(x)                                                     \
132         ((x) & LK_NOWAIT)
133
134 #define LK_CAN_WITNESS(x)                                               \
135         (((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
136 #define LK_TRYWIT(x)                                                    \
137         (LK_TRYOP(x) ? LOP_TRYLOCK : 0)
138
139 #define LK_CAN_ADAPT(lk, f)                                             \
140         (((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&             \
141         ((f) & LK_SLEEPFAIL) == 0)
142
143 #define lockmgr_disowned(lk)                                            \
144         (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
145
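/*
 * The lock word either encodes LK_SHARE plus a count of sharers, or the
 * owning thread pointer with its low bits reused for flag storage.
 * Masking off every flag bit except LK_SHARE thus recovers the owner for
 * comparison below; a disowned lock stores LK_KERNPROC as its owner.
 */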
146 #define lockmgr_xlocked_v(v)                                            \
147         (((v) & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
148
149 #define lockmgr_xlocked(lk) lockmgr_xlocked_v((lk)->lk_lock)
150
151 static void     assert_lockmgr(const struct lock_object *lock, int how);
152 #ifdef DDB
153 static void     db_show_lockmgr(const struct lock_object *lock);
154 #endif
155 static void     lock_lockmgr(struct lock_object *lock, uintptr_t how);
156 #ifdef KDTRACE_HOOKS
157 static int      owner_lockmgr(const struct lock_object *lock,
158                     struct thread **owner);
159 #endif
160 static uintptr_t unlock_lockmgr(struct lock_object *lock);
161
162 struct lock_class lock_class_lockmgr = {
163         .lc_name = "lockmgr",
164         .lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
165         .lc_assert = assert_lockmgr,
166 #ifdef DDB
167         .lc_ddb_show = db_show_lockmgr,
168 #endif
169         .lc_lock = lock_lockmgr,
170         .lc_unlock = unlock_lockmgr,
171 #ifdef KDTRACE_HOOKS
172         .lc_owner = owner_lockmgr,
173 #endif
174 };
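
/*
 * Typical consumers follow a pattern along these lines (an illustrative
 * sketch only; "map_lock" and "maplk" are hypothetical names):
 *
 *	struct lock map_lock;
 *
 *	lockinit(&map_lock, PVFS, "maplk", 0, LK_CANRECURSE);
 *	...
 *	lockmgr(&map_lock, LK_EXCLUSIVE, NULL);
 *	... modify the protected data ...
 *	lockmgr(&map_lock, LK_RELEASE, NULL);
 *	...
 *	lockdestroy(&map_lock);
 */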
175
176 struct lockmgr_wait {
177         const char *iwmesg;
178         int ipri;
179         int itimo;
180 };
181
182 static bool __always_inline lockmgr_slock_try(struct lock *lk, uintptr_t *xp,
183     int flags, bool fp);
184 static bool __always_inline lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp);
185
186 static void
187 lockmgr_exit(u_int flags, struct lock_object *ilk, int wakeup_swapper)
188 {
189         struct lock_class *class;
190
191         if (flags & LK_INTERLOCK) {
192                 class = LOCK_CLASS(ilk);
193                 class->lc_unlock(ilk);
194         }
195
196         if (__predict_false(wakeup_swapper))
197                 kick_proc0();
198 }
199
200 static void
201 lockmgr_note_shared_acquire(struct lock *lk, int contested,
202     uint64_t waittime, const char *file, int line, int flags)
203 {
204
205         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested,
206             waittime, file, line, LOCKSTAT_READER);
207         LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line);
208         WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line);
209         TD_LOCKS_INC(curthread);
210         TD_SLOCKS_INC(curthread);
211         STACK_SAVE(lk);
212 }
213
214 static void
215 lockmgr_note_shared_release(struct lock *lk, const char *file, int line)
216 {
217
218         LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_READER);
219         WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
220         LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
221         TD_LOCKS_DEC(curthread);
222         TD_SLOCKS_DEC(curthread);
223 }
224
225 static void
226 lockmgr_note_exclusive_acquire(struct lock *lk, int contested,
227     uint64_t waittime, const char *file, int line, int flags)
228 {
229
230         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested,
231             waittime, file, line, LOCKSTAT_WRITER);
232         LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
233         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file,
234             line);
235         TD_LOCKS_INC(curthread);
236         STACK_SAVE(lk);
237 }
238
239 static void
240 lockmgr_note_exclusive_release(struct lock *lk, const char *file, int line)
241 {
242
243         LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_WRITER);
244         LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file,
245             line);
246         WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
247         TD_LOCKS_DEC(curthread);
248 }
249
250 static __inline struct thread *
251 lockmgr_xholder(const struct lock *lk)
252 {
253         uintptr_t x;
254
255         x = lk->lk_lock;
256         return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
257 }
258
259 /*
260  * Assumes the sleepqueue chain lock is held on entry and returns with
261  * it released.  The generic interlock is assumed to be valid and
262  * previously checked.  If LK_INTERLOCK is specified, the interlock is
263  * not reacquired after the sleep.
264  */
265 static __inline int
266 sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
267     const char *wmesg, int pri, int timo, int queue)
268 {
269         GIANT_DECLARE;
270         struct lock_class *class;
271         int catch, error;
272
273         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
274         catch = pri & PCATCH;
275         pri &= PRIMASK;
276         error = 0;
277
278         LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
279             (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
280
281         if (flags & LK_INTERLOCK)
282                 class->lc_unlock(ilk);
283         if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
284                 lk->lk_exslpfail++;
285         GIANT_SAVE();
286         sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
287             SLEEPQ_INTERRUPTIBLE : 0), queue);
288         if ((flags & LK_TIMELOCK) && timo)
289                 sleepq_set_timeout(&lk->lock_object, timo);
290
291         /*
292          * Decide which sleep primitive to invoke.
293          */
294         if ((flags & LK_TIMELOCK) && timo && catch)
295                 error = sleepq_timedwait_sig(&lk->lock_object, pri);
296         else if ((flags & LK_TIMELOCK) && timo)
297                 error = sleepq_timedwait(&lk->lock_object, pri);
298         else if (catch)
299                 error = sleepq_wait_sig(&lk->lock_object, pri);
300         else
301                 sleepq_wait(&lk->lock_object, pri);
302         GIANT_RESTORE();
303         if ((flags & LK_SLEEPFAIL) && error == 0)
304                 error = ENOLCK;
305
306         return (error);
307 }
308
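/*
 * Release a shared lock, waking up sleeping waiters as appropriate.
 * Returns nonzero if proc0 (the swapper) must be kicked by the caller
 * once its locks are dropped.
 */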
309 static __inline int
310 wakeupshlk(struct lock *lk, const char *file, int line)
311 {
312         uintptr_t v, x, orig_x;
313         u_int realexslp;
314         int queue, wakeup_swapper;
315
316         wakeup_swapper = 0;
317         for (;;) {
318                 x = lk->lk_lock;
319                 if (lockmgr_sunlock_try(lk, &x))
320                         break;
321
322                 /*
323                  * We should have a sharer with waiters, so enter the hard
324                  * path in order to handle wakeups correctly.
325                  */
326                 sleepq_lock(&lk->lock_object);
327                 orig_x = lk->lk_lock;
328 retry_sleepq:
329                 x = orig_x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
330                 v = LK_UNLOCKED;
331
332                 /*
333                  * If the lock has exclusive waiters, give them preference in
334                  * order to avoid deadlock with incoming shared acquirers.
335                  * If interruptible sleeps left the exclusive queue empty,
336                  * avoid starving the threads sleeping on the shared
337                  * queue by giving them precedence and clearing the
338                  * exclusive waiters bit anyway.
339                  * Note that the lk_exslpfail count may overstate the
340                  * real number of waiters with the LK_SLEEPFAIL flag set,
341                  * because such waiters may also be using interruptible
342                  * sleeps, so lk_exslpfail should be treated only as an
343                  * upper bound.
344                  */
345                 realexslp = sleepq_sleepcnt(&lk->lock_object,
346                     SQ_EXCLUSIVE_QUEUE);
347                 if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
348                         if (lk->lk_exslpfail < realexslp) {
349                                 lk->lk_exslpfail = 0;
350                                 queue = SQ_EXCLUSIVE_QUEUE;
351                                 v |= (x & LK_SHARED_WAITERS);
352                         } else {
353                                 lk->lk_exslpfail = 0;
354                                 LOCK_LOG2(lk,
355                                     "%s: %p has only LK_SLEEPFAIL sleepers",
356                                     __func__, lk);
357                                 LOCK_LOG2(lk,
358                             "%s: %p waking up threads on the exclusive queue",
359                                     __func__, lk);
360                                 wakeup_swapper =
361                                     sleepq_broadcast(&lk->lock_object,
362                                     SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
363                                 queue = SQ_SHARED_QUEUE;
364                         }
365
366                 } else {
367
368                         /*
369                          * Exclusive waiters sleeping with LK_SLEEPFAIL on
370                          * and using interruptible sleeps/timeout may have
371                          * left spurious lk_exslpfail counts on, so clean
372                          * it up anyway.
373                          */
374                         lk->lk_exslpfail = 0;
375                         queue = SQ_SHARED_QUEUE;
376                 }
377
378                 if (lockmgr_sunlock_try(lk, &orig_x)) {
379                         sleepq_release(&lk->lock_object);
380                         break;
381                 }
382
383                 x |= LK_SHARERS_LOCK(1);
384                 if (!atomic_fcmpset_rel_ptr(&lk->lk_lock, &x, v)) {
385                         orig_x = x;
386                         goto retry_sleepq;
387                 }
388                 LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
389                     __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
390                     "exclusive");
391                 wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
392                     0, queue);
393                 sleepq_release(&lk->lock_object);
394                 break;
395         }
396
397         lockmgr_note_shared_release(lk, file, line);
398         return (wakeup_swapper);
399 }
400
401 static void
402 assert_lockmgr(const struct lock_object *lock, int what)
403 {
404
405         panic("lockmgr locks do not support assertions");
406 }
407
408 static void
409 lock_lockmgr(struct lock_object *lock, uintptr_t how)
410 {
411
412         panic("lockmgr locks do not support sleep interlocking");
413 }
414
415 static uintptr_t
416 unlock_lockmgr(struct lock_object *lock)
417 {
418
419         panic("lockmgr locks do not support sleep interlocking");
420 }
421
422 #ifdef KDTRACE_HOOKS
423 static int
424 owner_lockmgr(const struct lock_object *lock, struct thread **owner)
425 {
426
427         panic("lockmgr locks do not support owner inquiries");
428 }
429 #endif
430
431 void
432 lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
433 {
434         int iflags;
435
436         MPASS((flags & ~LK_INIT_MASK) == 0);
437         ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
438             ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
439             &lk->lk_lock));
440
441         iflags = LO_SLEEPABLE | LO_UPGRADABLE;
442         if (flags & LK_CANRECURSE)
443                 iflags |= LO_RECURSABLE;
444         if ((flags & LK_NODUP) == 0)
445                 iflags |= LO_DUPOK;
446         if (flags & LK_NOPROFILE)
447                 iflags |= LO_NOPROFILE;
448         if ((flags & LK_NOWITNESS) == 0)
449                 iflags |= LO_WITNESS;
450         if (flags & LK_QUIET)
451                 iflags |= LO_QUIET;
452         if (flags & LK_IS_VNODE)
453                 iflags |= LO_IS_VNODE;
454         iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
455
456         lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
457         lk->lk_lock = LK_UNLOCKED;
458         lk->lk_recurse = 0;
459         lk->lk_exslpfail = 0;
460         lk->lk_timo = timo;
461         lk->lk_pri = pri;
462         STACK_ZERO(lk);
463 }
464
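/*
 * As an illustrative sketch (hypothetical names), a vnode-style lock
 * that sleeps at PVFS priority with no timeout could be set up as:
 *
 *	lockinit(&vp_lock, PVFS, "vnlk", 0, LK_NODUP | LK_IS_VNODE);
 */
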
465 /*
466  * XXX: Gross hacks to manipulate external lock flags after
467  * initialization.  Used for certain vnode and buf locks.
468  */
469 void
470 lockallowshare(struct lock *lk)
471 {
472
473         lockmgr_assert(lk, KA_XLOCKED);
474         lk->lock_object.lo_flags &= ~LK_NOSHARE;
475 }
476
477 void
478 lockdisableshare(struct lock *lk)
479 {
480
481         lockmgr_assert(lk, KA_XLOCKED);
482         lk->lock_object.lo_flags |= LK_NOSHARE;
483 }
484
485 void
486 lockallowrecurse(struct lock *lk)
487 {
488
489         lockmgr_assert(lk, KA_XLOCKED);
490         lk->lock_object.lo_flags |= LO_RECURSABLE;
491 }
492
493 void
494 lockdisablerecurse(struct lock *lk)
495 {
496
497         lockmgr_assert(lk, KA_XLOCKED);
498         lk->lock_object.lo_flags &= ~LO_RECURSABLE;
499 }
500
501 void
502 lockdestroy(struct lock *lk)
503 {
504
505         KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
506         KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
507         KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
508         lock_destroy(&lk->lock_object);
509 }
510
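/*
 * Fast attempt at a shared acquisition.  Note that
 * atomic_fcmpset_acq_ptr() reloads *xp with the current lock word on
 * failure, so the loop re-evaluates LK_CAN_SHARE() against fresh state
 * on every iteration and gives up only once sharing is impossible.
 */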
511 static bool __always_inline
512 lockmgr_slock_try(struct lock *lk, uintptr_t *xp, int flags, bool fp)
513 {
514
515         /*
516          * If no other thread has an exclusive lock and
517          * no exclusive waiter is present, bump the count of
518          * sharers.  Since we have to preserve the state of
519          * waiters, if we fail to acquire the shared lock
520          * loop back and retry.
521          */
522         *xp = lk->lk_lock;
523         while (LK_CAN_SHARE(*xp, flags, fp)) {
524                 if (atomic_fcmpset_acq_ptr(&lk->lk_lock, xp,
525                     *xp + LK_ONE_SHARER)) {
526                         return (true);
527                 }
528         }
529         return (false);
530 }
531
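/*
 * Fast attempt at a shared release: simply drop one sharer, unless we
 * are the last sharer and waiters are queued, in which case the caller
 * must take the slow path to hand the lock off and issue wakeups.
 */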
532 static bool __always_inline
533 lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp)
534 {
535
536         for (;;) {
537                 if (LK_SHARERS(*xp) > 1 || !(*xp & LK_ALL_WAITERS)) {
538                         if (atomic_fcmpset_rel_ptr(&lk->lk_lock, xp,
539                             *xp - LK_ONE_SHARER))
540                                 return (true);
541                         continue;
542                 }
543                 break;
544         }
545         return (false);
546 }
547
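/*
 * Slow path for shared acquisition: detects deadlock against our own
 * exclusive lock, honors try operations, sets LK_SHARED_WAITERS and
 * sleeps on the shared queue until the lock can be shared.
 */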
548 static __noinline int
549 lockmgr_slock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
550     const char *file, int line, struct lockmgr_wait *lwa)
551 {
552         uintptr_t tid, x;
553         int error = 0;
554         const char *iwmesg;
555         int ipri, itimo;
556
557 #ifdef KDTRACE_HOOKS
558         uint64_t sleep_time = 0;
559 #endif
560 #ifdef LOCK_PROFILING
561         uint64_t waittime = 0;
562         int contested = 0;
563 #endif
564
565         if (__predict_false(panicstr != NULL))
566                 goto out;
567
568         tid = (uintptr_t)curthread;
569
570         if (LK_CAN_WITNESS(flags))
571                 WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
572                     file, line, flags & LK_INTERLOCK ? ilk : NULL);
573         for (;;) {
574                 if (lockmgr_slock_try(lk, &x, flags, false))
575                         break;
576 #ifdef HWPMC_HOOKS
577                 PMC_SOFT_CALL( , , lock, failed);
578 #endif
579                 lock_profile_obtain_lock_failed(&lk->lock_object,
580                     &contested, &waittime);
581
582                 /*
583                  * If the lock is already held by curthread in
584                  * exclusive mode, avoid a deadlock.
585                  */
586                 if (LK_HOLDER(x) == tid) {
587                         LOCK_LOG2(lk,
588                             "%s: %p already held in exclusive mode",
589                             __func__, lk);
590                         error = EDEADLK;
591                         break;
592                 }
593
594                 /*
595                  * If the operation is not allowed to sleep, just give up
596                  * and return.
597                  */
598                 if (LK_TRYOP(flags)) {
599                         LOCK_LOG2(lk, "%s: %p fails the try operation",
600                             __func__, lk);
601                         error = EBUSY;
602                         break;
603                 }
604
605                 /*
606                  * Acquire the sleepqueue chain lock because we
607                  * probably will need to manipulate waiters flags.
608                  */
609                 sleepq_lock(&lk->lock_object);
610                 x = lk->lk_lock;
611 retry_sleepq:
612
613                 /*
614                  * if the lock can be acquired in shared mode, try
615                  * again.
616                  */
617                 if (LK_CAN_SHARE(x, flags, false)) {
618                         sleepq_release(&lk->lock_object);
619                         continue;
620                 }
621
622                 /*
623                  * Try to set the LK_SHARED_WAITERS flag.  If we fail,
624                  * loop back and retry.
625                  */
626                 if ((x & LK_SHARED_WAITERS) == 0) {
627                         if (!atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
628                             x | LK_SHARED_WAITERS)) {
629                                 goto retry_sleepq;
630                         }
631                         LOCK_LOG2(lk, "%s: %p set shared waiters flag",
632                             __func__, lk);
633                 }
634
635                 if (lwa == NULL) {
636                         iwmesg = lk->lock_object.lo_name;
637                         ipri = lk->lk_pri;
638                         itimo = lk->lk_timo;
639                 } else {
640                         iwmesg = lwa->iwmesg;
641                         ipri = lwa->ipri;
642                         itimo = lwa->itimo;
643                 }
644
645                 /*
646                  * Since we have been unable to acquire the
647                  * shared lock and the shared waiters flag is set,
648                  * we will sleep.
649                  */
650 #ifdef KDTRACE_HOOKS
651                 sleep_time -= lockstat_nsecs(&lk->lock_object);
652 #endif
653                 error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
654                     SQ_SHARED_QUEUE);
655 #ifdef KDTRACE_HOOKS
656                 sleep_time += lockstat_nsecs(&lk->lock_object);
657 #endif
658                 flags &= ~LK_INTERLOCK;
659                 if (error) {
660                         LOCK_LOG3(lk,
661                             "%s: interrupted sleep for %p with %d",
662                             __func__, lk, error);
663                         break;
664                 }
665                 LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
666                     __func__, lk);
667         }
668         if (error == 0) {
669 #ifdef KDTRACE_HOOKS
670                 if (sleep_time != 0)
671                         LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time,
672                             LOCKSTAT_READER, (x & LK_SHARE) == 0,
673                             (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x));
674 #endif
675 #ifdef LOCK_PROFILING
676                 lockmgr_note_shared_acquire(lk, contested, waittime,
677                     file, line, flags);
678 #else
679                 lockmgr_note_shared_acquire(lk, 0, 0, file, line,
680                     flags);
681 #endif
682         }
683
684 out:
685         lockmgr_exit(flags, ilk, 0);
686         return (error);
687 }
688
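/*
 * Slow path for exclusive acquisition: handles recursion, honors try
 * operations, claims an ownerless but contested lock when possible,
 * and otherwise sets LK_EXCLUSIVE_WAITERS and sleeps on the exclusive
 * queue.
 */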
689 static __noinline int
690 lockmgr_xlock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
691     const char *file, int line, struct lockmgr_wait *lwa)
692 {
693         struct lock_class *class;
694         uintptr_t tid, x, v;
695         int error = 0;
696         const char *iwmesg;
697         int ipri, itimo;
698
699 #ifdef KDTRACE_HOOKS
700         uint64_t sleep_time = 0;
701 #endif
702 #ifdef LOCK_PROFILING
703         uint64_t waittime = 0;
704         int contested = 0;
705 #endif
706
707         if (__predict_false(panicstr != NULL))
708                 goto out;
709
710         tid = (uintptr_t)curthread;
711
712         if (LK_CAN_WITNESS(flags))
713                 WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
714                     LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
715                     ilk : NULL);
716
717         /*
718          * If curthread already holds the lock and this one is
719          * allowed to recurse, simply recurse on it.
720          */
721         if (lockmgr_xlocked(lk)) {
722                 if ((flags & LK_CANRECURSE) == 0 &&
723                     (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
724                         /*
725                          * For a try operation, avoid panicking; just
726                          * give up and return.
727                          */
728                         if (LK_TRYOP(flags)) {
729                                 LOCK_LOG2(lk,
730                                     "%s: %p fails the try operation",
731                                     __func__, lk);
732                                 error = EBUSY;
733                                 goto out;
734                         }
735                         if (flags & LK_INTERLOCK) {
736                                 class = LOCK_CLASS(ilk);
737                                 class->lc_unlock(ilk);
738                         }
739                         panic("%s: recursing on non recursive lockmgr %p "
740                             "@ %s:%d\n", __func__, lk, file, line);
741                 }
742                 lk->lk_recurse++;
743                 LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
744                 LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
745                     lk->lk_recurse, file, line);
746                 WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
747                     LK_TRYWIT(flags), file, line);
748                 TD_LOCKS_INC(curthread);
749                 goto out;
750         }
751
752         for (;;) {
753                 if (lk->lk_lock == LK_UNLOCKED &&
754                     atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
755                         break;
756 #ifdef HWPMC_HOOKS
757                 PMC_SOFT_CALL( , , lock, failed);
758 #endif
759                 lock_profile_obtain_lock_failed(&lk->lock_object,
760                     &contested, &waittime);
761
762                 /*
763                  * If the operation is not allowed to sleep, just give up
764                  * and return.
765                  */
766                 if (LK_TRYOP(flags)) {
767                         LOCK_LOG2(lk, "%s: %p fails the try operation",
768                             __func__, lk);
769                         error = EBUSY;
770                         break;
771                 }
772
773                 /*
774                  * Acquire the sleepqueue chain lock because we
775                  * probably will need to manipulate waiters flags.
776                  */
777                 sleepq_lock(&lk->lock_object);
778                 x = lk->lk_lock;
779 retry_sleepq:
780
781                 /*
782                  * If the lock has been released while we spun on
783                  * the sleepqueue chain lock just try again.
784                  */
785                 if (x == LK_UNLOCKED) {
786                         sleepq_release(&lk->lock_object);
787                         continue;
788                 }
789
790                 /*
791                  * The lock can be in a state where there is a
792                  * pending queue of waiters, but still no owner.
793                  * This happens when the lock is contested and an
794                  * owner is going to claim the lock.
795                  * If curthread is the one that successfully acquires
796                  * it, claim lock ownership and return, preserving the
797                  * waiters flags.
798                  */
799                 v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
800                 if ((x & ~v) == LK_UNLOCKED) {
801                         v &= ~LK_EXCLUSIVE_SPINNERS;
802                         if (atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
803                             tid | v)) {
804                                 sleepq_release(&lk->lock_object);
805                                 LOCK_LOG2(lk,
806                                     "%s: %p claimed by a new writer",
807                                     __func__, lk);
808                                 break;
809                         }
810                         goto retry_sleepq;
811                 }
812
813                 /*
814                  * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
815                  * fail, loop back and retry.
816                  */
817                 if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
818                         if (!atomic_fcmpset_ptr(&lk->lk_lock, &x,
819                             x | LK_EXCLUSIVE_WAITERS)) {
820                                 goto retry_sleepq;
821                         }
822                         LOCK_LOG2(lk, "%s: %p set excl waiters flag",
823                             __func__, lk);
824                 }
825
826                 if (lwa == NULL) {
827                         iwmesg = lk->lock_object.lo_name;
828                         ipri = lk->lk_pri;
829                         itimo = lk->lk_timo;
830                 } else {
831                         iwmesg = lwa->iwmesg;
832                         ipri = lwa->ipri;
833                         itimo = lwa->itimo;
834                 }
835
836                 /*
837                  * Since we have been unable to acquire the
838                  * exclusive lock and the exclusive waiters flag
839                  * is set, we will sleep.
840                  */
841 #ifdef KDTRACE_HOOKS
842                 sleep_time -= lockstat_nsecs(&lk->lock_object);
843 #endif
844                 error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
845                     SQ_EXCLUSIVE_QUEUE);
846 #ifdef KDTRACE_HOOKS
847                 sleep_time += lockstat_nsecs(&lk->lock_object);
848 #endif
849                 flags &= ~LK_INTERLOCK;
850                 if (error) {
851                         LOCK_LOG3(lk,
852                             "%s: interrupted sleep for %p with %d",
853                             __func__, lk, error);
854                         break;
855                 }
856                 LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
857                     __func__, lk);
858         }
859         if (error == 0) {
860 #ifdef KDTRACE_HOOKS
861                 if (sleep_time != 0)
862                         LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time,
863                             LOCKSTAT_WRITER, (x & LK_SHARE) == 0,
864                             (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x));
865 #endif
866 #ifdef LOCK_PROFILING
867                 lockmgr_note_exclusive_acquire(lk, contested, waittime,
868                     file, line, flags);
869 #else
870                 lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
871                     flags);
872 #endif
873         }
874
875 out:
876         lockmgr_exit(flags, ilk, 0);
877         return (error);
878 }
879
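/*
 * Upgrade a shared lock to an exclusive one.  The swap can be done
 * atomically only when we are the sole sharer; otherwise LK_TRYUPGRADE
 * fails with EBUSY while LK_UPGRADE drops the shared lock entirely and
 * competes for the exclusive lock from scratch.
 */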
880 static __noinline int
881 lockmgr_upgrade(struct lock *lk, u_int flags, struct lock_object *ilk,
882     const char *file, int line, struct lockmgr_wait *lwa)
883 {
884         uintptr_t tid, x, v;
885         int error = 0;
886         int wakeup_swapper = 0;
887         int op;
888
889         if (__predict_false(panicstr != NULL))
890                 goto out;
891
892         tid = (uintptr_t)curthread;
893
894         _lockmgr_assert(lk, KA_SLOCKED, file, line);
895         v = lk->lk_lock;
896         x = v & LK_ALL_WAITERS;
897         v &= LK_EXCLUSIVE_SPINNERS;
898
899         /*
900          * Try to switch from one shared lock to an exclusive one.
901          * We need to preserve waiters flags during the operation.
902          */
903         if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
904             tid | x)) {
905                 LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
906                     line);
907                 WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
908                     LK_TRYWIT(flags), file, line);
909                 LOCKSTAT_RECORD0(lockmgr__upgrade, lk);
910                 TD_SLOCKS_DEC(curthread);
911                 goto out;
912         }
913
914         op = flags & LK_TYPE_MASK;
915
916         /*
917          * In LK_TRYUPGRADE mode, do not drop the lock,
918          * returning EBUSY instead.
919          */
920         if (op == LK_TRYUPGRADE) {
921                 LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
922                     __func__, lk);
923                 error = EBUSY;
924                 goto out;
925         }
926
927         /*
928          * We have been unable to upgrade, so just give up
929          * the shared lock.
930          */
931         wakeup_swapper |= wakeupshlk(lk, file, line);
932         error = lockmgr_xlock_hard(lk, flags, ilk, file, line, lwa);
933         flags &= ~LK_INTERLOCK;
934 out:
935         lockmgr_exit(flags, ilk, wakeup_swapper);
936         return (error);
937 }
938
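/*
 * Front end for lock acquisition: attempt the uncontested shared or
 * exclusive fast path, punting to the corresponding hard function on
 * failure and to __lockmgr_args() for any other operation.
 */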
939 int
940 lockmgr_lock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk,
941     const char *file, int line)
942 {
943         struct lock_class *class;
944         uintptr_t x, tid;
945         u_int op;
946         bool locked;
947
948         if (__predict_false(panicstr != NULL))
949                 return (0);
950
951         op = flags & LK_TYPE_MASK;
952         locked = false;
953         switch (op) {
954         case LK_SHARED:
955                 if (LK_CAN_WITNESS(flags))
956                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
957                             file, line, flags & LK_INTERLOCK ? ilk : NULL);
958                 if (__predict_false(lk->lock_object.lo_flags & LK_NOSHARE))
959                         break;
960                 if (lockmgr_slock_try(lk, &x, flags, true)) {
961                         lockmgr_note_shared_acquire(lk, 0, 0,
962                             file, line, flags);
963                         locked = true;
964                 } else {
965                         return (lockmgr_slock_hard(lk, flags, ilk, file, line,
966                             NULL));
967                 }
968                 break;
969         case LK_EXCLUSIVE:
970                 if (LK_CAN_WITNESS(flags))
971                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
972                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
973                             ilk : NULL);
974                 tid = (uintptr_t)curthread;
975                 if (lk->lk_lock == LK_UNLOCKED &&
976                     atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
977                         lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
978                             flags);
979                         locked = true;
980                 } else {
981                         return (lockmgr_xlock_hard(lk, flags, ilk, file, line,
982                             NULL));
983                 }
984                 break;
985         case LK_UPGRADE:
986         case LK_TRYUPGRADE:
987                 return (lockmgr_upgrade(lk, flags, ilk, file, line, NULL));
988         default:
989                 break;
990         }
991         if (__predict_true(locked)) {
992                 if (__predict_false(flags & LK_INTERLOCK)) {
993                         class = LOCK_CLASS(ilk);
994                         class->lc_unlock(ilk);
995                 }
996                 return (0);
997         } else {
998                 return (__lockmgr_args(lk, flags, ilk, LK_WMESG_DEFAULT,
999                     LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, file, line));
1000         }
1001 }
1002
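/*
 * Slow path for a shared release; the waiter handling is deferred to
 * wakeupshlk() and the interlock is dropped on the way out.
 */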
1003 static __noinline int
1004 lockmgr_sunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
1005     const char *file, int line)
1007 {
1008         int wakeup_swapper = 0;
1009
1010         if (__predict_false(panicstr != NULL))
1011                 goto out;
1012
1013         wakeup_swapper = wakeupshlk(lk, file, line);
1014
1015 out:
1016         lockmgr_exit(flags, ilk, wakeup_swapper);
1017         return (0);
1018 }
1019
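/*
 * Slow path for an exclusive release: unrecurses if needed, otherwise
 * clears ownership (handling locks disowned to LK_KERNPROC) and wakes
 * up the preferred sleep queue, favoring exclusive waiters.
 */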
1020 static __noinline int
1021 lockmgr_xunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
1022     const char *file, int line)
1023 {
1024         uintptr_t tid, v;
1025         int wakeup_swapper = 0;
1026         u_int realexslp;
1027         int queue;
1028
1029         if (__predict_false(panicstr != NULL))
1030                 goto out;
1031
1032         tid = (uintptr_t)curthread;
1033
1034         /*
1035          * As a first option, treat the lock as if it has no
1036          * waiters.
1037          * Fix up the tid variable if the lock has been disowned.
1038          */
1039         if (LK_HOLDER(x) == LK_KERNPROC)
1040                 tid = LK_KERNPROC;
1041         else {
1042                 WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1043                 TD_LOCKS_DEC(curthread);
1044         }
1045         LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
1046
1047         /*
1048          * The lock is held in exclusive mode.
1049          * If the lock is also recursed, just unrecurse it.
1050          */
1051         if (lockmgr_xlocked_v(x) && lockmgr_recursed(lk)) {
1052                 LOCK_LOG2(lk, "%s: %p unrecursing", __func__, lk);
1053                 lk->lk_recurse--;
1054                 goto out;
1055         }
1056         if (tid != LK_KERNPROC)
1057                 LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk,
1058                     LOCKSTAT_WRITER);
1059
1060         if (x == tid && atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED))
1061                 goto out;
1062
1063         sleepq_lock(&lk->lock_object);
1064         x = lk->lk_lock;
1065         v = LK_UNLOCKED;
1066
1067         /*
1068          * If the lock has exclusive waiters, give them
1069          * preference in order to avoid deadlock with
1070          * incoming shared acquirers.
1071          * If interruptible sleeps left the exclusive queue
1072          * empty, avoid starving the threads sleeping
1073          * on the shared queue by giving them precedence
1074          * and clearing the exclusive waiters bit anyway.
1075          * Note that the lk_exslpfail count may overstate
1076          * the real number of waiters with the
1077          * LK_SLEEPFAIL flag set, because such waiters may
1078          * also be using interruptible sleeps, so
1079          * lk_exslpfail should be treated only as an upper
1080          * bound.
1081          */
1082         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1083         realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE);
1084         if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1085                 if (lk->lk_exslpfail < realexslp) {
1086                         lk->lk_exslpfail = 0;
1087                         queue = SQ_EXCLUSIVE_QUEUE;
1088                         v |= (x & LK_SHARED_WAITERS);
1089                 } else {
1090                         lk->lk_exslpfail = 0;
1091                         LOCK_LOG2(lk,
1092                             "%s: %p has only LK_SLEEPFAIL sleepers",
1093                             __func__, lk);
1094                         LOCK_LOG2(lk,
1095                             "%s: %p waking up threads on the exclusive queue",
1096                             __func__, lk);
1097                         wakeup_swapper = sleepq_broadcast(&lk->lock_object,
1098                             SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1099                         queue = SQ_SHARED_QUEUE;
1100                 }
1101         } else {
1102
1103                 /*
1104                  * Exclusive waiters sleeping with LK_SLEEPFAIL
1105                  * on and using interruptible sleeps/timeout
1106          * may have left spurious lk_exslpfail counts
1107                  * on, so clean it up anyway.
1108                  */
1109                 lk->lk_exslpfail = 0;
1110                 queue = SQ_SHARED_QUEUE;
1111         }
1112
1113         LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
1114             __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1115             "exclusive");
1116         atomic_store_rel_ptr(&lk->lk_lock, v);
1117         wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue);
1118         sleepq_release(&lk->lock_object);
1119
1120 out:
1121         lockmgr_exit(flags, ilk, wakeup_swapper);
1122         return (0);
1123 }
1124
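/*
 * Front end for lock release: attempt the uncontested shared or
 * exclusive fast path and fall back to the hard functions when waiters
 * or recursion must be dealt with.
 */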
1125 int
1126 lockmgr_unlock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk)
1127 {
1128         struct lock_class *class;
1129         uintptr_t x, tid;
1130         const char *file;
1131         int line;
1132
1133         if (__predict_false(panicstr != NULL))
1134                 return (0);
1135
1136         file = __FILE__;
1137         line = __LINE__;
1138
1139         _lockmgr_assert(lk, KA_LOCKED, file, line);
1140         x = lk->lk_lock;
1141         if (__predict_true((x & LK_SHARE) != 0)) {
1142                 if (lockmgr_sunlock_try(lk, &x)) {
1143                         lockmgr_note_shared_release(lk, file, line);
1144                 } else {
1145                         return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
1146                 }
1147         } else {
1148                 tid = (uintptr_t)curthread;
1149                 if (!lockmgr_recursed(lk) &&
1150                     atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) {
1151                         lockmgr_note_exclusive_release(lk, file, line);
1152                 } else {
1153                         return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
1154                 }
1155         }
1156         if (__predict_false(flags & LK_INTERLOCK)) {
1157                 class = LOCK_CLASS(ilk);
1158                 class->lc_unlock(ilk);
1159         }
1160         return (0);
1161 }
1162
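/*
 * Full-blown lock operation dispatcher.  Besides the acquire, upgrade
 * and release paths, which mostly forward to the functions above, it
 * implements LK_DOWNGRADE and LK_DRAIN, honoring the per-call wmesg,
 * priority and timeout overrides.
 */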
1163 int
1164 __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
1165     const char *wmesg, int pri, int timo, const char *file, int line)
1166 {
1167         GIANT_DECLARE;
1168         struct lockmgr_wait lwa;
1169         struct lock_class *class;
1170         const char *iwmesg;
1171         uintptr_t tid, v, x;
1172         u_int op, realexslp;
1173         int error, ipri, itimo, queue, wakeup_swapper;
1174 #ifdef LOCK_PROFILING
1175         uint64_t waittime = 0;
1176         int contested = 0;
1177 #endif
1178
1179         if (panicstr != NULL)
1180                 return (0);
1181
1182         error = 0;
1183         tid = (uintptr_t)curthread;
1184         op = (flags & LK_TYPE_MASK);
1185         iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
1186         ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
1187         itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
1188
1189         lwa.iwmesg = iwmesg;
1190         lwa.ipri = ipri;
1191         lwa.itimo = itimo;
1192
1193         MPASS((flags & ~LK_TOTAL_MASK) == 0);
1194         KASSERT((op & (op - 1)) == 0,
1195             ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
1196         KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
1197             (op != LK_DOWNGRADE && op != LK_RELEASE),
1198             ("%s: Invalid flags for the requested operation @ %s:%d",
1199             __func__, file, line));
1200         KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
1201             ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
1202             __func__, file, line));
1203         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
1204             ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
1205             lk->lock_object.lo_name, file, line));
1206
1207         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
1208
1209         if (lk->lock_object.lo_flags & LK_NOSHARE) {
1210                 switch (op) {
1211                 case LK_SHARED:
1212                         op = LK_EXCLUSIVE;
1213                         break;
1214                 case LK_UPGRADE:
1215                 case LK_TRYUPGRADE:
1216                 case LK_DOWNGRADE:
1217                         _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
1218                             file, line);
1219                         if (flags & LK_INTERLOCK)
1220                                 class->lc_unlock(ilk);
1221                         return (0);
1222                 }
1223         }
1224
1225         wakeup_swapper = 0;
1226         switch (op) {
1227         case LK_SHARED:
1228                 return (lockmgr_slock_hard(lk, flags, ilk, file, line, &lwa));
1229                 break;
1230         case LK_UPGRADE:
1231         case LK_TRYUPGRADE:
1232                 return (lockmgr_upgrade(lk, flags, ilk, file, line, &lwa));
1233                 break;
1234         case LK_EXCLUSIVE:
1235                 return (lockmgr_xlock_hard(lk, flags, ilk, file, line, &lwa));
1236                 break;
1237         case LK_DOWNGRADE:
1238                 _lockmgr_assert(lk, KA_XLOCKED, file, line);
1239                 WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
1240
1241                 /*
1242                  * Panic if the lock is recursed.
1243                  */
1244                 if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1245                         if (flags & LK_INTERLOCK)
1246                                 class->lc_unlock(ilk);
1247                         panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
1248                             __func__, iwmesg, file, line);
1249                 }
1250                 TD_SLOCKS_INC(curthread);
1251
1252                 /*
1253                  * In order to preserve waiters flags, just spin.
1254                  */
1255                 for (;;) {
1256                         x = lk->lk_lock;
1257                         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1258                         x &= LK_ALL_WAITERS;
1259                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1260                             LK_SHARERS_LOCK(1) | x))
1261                                 break;
1262                         cpu_spinwait();
1263                 }
1264                 LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
1265                 LOCKSTAT_RECORD0(lockmgr__downgrade, lk);
1266                 break;
1267         case LK_RELEASE:
1268                 _lockmgr_assert(lk, KA_LOCKED, file, line);
1269                 x = lk->lk_lock;
1270
1271                 if (__predict_true((x & LK_SHARE) != 0)) {
1272                         return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
1273                 } else {
1274                         return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
1275                 }
1276                 break;
1277         case LK_DRAIN:
1278                 if (LK_CAN_WITNESS(flags))
1279                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1280                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1281                             ilk : NULL);
1282
1283                 /*
1284                  * Trying to drain a lock we already own will result in a
1285                  * deadlock.
1286                  */
1287                 if (lockmgr_xlocked(lk)) {
1288                         if (flags & LK_INTERLOCK)
1289                                 class->lc_unlock(ilk);
1290                         panic("%s: draining %s with the lock held @ %s:%d\n",
1291                             __func__, iwmesg, file, line);
1292                 }
1293
1294                 for (;;) {
1295                         if (lk->lk_lock == LK_UNLOCKED &&
1296                             atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
1297                                 break;
1298
1299 #ifdef HWPMC_HOOKS
1300                         PMC_SOFT_CALL( , , lock, failed);
1301 #endif
1302                         lock_profile_obtain_lock_failed(&lk->lock_object,
1303                             &contested, &waittime);
1304
1305                         /*
1306                          * If the operation is not allowed to sleep, just give up
1307                          * and return.
1308                          */
1309                         if (LK_TRYOP(flags)) {
1310                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
1311                                     __func__, lk);
1312                                 error = EBUSY;
1313                                 break;
1314                         }
1315
1316                         /*
1317                          * Acquire the sleepqueue chain lock because we
1318                          * probably will need to manipulate waiters flags.
1319                          */
1320                         sleepq_lock(&lk->lock_object);
1321                         x = lk->lk_lock;
1322
1323                         /*
1324                          * If the lock has been released while we spun on
1325                          * the sleepqueue chain lock just try again.
1326                          */
1327                         if (x == LK_UNLOCKED) {
1328                                 sleepq_release(&lk->lock_object);
1329                                 continue;
1330                         }
1331
1332                         v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1333                         if ((x & ~v) == LK_UNLOCKED) {
1334                                 v = (x & ~LK_EXCLUSIVE_SPINNERS);
1335
1336                                 /*
1337                                  * queue empty, avoid starving the
1338                                  * threads sleeping on the shared queue
1339                                  * by giving them precedence and clearing
1340                                  * the exclusive waiters bit anyway.
1341                                  * Note that the lk_exslpfail count may
1342                                  * overstate the real number of waiters
1343                                  * with the LK_SLEEPFAIL flag set,
1344                                  * because such waiters may also be using
1345                                  * interruptible sleeps, so lk_exslpfail
1346                                  * should be treated only as an upper
1347                                  * bound on the number of such
1348                                  * waiters.
1349                                  */
1350                                 if (v & LK_EXCLUSIVE_WAITERS) {
1351                                         queue = SQ_EXCLUSIVE_QUEUE;
1352                                         v &= ~LK_EXCLUSIVE_WAITERS;
1353                                 } else {
1354
1355                                         /*
1356                                          * Exclusive waiters sleeping with
1357                                          * LK_SLEEPFAIL on and using
1358                                          * interruptible sleeps/timeout may
1359                                          * have left spurious lk_exslpfail
1360                                          * counts on, so clean it up anyway.
1361                                          */
1362                                         MPASS(v & LK_SHARED_WAITERS);
1363                                         lk->lk_exslpfail = 0;
1364                                         queue = SQ_SHARED_QUEUE;
1365                                         v &= ~LK_SHARED_WAITERS;
1366                                 }
1367                                 if (queue == SQ_EXCLUSIVE_QUEUE) {
1368                                         realexslp =
1369                                             sleepq_sleepcnt(&lk->lock_object,
1370                                             SQ_EXCLUSIVE_QUEUE);
1371                                         if (lk->lk_exslpfail >= realexslp) {
1372                                                 lk->lk_exslpfail = 0;
1373                                                 queue = SQ_SHARED_QUEUE;
1374                                                 v &= ~LK_SHARED_WAITERS;
1375                                                 if (realexslp != 0) {
1376                                                         LOCK_LOG2(lk,
1377                                         "%s: %p has only LK_SLEEPFAIL sleepers",
1378                                                             __func__, lk);
1379                                                         LOCK_LOG2(lk,
1380                         "%s: %p waking up threads on the exclusive queue",
1381                                                             __func__, lk);
1382                                                         wakeup_swapper =
1383                                                             sleepq_broadcast(
1384                                                             &lk->lock_object,
1385                                                             SLEEPQ_LK, 0,
1386                                                             SQ_EXCLUSIVE_QUEUE);
1387                                                 }
1388                                         } else
1389                                                 lk->lk_exslpfail = 0;
1390                                 }
1391                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1392                                         sleepq_release(&lk->lock_object);
1393                                         continue;
1394                                 }
1395                                 LOCK_LOG3(lk,
1396                                 "%s: %p waking up all threads on the %s queue",
1397                                     __func__, lk, queue == SQ_SHARED_QUEUE ?
1398                                     "shared" : "exclusive");
1399                                 wakeup_swapper |= sleepq_broadcast(
1400                                     &lk->lock_object, SLEEPQ_LK, 0, queue);
1401
1402                                 /*
1403                                  * If shared waiters have been woken up we
1404                                  * need to wait for one of them to acquire
1405                                  * the lock before setting the exclusive
1406                                  * waiters flag, in order to avoid a deadlock.
1407                                  */
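                                /*
                                 * Otherwise a woken shared waiter could see
                                 * LK_EXCLUSIVE_WAITERS set, defer to the
                                 * (sleeping) draining thread and go back to
                                 * sleep itself, leaving no thread to ever
                                 * issue the wakeup.
                                 */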
1408                                 if (queue == SQ_SHARED_QUEUE) {
1409                                         for (v = lk->lk_lock;
1410                                             (v & LK_SHARE) && !LK_SHARERS(v);
1411                                             v = lk->lk_lock)
1412                                                 cpu_spinwait();
1413                                 }
1414                         }
1415
1416                         /*
1417                          * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1418                          * fail, loop back and retry.
1419                          */
1420                         if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1421                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1422                                     x | LK_EXCLUSIVE_WAITERS)) {
1423                                         sleepq_release(&lk->lock_object);
1424                                         continue;
1425                                 }
1426                                 LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1427                                     __func__, lk);
1428                         }
1429
1430                         /*
1431                          * Since we have been unable to acquire the
1432                          * exclusive lock and the exclusive waiters flag
1433                          * is set, we will sleep.
1434                          */
1435                         if (flags & LK_INTERLOCK) {
1436                                 class->lc_unlock(ilk);
1437                                 flags &= ~LK_INTERLOCK;
1438                         }
1439                         GIANT_SAVE();
1440                         sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1441                             SQ_EXCLUSIVE_QUEUE);
1442                         sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1443                         GIANT_RESTORE();
1444                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1445                             __func__, lk);
1446                 }
1447
1448                 if (error == 0) {
1449                         lock_profile_obtain_lock_success(&lk->lock_object,
1450                             contested, waittime, file, line);
1451                         LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1452                             lk->lk_recurse, file, line);
1453                         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1454                             LK_TRYWIT(flags), file, line);
1455                         TD_LOCKS_INC(curthread);
1456                         STACK_SAVE(lk);
1457                 }
1458                 break;
1459         default:
1460                 if (flags & LK_INTERLOCK)
1461                         class->lc_unlock(ilk);
1462                 panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1463         }
1464
1465         if (flags & LK_INTERLOCK)
1466                 class->lc_unlock(ilk);
1467         if (wakeup_swapper)
1468                 kick_proc0();
1469
1470         return (error);
1471 }
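
/*
 * Illustrative sketch (not part of the original source): the LK_DRAIN
 * request handled above is typically used on teardown paths to wait
 * out every other user of a lock before destroying it.  The lock and
 * wmesg names below are hypothetical; lockinit(), lockmgr() and
 * lockdestroy() are the documented lockmgr(9) interfaces.
 *
 *	struct lock example_lk;
 *
 *	lockinit(&example_lk, PVFS, "explk", 0, 0);
 *	...
 *	lockmgr(&example_lk, LK_DRAIN, NULL);	(wait out all holders)
 *	lockmgr(&example_lk, LK_RELEASE, NULL);
 *	lockdestroy(&example_lk);
 */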
1472
1473 void
1474 _lockmgr_disown(struct lock *lk, const char *file, int line)
1475 {
1476         uintptr_t tid, x;
1477
1478         if (SCHEDULER_STOPPED())
1479                 return;
1480
1481         tid = (uintptr_t)curthread;
1482         _lockmgr_assert(lk, KA_XLOCKED, file, line);
1483
1484         /*
1485          * Panic if the lock is recursed.
1486          */
1487         if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1488                 panic("%s: disown a recursed lockmgr @ %s:%d\n",
1489                     __func__,  file, line);
1490
1491         /*
1492          * If the owner is already LK_KERNPROC, just skip the whole operation.
1493          */
1494         if (LK_HOLDER(lk->lk_lock) != tid)
1495                 return;
1496         lock_profile_release_lock(&lk->lock_object);
1497         LOCKSTAT_RECORD1(lockmgr__disown, lk, LOCKSTAT_WRITER);
1498         LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1499         WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1500         TD_LOCKS_DEC(curthread);
1501         STACK_SAVE(lk);
1502
1503         /*
1504          * In order to preserve waiters flags, just spin.
1505          */
1506         for (;;) {
1507                 x = lk->lk_lock;
1508                 MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1509                 x &= LK_ALL_WAITERS;
1510                 if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1511                     LK_KERNPROC | x))
1512                         return;
1513                 cpu_spinwait();
1514         }
1515 }
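
/*
 * Illustrative sketch (not part of the original source): the classic
 * consumer of disowning is the buffer cache, which hands a locked
 * buffer over to the kernel when async I/O is issued, roughly:
 *
 *	BUF_LOCK(bp, LK_EXCLUSIVE, NULL);
 *	...
 *	BUF_KERNPROC(bp);
 *
 * BUF_KERNPROC() wraps _lockmgr_disown() on bp->b_lock, so that
 * biodone() can later unlock a buffer whose original owner is gone.
 */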
1516
1517 void
1518 lockmgr_printinfo(const struct lock *lk)
1519 {
1520         struct thread *td;
1521         uintptr_t x;
1522
1523         if (lk->lk_lock == LK_UNLOCKED)
1524                 printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1525         else if (lk->lk_lock & LK_SHARE)
1526                 printf("lock type %s: SHARED (count %ju)\n",
1527                     lk->lock_object.lo_name,
1528                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1529         else {
1530                 td = lockmgr_xholder(lk);
1531                 if (td == (struct thread *)LK_KERNPROC)
1532                         printf("lock type %s: EXCL by KERNPROC\n",
1533                             lk->lock_object.lo_name);
1534                 else
1535                         printf("lock type %s: EXCL by thread %p "
1536                             "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
1537                             td, td->td_proc->p_pid, td->td_proc->p_comm,
1538                             td->td_tid);
1539         }
1540
1541         x = lk->lk_lock;
1542         if (x & LK_EXCLUSIVE_WAITERS)
1543                 printf(" with exclusive waiters pending\n");
1544         if (x & LK_SHARED_WAITERS)
1545                 printf(" with shared waiters pending\n");
1546         if (x & LK_EXCLUSIVE_SPINNERS)
1547                 printf(" with exclusive spinners pending\n");
1548
1549         STACK_PRINT(lk);
1550 }
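
/*
 * Illustrative note (not part of the original source): diagnostic
 * callers such as vn_printf() dump a vnode's lock state with, e.g.:
 *
 *	lockmgr_printinfo(vp->v_vnlock);
 */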
1551
1552 int
1553 lockstatus(const struct lock *lk)
1554 {
1555         uintptr_t v, x;
1556         int ret;
1557
1558         ret = LK_SHARED;
1559         x = lk->lk_lock;
1560         v = LK_HOLDER(x);
1561
1562         if ((x & LK_SHARE) == 0) {
1563                 if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1564                         ret = LK_EXCLUSIVE;
1565                 else
1566                         ret = LK_EXCLOTHER;
1567         } else if (x == LK_UNLOCKED)
1568                 ret = 0;
1569
1570         return (ret);
1571 }
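
/*
 * Illustrative sketch (not part of the original source): callers use
 * the return value to tell their own exclusive hold apart from other
 * lock states, as a vop_islocked implementation might:
 *
 *	int status = lockstatus(vp->v_vnlock);
 *
 *	LK_EXCLUSIVE:  held exclusively by curthread (or LK_KERNPROC)
 *	LK_SHARED:     held in shared mode
 *	LK_EXCLOTHER:  held exclusively by another thread
 *	0:             unlocked
 */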
1572
1573 #ifdef INVARIANT_SUPPORT
1574
1575 FEATURE(invariant_support,
1576     "Support for modules compiled with INVARIANTS option");
1577
1578 #ifndef INVARIANTS
1579 #undef  _lockmgr_assert
1580 #endif
1581
1582 void
1583 _lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
1584 {
1585         int slocked = 0;
1586
1587         if (panicstr != NULL)
1588                 return;
1589         switch (what) {
1590         case KA_SLOCKED:
1591         case KA_SLOCKED | KA_NOTRECURSED:
1592         case KA_SLOCKED | KA_RECURSED:
1593                 slocked = 1;
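                /* FALLTHROUGH */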
1594         case KA_LOCKED:
1595         case KA_LOCKED | KA_NOTRECURSED:
1596         case KA_LOCKED | KA_RECURSED:
1597 #ifdef WITNESS
1598
1599                 /*
1600                  * We cannot trust WITNESS if the lock is held in exclusive
1601                  * mode and a call to lockmgr_disown() happened.
1602                  * Work around this by skipping the check if the lock is
1603                  * held in exclusive mode, even for the KA_LOCKED case.
1604                  */
1605                 if (slocked || (lk->lk_lock & LK_SHARE)) {
1606                         witness_assert(&lk->lock_object, what, file, line);
1607                         break;
1608                 }
1609 #endif
1610                 if (lk->lk_lock == LK_UNLOCKED ||
1611                     ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1612                     (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1613                         panic("Lock %s not %slocked @ %s:%d\n",
1614                             lk->lock_object.lo_name, slocked ? "share" : "",
1615                             file, line);
1616
1617                 if ((lk->lk_lock & LK_SHARE) == 0) {
1618                         if (lockmgr_recursed(lk)) {
1619                                 if (what & KA_NOTRECURSED)
1620                                         panic("Lock %s recursed @ %s:%d\n",
1621                                             lk->lock_object.lo_name, file,
1622                                             line);
1623                         } else if (what & KA_RECURSED)
1624                                 panic("Lock %s not recursed @ %s:%d\n",
1625                                     lk->lock_object.lo_name, file, line);
1626                 }
1627                 break;
1628         case KA_XLOCKED:
1629         case KA_XLOCKED | KA_NOTRECURSED:
1630         case KA_XLOCKED | KA_RECURSED:
1631                 if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1632                         panic("Lock %s not exclusively locked @ %s:%d\n",
1633                             lk->lock_object.lo_name, file, line);
1634                 if (lockmgr_recursed(lk)) {
1635                         if (what & KA_NOTRECURSED)
1636                                 panic("Lock %s recursed @ %s:%d\n",
1637                                     lk->lock_object.lo_name, file, line);
1638                 } else if (what & KA_RECURSED)
1639                         panic("Lock %s not recursed @ %s:%d\n",
1640                             lk->lock_object.lo_name, file, line);
1641                 break;
1642         case KA_UNLOCKED:
1643                 if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1644                         panic("Lock %s exclusively locked @ %s:%d\n",
1645                             lk->lock_object.lo_name, file, line);
1646                 break;
1647         default:
1648                 panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1649                     line);
1650         }
1651 }
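
/*
 * Illustrative sketch (not part of the original source): consumers
 * normally reach this through the lockmgr_assert() macro, e.g.:
 *
 *	lockmgr_assert(&lk, KA_XLOCKED | KA_NOTRECURSED);
 *
 * which, given the checks above, panics unless the lock is held
 * exclusively (or disowned) and is not recursed.
 */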
1652 #endif
1653
1654 #ifdef DDB
1655 int
1656 lockmgr_chain(struct thread *td, struct thread **ownerp)
1657 {
1658         struct lock *lk;
1659
1660         lk = td->td_wchan;
1661
1662         if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1663                 return (0);
1664         db_printf("blocked on lockmgr %s ", lk->lock_object.lo_name);
1665         if (lk->lk_lock & LK_SHARE)
1666                 db_printf("SHARED (count %ju)\n",
1667                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1668         else
1669                 db_printf("EXCL\n");
1670         *ownerp = lockmgr_xholder(lk);
1671
1672         return (1);
1673 }
1674
1675 static void
1676 db_show_lockmgr(const struct lock_object *lock)
1677 {
1678         struct thread *td;
1679         const struct lock *lk;
1680
1681         lk = (const struct lock *)lock;
1682
1683         db_printf(" state: ");
1684         if (lk->lk_lock == LK_UNLOCKED)
1685                 db_printf("UNLOCKED\n");
1686         else if (lk->lk_lock & LK_SHARE)
1687                 db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1688         else {
1689                 td = lockmgr_xholder(lk);
1690                 if (td == (struct thread *)LK_KERNPROC)
1691                         db_printf("XLOCK: LK_KERNPROC\n");
1692                 else
1693                         db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1694                             td->td_tid, td->td_proc->p_pid,
1695                             td->td_proc->p_comm);
1696                 if (lockmgr_recursed(lk))
1697                         db_printf(" recursed: %d\n", lk->lk_recurse);
1698         }
1699         db_printf(" waiters: ");
1700         switch (lk->lk_lock & LK_ALL_WAITERS) {
1701         case LK_SHARED_WAITERS:
1702                 db_printf("shared\n");
1703                 break;
1704         case LK_EXCLUSIVE_WAITERS:
1705                 db_printf("exclusive\n");
1706                 break;
1707         case LK_ALL_WAITERS:
1708                 db_printf("shared and exclusive\n");
1709                 break;
1710         default:
1711                 db_printf("none\n");
1712         }
1713         db_printf(" spinners: ");
1714         if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1715                 db_printf("exclusive\n");
1716         else
1717                 db_printf("none\n");
1718 }
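
/*
 * Illustrative note (not part of the original source): this routine
 * backs the generic DDB lock-printing machinery, so a lockmgr lock
 * can be inspected from the debugger prompt with:
 *
 *	db> show lock <address>
 */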
1719 #endif