/* sys/kern/kern_lock.c */
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/lock_profile.h>
#include <sys/lockmgr.h>
#include <sys/lockstat.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sleepqueue.h>
#ifdef DEBUG_LOCKS
#include <sys/stack.h>
#endif
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <machine/cpu.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
#endif

CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
    (LK_ADAPTIVE | LK_NOSHARE));
CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
    ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));

#define SQ_EXCLUSIVE_QUEUE      0
#define SQ_SHARED_QUEUE         1

#ifndef INVARIANTS
#define _lockmgr_assert(lk, what, file, line)
#endif

#define TD_SLOCKS_INC(td)       ((td)->td_lk_slocks++)
#define TD_SLOCKS_DEC(td)       ((td)->td_lk_slocks--)

#ifndef DEBUG_LOCKS
#define STACK_PRINT(lk)
#define STACK_SAVE(lk)
#define STACK_ZERO(lk)
#else
#define STACK_PRINT(lk) stack_print_ddb(&(lk)->lk_stack)
#define STACK_SAVE(lk)  stack_save(&(lk)->lk_stack)
#define STACK_ZERO(lk)  stack_zero(&(lk)->lk_stack)
#endif

#define LOCK_LOG2(lk, string, arg1, arg2)                               \
        if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
                CTR2(KTR_LOCK, (string), (arg1), (arg2))
#define LOCK_LOG3(lk, string, arg1, arg2, arg3)                         \
        if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
                CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))

#define GIANT_DECLARE                                                   \
        int _i = 0;                                                     \
        WITNESS_SAVE_DECL(Giant)
#define GIANT_RESTORE() do {                                            \
        if (__predict_false(_i > 0)) {                                  \
                while (_i--)                                            \
                        mtx_lock(&Giant);                               \
                WITNESS_RESTORE(&Giant.lock_object, Giant);             \
        }                                                               \
} while (0)
#define GIANT_SAVE() do {                                               \
        if (__predict_false(mtx_owned(&Giant))) {                       \
                WITNESS_SAVE(&Giant.lock_object, Giant);                \
                while (mtx_owned(&Giant)) {                             \
                        _i++;                                           \
                        mtx_unlock(&Giant);                             \
                }                                                       \
        }                                                               \
} while (0)

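/*
 * Usage sketch for the Giant dance above (illustrative only; the sleep
 * point below is hypothetical, not code from this file).  A possibly
 * recursed Giant must be fully dropped before sleeping and reacquired
 * the same number of times afterwards:
 *
 *      GIANT_DECLARE;
 *      ...
 *      GIANT_SAVE();
 *      sleepq_wait(&lk->lock_object, pri);
 *      GIANT_RESTORE();
 *
 * This mirrors how sleeplk() below brackets its sleepq_*wait*() calls.
 */
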
static bool __always_inline
LK_CAN_SHARE(uintptr_t x, int flags, bool fp)
{

        if ((x & (LK_SHARE | LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) ==
            LK_SHARE)
                return (true);
        if (fp || (!(x & LK_SHARE)))
                return (false);
        if ((curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) ||
            (curthread->td_pflags & TDP_DEADLKTREAT))
                return (true);
        return (false);
}
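
/*
 * Reading aid for the bit tests above: lk_lock packs the whole lock state
 * into one word.  Roughly (sys/lockmgr.h has the authoritative layout),
 * LK_SHARE marks a shared or unlocked word, LK_SHARED_WAITERS,
 * LK_EXCLUSIVE_WAITERS and LK_EXCLUSIVE_SPINNERS are low flag bits, and
 * the remaining bits hold either the owning thread pointer (exclusive
 * mode, extracted by LK_HOLDER()) or the sharer count (shared mode,
 * extracted by LK_SHARERS() and stepped by LK_ONE_SHARER).
 */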

#define LK_TRYOP(x)                                                     \
        ((x) & LK_NOWAIT)

#define LK_CAN_WITNESS(x)                                               \
        (((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
#define LK_TRYWIT(x)                                                    \
        (LK_TRYOP(x) ? LOP_TRYLOCK : 0)

#define LK_CAN_ADAPT(lk, f)                                             \
        (((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&             \
        ((f) & LK_SLEEPFAIL) == 0)

#define lockmgr_disowned(lk)                                            \
        (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)

#define lockmgr_xlocked_v(v)                                            \
        (((v) & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)

#define lockmgr_xlocked(lk) lockmgr_xlocked_v((lk)->lk_lock)

static void     assert_lockmgr(const struct lock_object *lock, int how);
#ifdef DDB
static void     db_show_lockmgr(const struct lock_object *lock);
#endif
static void     lock_lockmgr(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
static int      owner_lockmgr(const struct lock_object *lock,
                    struct thread **owner);
#endif
static uintptr_t unlock_lockmgr(struct lock_object *lock);

struct lock_class lock_class_lockmgr = {
        .lc_name = "lockmgr",
        .lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
        .lc_assert = assert_lockmgr,
#ifdef DDB
        .lc_ddb_show = db_show_lockmgr,
#endif
        .lc_lock = lock_lockmgr,
        .lc_unlock = unlock_lockmgr,
#ifdef KDTRACE_HOOKS
        .lc_owner = owner_lockmgr,
#endif
};

struct lockmgr_wait {
        const char *iwmesg;
        int ipri;
        int itimo;
};

static bool __always_inline lockmgr_slock_try(struct lock *lk, uintptr_t *xp,
    int flags, bool fp);
static bool __always_inline lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp);

static void
lockmgr_exit(u_int flags, struct lock_object *ilk, int wakeup_swapper)
{
        struct lock_class *class;

        if (flags & LK_INTERLOCK) {
                class = LOCK_CLASS(ilk);
                class->lc_unlock(ilk);
        }

        if (__predict_false(wakeup_swapper))
                kick_proc0();
}
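
/*
 * A note on the wakeup_swapper convention used throughout this file:
 * sleepq_broadcast() reports whether it made a swapped-out thread
 * runnable, in which case proc0 (the swapper) needs a kick.  That kick
 * is deferred and funneled through here so that it happens only after
 * the sleepqueue chain lock and any LK_INTERLOCK-supplied interlock
 * have been dropped.
 */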

static void
lockmgr_note_shared_acquire(struct lock *lk, int contested,
    uint64_t waittime, const char *file, int line, int flags)
{

        LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested,
            waittime, file, line, LOCKSTAT_READER);
        LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line);
        WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line);
        TD_LOCKS_INC(curthread);
        TD_SLOCKS_INC(curthread);
        STACK_SAVE(lk);
}

static void
lockmgr_note_shared_release(struct lock *lk, const char *file, int line)
{

        LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_READER);
        WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
        LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
        TD_LOCKS_DEC(curthread);
        TD_SLOCKS_DEC(curthread);
}

static void
lockmgr_note_exclusive_acquire(struct lock *lk, int contested,
    uint64_t waittime, const char *file, int line, int flags)
{

        LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested,
            waittime, file, line, LOCKSTAT_WRITER);
        LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
        WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file,
            line);
        TD_LOCKS_INC(curthread);
        STACK_SAVE(lk);
}

static void
lockmgr_note_exclusive_release(struct lock *lk, const char *file, int line)
{

        LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_WRITER);
        LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file,
            line);
        WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
        TD_LOCKS_DEC(curthread);
}

static __inline struct thread *
lockmgr_xholder(const struct lock *lk)
{
        uintptr_t x;

        x = lk->lk_lock;
        return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
}

/*
 * Assumes the sleepqueue chain lock is held on entry and returns with it
 * released.  It also assumes the generic interlock is sane and was checked
 * by the caller.  If LK_INTERLOCK is specified, the interlock is not
 * reacquired after the sleep.
 */
static __inline int
sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *wmesg, int pri, int timo, int queue)
{
        GIANT_DECLARE;
        struct lock_class *class;
        int catch, error;

        class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
        catch = pri & PCATCH;
        pri &= PRIMASK;
        error = 0;

        LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
            (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");

        if (flags & LK_INTERLOCK)
                class->lc_unlock(ilk);
        if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
                lk->lk_exslpfail++;
        GIANT_SAVE();
        sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
            SLEEPQ_INTERRUPTIBLE : 0), queue);
        if ((flags & LK_TIMELOCK) && timo)
                sleepq_set_timeout(&lk->lock_object, timo);

        /*
         * Pick the sleep primitive matching the requested timeout and
         * signal-catching behaviour.
         */
        if ((flags & LK_TIMELOCK) && timo && catch)
                error = sleepq_timedwait_sig(&lk->lock_object, pri);
        else if ((flags & LK_TIMELOCK) && timo)
                error = sleepq_timedwait(&lk->lock_object, pri);
        else if (catch)
                error = sleepq_wait_sig(&lk->lock_object, pri);
        else
                sleepq_wait(&lk->lock_object, pri);
        GIANT_RESTORE();
        if ((flags & LK_SLEEPFAIL) && error == 0)
                error = ENOLCK;

        return (error);
}

static __inline int
wakeupshlk(struct lock *lk, const char *file, int line)
{
        uintptr_t v, x, orig_x;
        u_int realexslp;
        int queue, wakeup_swapper;

        wakeup_swapper = 0;
        for (;;) {
                x = lk->lk_lock;
                if (lockmgr_sunlock_try(lk, &x))
                        break;

                /*
                 * We should have a sharer with waiters, so enter the hard
                 * path in order to handle wakeups correctly.
                 */
                sleepq_lock(&lk->lock_object);
                orig_x = lk->lk_lock;
retry_sleepq:
                x = orig_x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
                v = LK_UNLOCKED;

                /*
                 * If the lock has exclusive waiters, give them preference in
                 * order to avoid deadlock with shared runners-up.
                 * If interruptible sleeps left the exclusive queue empty,
                 * avoid starving the threads sleeping on the shared queue by
                 * giving them precedence and clearing the exclusive waiters
                 * bit anyway.
                 * Note that the lk_exslpfail count may overstate the real
                 * number of waiters with the LK_SLEEPFAIL flag on, because
                 * such sleeps may also be interruptible, so lk_exslpfail
                 * should be treated as an upper bound, including the edge
                 * cases.
                 */
                realexslp = sleepq_sleepcnt(&lk->lock_object,
                    SQ_EXCLUSIVE_QUEUE);
                if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
                        if (lk->lk_exslpfail < realexslp) {
                                lk->lk_exslpfail = 0;
                                queue = SQ_EXCLUSIVE_QUEUE;
                                v |= (x & LK_SHARED_WAITERS);
                        } else {
                                lk->lk_exslpfail = 0;
                                LOCK_LOG2(lk,
                                    "%s: %p has only LK_SLEEPFAIL sleepers",
                                    __func__, lk);
                                LOCK_LOG2(lk,
                            "%s: %p waking up threads on the exclusive queue",
                                    __func__, lk);
                                wakeup_swapper =
                                    sleepq_broadcast(&lk->lock_object,
                                    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
                                queue = SQ_SHARED_QUEUE;
                        }
                } else {

                        /*
                         * Exclusive waiters sleeping with LK_SLEEPFAIL on
                         * and using interruptible sleeps/timeouts may have
                         * left spurious lk_exslpfail counts behind, so
                         * clean them up anyway.
                         */
                        lk->lk_exslpfail = 0;
                        queue = SQ_SHARED_QUEUE;
                }

                if (lockmgr_sunlock_try(lk, &orig_x)) {
                        sleepq_release(&lk->lock_object);
                        break;
                }

                x |= LK_SHARERS_LOCK(1);
                if (!atomic_fcmpset_rel_ptr(&lk->lk_lock, &x, v)) {
                        orig_x = x;
                        goto retry_sleepq;
                }
                LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
                    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
                    "exclusive");
                wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
                    0, queue);
                sleepq_release(&lk->lock_object);
                break;
        }

        lockmgr_note_shared_release(lk, file, line);
        return (wakeup_swapper);
}

static void
assert_lockmgr(const struct lock_object *lock, int what)
{

        panic("lockmgr locks do not support assertions");
}

static void
lock_lockmgr(struct lock_object *lock, uintptr_t how)
{

        panic("lockmgr locks do not support sleep interlocking");
}

static uintptr_t
unlock_lockmgr(struct lock_object *lock)
{

        panic("lockmgr locks do not support sleep interlocking");
}

#ifdef KDTRACE_HOOKS
static int
owner_lockmgr(const struct lock_object *lock, struct thread **owner)
{

        panic("lockmgr locks do not support owner inquiring");
}
#endif

void
lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
{
        int iflags;

        MPASS((flags & ~LK_INIT_MASK) == 0);
        ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
            ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
            &lk->lk_lock));

        iflags = LO_SLEEPABLE | LO_UPGRADABLE;
        if (flags & LK_CANRECURSE)
                iflags |= LO_RECURSABLE;
        if ((flags & LK_NODUP) == 0)
                iflags |= LO_DUPOK;
        if (flags & LK_NOPROFILE)
                iflags |= LO_NOPROFILE;
        if ((flags & LK_NOWITNESS) == 0)
                iflags |= LO_WITNESS;
        if (flags & LK_QUIET)
                iflags |= LO_QUIET;
        if (flags & LK_IS_VNODE)
                iflags |= LO_IS_VNODE;
        if (flags & LK_NEW)
                iflags |= LO_NEW;
        iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);

        lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
        lk->lk_lock = LK_UNLOCKED;
        lk->lk_recurse = 0;
        lk->lk_exslpfail = 0;
        lk->lk_timo = timo;
        lk->lk_pri = pri;
        STACK_ZERO(lk);
}
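
/*
 * Illustrative lifecycle (hypothetical caller, not part of this file;
 * the lock name and priority are made up for the example):
 *
 *      struct lock mylk;
 *
 *      lockinit(&mylk, PVFS, "mylk", 0, 0);
 *      ...
 *      lockmgr(&mylk, LK_EXCLUSIVE, NULL);
 *      ...
 *      lockmgr(&mylk, LK_RELEASE, NULL);
 *      lockdestroy(&mylk);
 */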

/*
 * XXX: Gross hacks to manipulate external lock flags after
 * initialization.  Used for certain vnode and buf locks.
 */
void
lockallowshare(struct lock *lk)
{

        lockmgr_assert(lk, KA_XLOCKED);
        lk->lock_object.lo_flags &= ~LK_NOSHARE;
}

void
lockdisableshare(struct lock *lk)
{

        lockmgr_assert(lk, KA_XLOCKED);
        lk->lock_object.lo_flags |= LK_NOSHARE;
}

void
lockallowrecurse(struct lock *lk)
{

        lockmgr_assert(lk, KA_XLOCKED);
        lk->lock_object.lo_flags |= LO_RECURSABLE;
}

void
lockdisablerecurse(struct lock *lk)
{

        lockmgr_assert(lk, KA_XLOCKED);
        lk->lock_object.lo_flags &= ~LO_RECURSABLE;
}

void
lockdestroy(struct lock *lk)
{

        KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
        KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
        KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
        lock_destroy(&lk->lock_object);
}

static bool __always_inline
lockmgr_slock_try(struct lock *lk, uintptr_t *xp, int flags, bool fp)
{

        /*
         * If no other thread has an exclusive lock, or
         * no exclusive waiter is present, bump the count of
         * sharers.  Since we have to preserve the state of
         * waiters, if we fail to acquire the shared lock
         * loop back and retry.
         */
        *xp = lk->lk_lock;
        while (LK_CAN_SHARE(*xp, flags, fp)) {
                if (atomic_fcmpset_acq_ptr(&lk->lk_lock, xp,
                    *xp + LK_ONE_SHARER)) {
                        return (true);
                }
        }
        return (false);
}

static bool __always_inline
lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp)
{

        for (;;) {
                if (LK_SHARERS(*xp) > 1 || !(*xp & LK_ALL_WAITERS)) {
                        if (atomic_fcmpset_rel_ptr(&lk->lk_lock, xp,
                            *xp - LK_ONE_SHARER))
                                return (true);
                        continue;
                }
                break;
        }
        return (false);
}
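
/*
 * Note on the fast path above: lockmgr_sunlock_try() only drops a sharer
 * when other sharers remain or nobody is queued; the last sharer with
 * waiters must fail here so that the caller (wakeupshlk() or
 * lockmgr_sunlock_hard()) takes the slow path and issues the wakeups.
 * The retry loops rely on atomic_fcmpset_*() reloading *xp with the
 * current lock word on failure, so no explicit re-read is needed.
 */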

static __noinline int
lockmgr_slock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *file, int line, struct lockmgr_wait *lwa)
{
        uintptr_t tid, x;
        int error = 0;
        const char *iwmesg;
        int ipri, itimo;

#ifdef KDTRACE_HOOKS
        uint64_t sleep_time = 0;
#endif
#ifdef LOCK_PROFILING
        uint64_t waittime = 0;
        int contested = 0;
#endif

        if (__predict_false(panicstr != NULL))
                goto out;

        tid = (uintptr_t)curthread;

        if (LK_CAN_WITNESS(flags))
                WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
                    file, line, flags & LK_INTERLOCK ? ilk : NULL);
        for (;;) {
                if (lockmgr_slock_try(lk, &x, flags, false))
                        break;
#ifdef HWPMC_HOOKS
                PMC_SOFT_CALL( , , lock, failed);
#endif
                lock_profile_obtain_lock_failed(&lk->lock_object,
                    &contested, &waittime);

                /*
                 * If the lock is already held by curthread in
                 * exclusive mode, avoid a deadlock.
                 */
                if (LK_HOLDER(x) == tid) {
                        LOCK_LOG2(lk,
                            "%s: %p already held in exclusive mode",
                            __func__, lk);
                        error = EDEADLK;
                        break;
                }

                /*
                 * If the lock is expected not to sleep, just give up
                 * and return.
                 */
                if (LK_TRYOP(flags)) {
                        LOCK_LOG2(lk, "%s: %p fails the try operation",
                            __func__, lk);
                        error = EBUSY;
                        break;
                }

                /*
                 * Acquire the sleepqueue chain lock because we
                 * probably will need to manipulate waiters flags.
                 */
                sleepq_lock(&lk->lock_object);
                x = lk->lk_lock;
retry_sleepq:

                /*
                 * If the lock can be acquired in shared mode, try
                 * again.
                 */
                if (LK_CAN_SHARE(x, flags, false)) {
                        sleepq_release(&lk->lock_object);
                        continue;
                }

                /*
                 * Try to set the LK_SHARED_WAITERS flag.  If we fail,
                 * loop back and retry.
                 */
                if ((x & LK_SHARED_WAITERS) == 0) {
                        if (!atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
                            x | LK_SHARED_WAITERS)) {
                                goto retry_sleepq;
                        }
                        LOCK_LOG2(lk, "%s: %p set shared waiters flag",
                            __func__, lk);
                }

                if (lwa == NULL) {
                        iwmesg = lk->lock_object.lo_name;
                        ipri = lk->lk_pri;
                        itimo = lk->lk_timo;
                } else {
                        iwmesg = lwa->iwmesg;
                        ipri = lwa->ipri;
                        itimo = lwa->itimo;
                }

                /*
                 * Since we have been unable to acquire the shared lock
                 * and the shared waiters flag is set, we will sleep.
                 */
#ifdef KDTRACE_HOOKS
                sleep_time -= lockstat_nsecs(&lk->lock_object);
#endif
                error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
                    SQ_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
                sleep_time += lockstat_nsecs(&lk->lock_object);
#endif
                flags &= ~LK_INTERLOCK;
                if (error) {
                        LOCK_LOG3(lk,
                            "%s: interrupted sleep for %p with %d",
                            __func__, lk, error);
                        break;
                }
                LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
                    __func__, lk);
        }
        if (error == 0) {
#ifdef KDTRACE_HOOKS
                if (sleep_time != 0)
                        LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time,
                            LOCKSTAT_READER, (x & LK_SHARE) == 0,
                            (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x));
#endif
#ifdef LOCK_PROFILING
                lockmgr_note_shared_acquire(lk, contested, waittime,
                    file, line, flags);
#else
                lockmgr_note_shared_acquire(lk, 0, 0, file, line,
                    flags);
#endif
        }

out:
        lockmgr_exit(flags, ilk, 0);
        return (error);
}

static __noinline int
lockmgr_xlock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *file, int line, struct lockmgr_wait *lwa)
{
        struct lock_class *class;
        uintptr_t tid, x, v;
        int error = 0;
        const char *iwmesg;
        int ipri, itimo;

#ifdef KDTRACE_HOOKS
        uint64_t sleep_time = 0;
#endif
#ifdef LOCK_PROFILING
        uint64_t waittime = 0;
        int contested = 0;
#endif

        if (__predict_false(panicstr != NULL))
                goto out;

        tid = (uintptr_t)curthread;

        if (LK_CAN_WITNESS(flags))
                WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
                    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
                    ilk : NULL);

        /*
         * If curthread already holds the lock and this one is
         * allowed to recurse, simply recurse on it.
         */
        if (lockmgr_xlocked(lk)) {
                if ((flags & LK_CANRECURSE) == 0 &&
                    (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
                        /*
                         * If this is a try operation, fail with EBUSY
                         * instead of panicking.
                         */
                        if (LK_TRYOP(flags)) {
                                LOCK_LOG2(lk,
                                    "%s: %p fails the try operation",
                                    __func__, lk);
                                error = EBUSY;
                                goto out;
                        }
                        if (flags & LK_INTERLOCK) {
                                class = LOCK_CLASS(ilk);
                                class->lc_unlock(ilk);
                        }
                        panic("%s: recursing on non recursive lockmgr %p "
                            "@ %s:%d\n", __func__, lk, file, line);
                }
                lk->lk_recurse++;
                LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
                LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
                    lk->lk_recurse, file, line);
                WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
                    LK_TRYWIT(flags), file, line);
                TD_LOCKS_INC(curthread);
                goto out;
        }

        for (;;) {
                if (lk->lk_lock == LK_UNLOCKED &&
                    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
                        break;
#ifdef HWPMC_HOOKS
                PMC_SOFT_CALL( , , lock, failed);
#endif
                lock_profile_obtain_lock_failed(&lk->lock_object,
                    &contested, &waittime);

                /*
                 * If the lock is expected not to sleep, just give up
                 * and return.
                 */
                if (LK_TRYOP(flags)) {
                        LOCK_LOG2(lk, "%s: %p fails the try operation",
                            __func__, lk);
                        error = EBUSY;
                        break;
                }

                /*
                 * Acquire the sleepqueue chain lock because we
                 * probably will need to manipulate waiters flags.
                 */
                sleepq_lock(&lk->lock_object);
                x = lk->lk_lock;
retry_sleepq:

                /*
                 * If the lock has been released while we spun on
                 * the sleepqueue chain lock, just try again.
                 */
                if (x == LK_UNLOCKED) {
                        sleepq_release(&lk->lock_object);
                        continue;
                }

                /*
                 * The lock can be in the state where there is a
                 * pending queue of waiters, but still no owner.
                 * This happens when the lock is contested and an
                 * owner is going to claim the lock.
                 * If curthread is the one successfully acquiring it,
                 * claim lock ownership and return, preserving waiters
                 * flags.
                 */
                v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
                if ((x & ~v) == LK_UNLOCKED) {
                        v &= ~LK_EXCLUSIVE_SPINNERS;
                        if (atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
                            tid | v)) {
                                sleepq_release(&lk->lock_object);
                                LOCK_LOG2(lk,
                                    "%s: %p claimed by a new writer",
                                    __func__, lk);
                                break;
                        }
                        goto retry_sleepq;
                }

                /*
                 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
                 * fail, loop back and retry.
                 */
                if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
                        if (!atomic_fcmpset_ptr(&lk->lk_lock, &x,
                            x | LK_EXCLUSIVE_WAITERS)) {
                                goto retry_sleepq;
                        }
                        LOCK_LOG2(lk, "%s: %p set excl waiters flag",
                            __func__, lk);
                }

                if (lwa == NULL) {
                        iwmesg = lk->lock_object.lo_name;
                        ipri = lk->lk_pri;
                        itimo = lk->lk_timo;
                } else {
                        iwmesg = lwa->iwmesg;
                        ipri = lwa->ipri;
                        itimo = lwa->itimo;
                }

                /*
                 * Since we have been unable to acquire the exclusive
                 * lock and the exclusive waiters flag is set, we will
                 * sleep.
                 */
#ifdef KDTRACE_HOOKS
                sleep_time -= lockstat_nsecs(&lk->lock_object);
#endif
                error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
                    SQ_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
                sleep_time += lockstat_nsecs(&lk->lock_object);
#endif
                flags &= ~LK_INTERLOCK;
                if (error) {
                        LOCK_LOG3(lk,
                            "%s: interrupted sleep for %p with %d",
                            __func__, lk, error);
                        break;
                }
                LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
                    __func__, lk);
        }
        if (error == 0) {
#ifdef KDTRACE_HOOKS
                if (sleep_time != 0)
                        LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time,
                            LOCKSTAT_WRITER, (x & LK_SHARE) == 0,
                            (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x));
#endif
#ifdef LOCK_PROFILING
                lockmgr_note_exclusive_acquire(lk, contested, waittime,
                    file, line, flags);
#else
                lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
                    flags);
#endif
        }

out:
        lockmgr_exit(flags, ilk, 0);
        return (error);
}

static __noinline int
lockmgr_upgrade(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *file, int line, struct lockmgr_wait *lwa)
{
        uintptr_t tid, x, v;
        int error = 0;
        int wakeup_swapper = 0;
        int op;

        if (__predict_false(panicstr != NULL))
                goto out;

        tid = (uintptr_t)curthread;

        _lockmgr_assert(lk, KA_SLOCKED, file, line);
        v = lk->lk_lock;
        x = v & LK_ALL_WAITERS;
        v &= LK_EXCLUSIVE_SPINNERS;

        /*
         * Try to switch from one shared lock to an exclusive one.
         * We need to preserve waiters flags during the operation.
         */
        if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
            tid | x)) {
                LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
                    line);
                WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
                    LK_TRYWIT(flags), file, line);
                LOCKSTAT_RECORD0(lockmgr__upgrade, lk);
                TD_SLOCKS_DEC(curthread);
                goto out;
        }

        op = flags & LK_TYPE_MASK;

        /*
         * In LK_TRYUPGRADE mode, do not drop the lock,
         * returning EBUSY instead.
         */
        if (op == LK_TRYUPGRADE) {
                LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
                    __func__, lk);
                error = EBUSY;
                goto out;
        }

        /*
         * We have been unable to succeed in upgrading, so just
         * give up the shared lock.
         */
        wakeup_swapper |= wakeupshlk(lk, file, line);
        error = lockmgr_xlock_hard(lk, flags, ilk, file, line, lwa);
        flags &= ~LK_INTERLOCK;
out:
        lockmgr_exit(flags, ilk, wakeup_swapper);
        return (error);
}
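
/*
 * Caller-visible consequence of the fallback above, sketched with a
 * hypothetical caller: a plain LK_UPGRADE may drop the shared lock before
 * queueing for the exclusive one, so any state read under the shared hold
 * must be revalidated afterwards.  LK_TRYUPGRADE instead either upgrades
 * atomically or fails with EBUSY while the shared hold remains:
 *
 *      error = lockmgr(&mylk, LK_TRYUPGRADE, NULL);
 *      if (error == EBUSY) {
 *              ... still holding the shared lock here ...
 *      }
 */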

int
lockmgr_lock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *file, int line)
{
        struct lock_class *class;
        uintptr_t x, tid;
        u_int op;
        bool locked;

        if (__predict_false(panicstr != NULL))
                return (0);

        op = flags & LK_TYPE_MASK;
        locked = false;
        switch (op) {
        case LK_SHARED:
                if (LK_CAN_WITNESS(flags))
                        WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
                            file, line, flags & LK_INTERLOCK ? ilk : NULL);
                if (__predict_false(lk->lock_object.lo_flags & LK_NOSHARE))
                        break;
                if (lockmgr_slock_try(lk, &x, flags, true)) {
                        lockmgr_note_shared_acquire(lk, 0, 0,
                            file, line, flags);
                        locked = true;
                } else {
                        return (lockmgr_slock_hard(lk, flags, ilk, file, line,
                            NULL));
                }
                break;
        case LK_EXCLUSIVE:
                if (LK_CAN_WITNESS(flags))
                        WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
                            LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
                            ilk : NULL);
                tid = (uintptr_t)curthread;
                if (lk->lk_lock == LK_UNLOCKED &&
                    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
                        lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
                            flags);
                        locked = true;
                } else {
                        return (lockmgr_xlock_hard(lk, flags, ilk, file, line,
                            NULL));
                }
                break;
        case LK_UPGRADE:
        case LK_TRYUPGRADE:
                return (lockmgr_upgrade(lk, flags, ilk, file, line, NULL));
        default:
                break;
        }
        if (__predict_true(locked)) {
                if (__predict_false(flags & LK_INTERLOCK)) {
                        class = LOCK_CLASS(ilk);
                        class->lc_unlock(ilk);
                }
                return (0);
        } else {
                return (__lockmgr_args(lk, flags, ilk, LK_WMESG_DEFAULT,
                    LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, file, line));
        }
}

static __noinline int
lockmgr_sunlock_hard(struct lock *lk, uintptr_t x, u_int flags,
    struct lock_object *ilk, const char *file, int line)
{
        int wakeup_swapper = 0;

        if (__predict_false(panicstr != NULL))
                goto out;

        wakeup_swapper = wakeupshlk(lk, file, line);

out:
        lockmgr_exit(flags, ilk, wakeup_swapper);
        return (0);
}

static __noinline int
lockmgr_xunlock_hard(struct lock *lk, uintptr_t x, u_int flags,
    struct lock_object *ilk, const char *file, int line)
{
        uintptr_t tid, v;
        int wakeup_swapper = 0;
        u_int realexslp;
        int queue;

        if (__predict_false(panicstr != NULL))
                goto out;

        tid = (uintptr_t)curthread;

        /*
         * As a first pass, treat the lock as if it had no waiters.
         * Fix up the tid var if the lock has been disowned.
         */
        if (LK_HOLDER(x) == LK_KERNPROC)
                tid = LK_KERNPROC;
        else {
                WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
                TD_LOCKS_DEC(curthread);
        }
        LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file,
            line);

        /*
         * The lock is held in exclusive mode.
         * If the lock is recursed also, then unrecurse it.
         */
        if (lockmgr_xlocked_v(x) && lockmgr_recursed(lk)) {
                LOCK_LOG2(lk, "%s: %p unrecursing", __func__, lk);
                lk->lk_recurse--;
                goto out;
        }
        if (tid != LK_KERNPROC)
                LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk,
                    LOCKSTAT_WRITER);

        if (x == tid && atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED))
                goto out;

        sleepq_lock(&lk->lock_object);
        x = lk->lk_lock;
        v = LK_UNLOCKED;

        /*
         * If the lock has exclusive waiters, give them
         * preference in order to avoid deadlock with
         * shared runners-up.
         * If interruptible sleeps left the exclusive queue
         * empty, avoid starving the threads sleeping
         * on the shared queue by giving them precedence
         * and clearing the exclusive waiters bit anyway.
         * Note that the lk_exslpfail count may overstate
         * the real number of waiters with the
         * LK_SLEEPFAIL flag on, because such sleeps may
         * also be interruptible, so lk_exslpfail should
         * be treated as an upper bound, including the
         * edge cases.
         */
        MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
        realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE);
        if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
                if (lk->lk_exslpfail < realexslp) {
                        lk->lk_exslpfail = 0;
                        queue = SQ_EXCLUSIVE_QUEUE;
                        v |= (x & LK_SHARED_WAITERS);
                } else {
                        lk->lk_exslpfail = 0;
                        LOCK_LOG2(lk,
                            "%s: %p has only LK_SLEEPFAIL sleepers",
                            __func__, lk);
                        LOCK_LOG2(lk,
                            "%s: %p waking up threads on the exclusive queue",
                            __func__, lk);
                        wakeup_swapper = sleepq_broadcast(&lk->lock_object,
                            SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
                        queue = SQ_SHARED_QUEUE;
                }
        } else {

                /*
                 * Exclusive waiters sleeping with LK_SLEEPFAIL
                 * on and using interruptible sleeps/timeouts
                 * may have left spurious lk_exslpfail counts
                 * behind, so clean them up anyway.
                 */
                lk->lk_exslpfail = 0;
                queue = SQ_SHARED_QUEUE;
        }

        LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
            __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
            "exclusive");
        atomic_store_rel_ptr(&lk->lk_lock, v);
        wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0,
            queue);
        sleepq_release(&lk->lock_object);

out:
        lockmgr_exit(flags, ilk, wakeup_swapper);
        return (0);
}

int
lockmgr_unlock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk)
{
        struct lock_class *class;
        uintptr_t x, tid;
        const char *file;
        int line;

        if (__predict_false(panicstr != NULL))
                return (0);

        file = __FILE__;
        line = __LINE__;

        _lockmgr_assert(lk, KA_LOCKED, file, line);
        x = lk->lk_lock;
        if (__predict_true((x & LK_SHARE) != 0)) {
                if (lockmgr_sunlock_try(lk, &x)) {
                        lockmgr_note_shared_release(lk, file, line);
                } else {
                        return (lockmgr_sunlock_hard(lk, x, flags, ilk,
                            file, line));
                }
        } else {
                tid = (uintptr_t)curthread;
                if (!lockmgr_recursed(lk) &&
                    atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) {
                        lockmgr_note_exclusive_release(lk, file, line);
                } else {
                        return (lockmgr_xunlock_hard(lk, x, flags, ilk,
                            file, line));
                }
        }
        if (__predict_false(flags & LK_INTERLOCK)) {
                class = LOCK_CLASS(ilk);
                class->lc_unlock(ilk);
        }
        return (0);
}
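
/*
 * Note that, unlike the argument-taking paths, the fast path above stamps
 * __FILE__/__LINE__ from this function itself, so debugging and profiling
 * facilities see kern_lock.c rather than the external caller's location.
 */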
1164
1165 int
1166 __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
1167     const char *wmesg, int pri, int timo, const char *file, int line)
1168 {
1169         GIANT_DECLARE;
1170         struct lockmgr_wait lwa;
1171         struct lock_class *class;
1172         const char *iwmesg;
1173         uintptr_t tid, v, x;
1174         u_int op, realexslp;
1175         int error, ipri, itimo, queue, wakeup_swapper;
1176 #ifdef LOCK_PROFILING
1177         uint64_t waittime = 0;
1178         int contested = 0;
1179 #endif
1180
1181         if (panicstr != NULL)
1182                 return (0);
1183
1184         error = 0;
1185         tid = (uintptr_t)curthread;
1186         op = (flags & LK_TYPE_MASK);
1187         iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
1188         ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
1189         itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
1190
1191         lwa.iwmesg = iwmesg;
1192         lwa.ipri = ipri;
1193         lwa.itimo = itimo;
1194
1195         MPASS((flags & ~LK_TOTAL_MASK) == 0);
1196         KASSERT((op & (op - 1)) == 0,
1197             ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
1198         KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
1199             (op != LK_DOWNGRADE && op != LK_RELEASE),
1200             ("%s: Invalid flags in regard of the operation desired @ %s:%d",
1201             __func__, file, line));
1202         KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
1203             ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
1204             __func__, file, line));
1205         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
1206             ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
1207             lk->lock_object.lo_name, file, line));
1208
1209         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
1210
1211         if (lk->lock_object.lo_flags & LK_NOSHARE) {
1212                 switch (op) {
1213                 case LK_SHARED:
1214                         op = LK_EXCLUSIVE;
1215                         break;
1216                 case LK_UPGRADE:
1217                 case LK_TRYUPGRADE:
1218                 case LK_DOWNGRADE:
1219                         _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
1220                             file, line);
1221                         if (flags & LK_INTERLOCK)
1222                                 class->lc_unlock(ilk);
1223                         return (0);
1224                 }
1225         }
1226
1227         wakeup_swapper = 0;
1228         switch (op) {
1229         case LK_SHARED:
1230                 return (lockmgr_slock_hard(lk, flags, ilk, file, line, &lwa));
1231                 break;
1232         case LK_UPGRADE:
1233         case LK_TRYUPGRADE:
1234                 return (lockmgr_upgrade(lk, flags, ilk, file, line, &lwa));
1235                 break;
1236         case LK_EXCLUSIVE:
1237                 return (lockmgr_xlock_hard(lk, flags, ilk, file, line, &lwa));
1238                 break;
1239         case LK_DOWNGRADE:
1240                 _lockmgr_assert(lk, KA_XLOCKED, file, line);
1241                 WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
1242
1243                 /*
1244                  * Panic if the lock is recursed.
1245                  */
1246                 if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1247                         if (flags & LK_INTERLOCK)
1248                                 class->lc_unlock(ilk);
1249                         panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
1250                             __func__, iwmesg, file, line);
1251                 }
1252                 TD_SLOCKS_INC(curthread);
1253
1254                 /*
1255                  * In order to preserve waiters flags, just spin.
1256                  */
1257                 for (;;) {
1258                         x = lk->lk_lock;
1259                         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1260                         x &= LK_ALL_WAITERS;
1261                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1262                             LK_SHARERS_LOCK(1) | x))
1263                                 break;
1264                         cpu_spinwait();
1265                 }
1266                 LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
1267                 LOCKSTAT_RECORD0(lockmgr__downgrade, lk);
1268                 break;
1269         case LK_RELEASE:
1270                 _lockmgr_assert(lk, KA_LOCKED, file, line);
1271                 x = lk->lk_lock;
1272
1273                 if (__predict_true(x & LK_SHARE) != 0) {
1274                         return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
1275                 } else {
1276                         return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
1277                 }
1278                 break;
1279         case LK_DRAIN:
1280                 if (LK_CAN_WITNESS(flags))
1281                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1282                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1283                             ilk : NULL);
1284
1285                 /*
1286                  * Trying to drain a lock we already own will result in a
1287                  * deadlock.
1288                  */
1289                 if (lockmgr_xlocked(lk)) {
1290                         if (flags & LK_INTERLOCK)
1291                                 class->lc_unlock(ilk);
1292                         panic("%s: draining %s with the lock held @ %s:%d\n",
1293                             __func__, iwmesg, file, line);
1294                 }
1295
1296                 for (;;) {
1297                         if (lk->lk_lock == LK_UNLOCKED &&
1298                             atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
1299                                 break;
1300
1301 #ifdef HWPMC_HOOKS
1302                         PMC_SOFT_CALL( , , lock, failed);
1303 #endif
1304                         lock_profile_obtain_lock_failed(&lk->lock_object,
1305                             &contested, &waittime);
1306
1307                         /*
1308                          * If the lock is expected to not sleep just give up
1309                          * and return.
1310                          */
1311                         if (LK_TRYOP(flags)) {
1312                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
1313                                     __func__, lk);
1314                                 error = EBUSY;
1315                                 break;
1316                         }
1317
1318                         /*
1319                          * Acquire the sleepqueue chain lock because we
1320                          * probabilly will need to manipulate waiters flags.
1321                          */
1322                         sleepq_lock(&lk->lock_object);
1323                         x = lk->lk_lock;
1324
1325                         /*
1326                          * if the lock has been released while we spun on
1327                          * the sleepqueue chain lock just try again.
1328                          */
1329                         if (x == LK_UNLOCKED) {
1330                                 sleepq_release(&lk->lock_object);
1331                                 continue;
1332                         }
1333
1334                         v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1335                         if ((x & ~v) == LK_UNLOCKED) {
1336                                 v = (x & ~LK_EXCLUSIVE_SPINNERS);
1337
1338                                 /*
1339                                  * If interruptible sleeps left the exclusive
1340                                  * queue empty avoid a starvation for the
1341                                  * threads sleeping on the shared queue by
1342                                  * giving them precedence and cleaning up the
1343                                  * exclusive waiters bit anyway.
1344                                  * Please note that lk_exslpfail count may be
1345                                  * lying about the real number of waiters with
1346                                  * the LK_SLEEPFAIL flag on because they may
1347                                  * be used in conjunction with interruptible
1348                                  * sleeps so lk_exslpfail might be considered
1349                                  * an 'upper limit' bound, including the edge
1350                                  * cases.
1351                                  */
1352                                 if (v & LK_EXCLUSIVE_WAITERS) {
1353                                         queue = SQ_EXCLUSIVE_QUEUE;
1354                                         v &= ~LK_EXCLUSIVE_WAITERS;
1355                                 } else {
1356
1357                                         /*
1358                                          * Exclusive waiters sleeping with
1359                                          * LK_SLEEPFAIL on and using
1360                                          * interruptible sleeps/timeout may
1361                                          * have left spourious lk_exslpfail
1362                                          * counts on, so clean it up anyway.
1363                                          */
1364                                         MPASS(v & LK_SHARED_WAITERS);
1365                                         lk->lk_exslpfail = 0;
1366                                         queue = SQ_SHARED_QUEUE;
1367                                         v &= ~LK_SHARED_WAITERS;
1368                                 }
1369                                 if (queue == SQ_EXCLUSIVE_QUEUE) {
1370                                         realexslp =
1371                                             sleepq_sleepcnt(&lk->lock_object,
1372                                             SQ_EXCLUSIVE_QUEUE);
1373                                         if (lk->lk_exslpfail >= realexslp) {
1374                                                 lk->lk_exslpfail = 0;
1375                                                 queue = SQ_SHARED_QUEUE;
1376                                                 v &= ~LK_SHARED_WAITERS;
1377                                                 if (realexslp != 0) {
1378                                                         LOCK_LOG2(lk,
1379                                         "%s: %p has only LK_SLEEPFAIL sleepers",
1380                                                             __func__, lk);
1381                                                         LOCK_LOG2(lk,
1382                         "%s: %p waking up threads on the exclusive queue",
1383                                                             __func__, lk);
1384                                                         wakeup_swapper =
1385                                                             sleepq_broadcast(
1386                                                             &lk->lock_object,
1387                                                             SLEEPQ_LK, 0,
1388                                                             SQ_EXCLUSIVE_QUEUE);
1389                                                 }
1390                                         } else
1391                                                 lk->lk_exslpfail = 0;
1392                                 }
1393                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1394                                         sleepq_release(&lk->lock_object);
1395                                         continue;
1396                                 }
1397                                 LOCK_LOG3(lk,
1398                                 "%s: %p waking up all threads on the %s queue",
1399                                     __func__, lk, queue == SQ_SHARED_QUEUE ?
1400                                     "shared" : "exclusive");
1401                                 wakeup_swapper |= sleepq_broadcast(
1402                                     &lk->lock_object, SLEEPQ_LK, 0, queue);
1403
1404                                 /*
1405                                  * If shared waiters have been woken up, we
1406                                  * need to wait for one of them to acquire
1407                                  * the lock before setting the exclusive
1408                                  * waiters flag, in order to avoid a deadlock.
1409                                  */
1410                                 if (queue == SQ_SHARED_QUEUE) {
1411                                         for (v = lk->lk_lock;
1412                                             (v & LK_SHARE) && !LK_SHARERS(v);
1413                                             v = lk->lk_lock)
1414                                                 cpu_spinwait();
1415                                 }
1416                         }
1417
1418                         /*
1419                          * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1420                          * fail, loop back and retry.
1421                          */
1422                         if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1423                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1424                                     x | LK_EXCLUSIVE_WAITERS)) {
1425                                         sleepq_release(&lk->lock_object);
1426                                         continue;
1427                                 }
1428                                 LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1429                                     __func__, lk);
1430                         }
1431
1432                         /*
1433                          * As long as we have been unable to acquire the
1434                          * exclusive lock and the exclusive waiters flag
1435                          * is set, we will sleep.
1436                          */
1437                         if (flags & LK_INTERLOCK) {
1438                                 class->lc_unlock(ilk);
1439                                 flags &= ~LK_INTERLOCK;
1440                         }
1441                         GIANT_SAVE();
1442                         sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1443                             SQ_EXCLUSIVE_QUEUE);
1444                         sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1445                         GIANT_RESTORE();
1446                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1447                             __func__, lk);
1448                 }
1449
1450                 if (error == 0) {
1451                         lock_profile_obtain_lock_success(&lk->lock_object,
1452                             contested, waittime, file, line);
1453                         LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1454                             lk->lk_recurse, file, line);
1455                         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1456                             LK_TRYWIT(flags), file, line);
1457                         TD_LOCKS_INC(curthread);
1458                         STACK_SAVE(lk);
1459                 }
1460                 break;
1461         default:
1462                 if (flags & LK_INTERLOCK)
1463                         class->lc_unlock(ilk);
1464                 panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1465         }
1466
1467         if (flags & LK_INTERLOCK)
1468                 class->lc_unlock(ilk);
1469         if (wakeup_swapper)
1470                 kick_proc0();
1471
1472         return (error);
1473 }
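
     /*
      * Example (editor's sketch, not part of the original source): LK_DRAIN
      * is typically the last acquisition a lockmgr lock ever sees, used to
      * ensure no waiters remain before the lock is destroyed:
      *
      *	lockmgr(&lk, LK_DRAIN, NULL);	(sleeps until all waiters are gone,
      *					 then holds the lock exclusively)
      *	lockmgr(&lk, LK_RELEASE, NULL);
      *	lockdestroy(&lk);
      */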
1474
1475 void
1476 _lockmgr_disown(struct lock *lk, const char *file, int line)
1477 {
1478         uintptr_t tid, x;
1479
1480         if (SCHEDULER_STOPPED())
1481                 return;
1482
1483         tid = (uintptr_t)curthread;
1484         _lockmgr_assert(lk, KA_XLOCKED, file, line);
1485
1486         /*
1487          * Panic if the lock is recursed.
1488          */
1489         if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1490                 panic("%s: disown a recursed lockmgr @ %s:%d\n",
1491                     __func__,  file, line);
1492
1493         /*
1494          * If the owner is already LK_KERNPROC, just skip the whole operation.
1495          */
1496         if (LK_HOLDER(lk->lk_lock) != tid)
1497                 return;
1498         lock_profile_release_lock(&lk->lock_object);
1499         LOCKSTAT_RECORD1(lockmgr__disown, lk, LOCKSTAT_WRITER);
1500         LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1501         WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1502         TD_LOCKS_DEC(curthread);
1503         STACK_SAVE(lk);
1504
1505         /*
1506          * Spin on the cmpset so that the waiters flags are preserved.
1507          */
1508         for (;;) {
1509                 x = lk->lk_lock;
1510                 MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1511                 x &= LK_ALL_WAITERS;
1512                 if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1513                     LK_KERNPROC | x))
1514                         return;
1515                 cpu_spinwait();
1516         }
1517 }
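
     /*
      * Example (editor's sketch): disowning lets a lock acquired in one
      * thread be released later from a different context, much as the buffer
      * cache hands buffer locks to LK_KERNPROC so that I/O completion can
      * unlock them:
      *
      *	lockmgr(&lk, LK_EXCLUSIVE, NULL);
      *	lockmgr_disown(&lk);		(owner becomes LK_KERNPROC)
      *	...
      *	lockmgr(&lk, LK_RELEASE, NULL);	(now legal from any thread)
      *
      * lockmgr_disown() here is the wrapper macro around _lockmgr_disown();
      * see sys/lockmgr.h.
      */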
1518
1519 void
1520 lockmgr_printinfo(const struct lock *lk)
1521 {
1522         struct thread *td;
1523         uintptr_t x;
1524
1525         if (lk->lk_lock == LK_UNLOCKED)
1526                 printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1527         else if (lk->lk_lock & LK_SHARE)
1528                 printf("lock type %s: SHARED (count %ju)\n",
1529                     lk->lock_object.lo_name,
1530                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1531         else {
1532                 td = lockmgr_xholder(lk);
1533                 if (td == (struct thread *)LK_KERNPROC)
1534                         printf("lock type %s: EXCL by KERNPROC\n",
1535                             lk->lock_object.lo_name);
1536                 else
1537                         printf("lock type %s: EXCL by thread %p "
1538                             "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
1539                             td, td->td_proc->p_pid, td->td_proc->p_comm,
1540                             td->td_tid);
1541         }
1542
1543         x = lk->lk_lock;
1544         if (x & LK_EXCLUSIVE_WAITERS)
1545                 printf(" with exclusive waiters pending\n");
1546         if (x & LK_SHARED_WAITERS)
1547                 printf(" with shared waiters pending\n");
1548         if (x & LK_EXCLUSIVE_SPINNERS)
1549                 printf(" with exclusive spinners pending\n");
1550
1551         STACK_PRINT(lk);
1552 }
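
     /*
      * Illustrative lockmgr_printinfo() output (editor's sketch; exact
      * pointer, pid and tid values vary):
      *
      *	lock type bufwait: EXCL by thread 0xfffff80003f5a000 (pid 34, syncer, tid 100042)
      *	 with exclusive waiters pending
      */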
1553
1554 int
1555 lockstatus(const struct lock *lk)
1556 {
1557         uintptr_t v, x;
1558         int ret;
1559
1560         ret = LK_SHARED;
1561         x = lk->lk_lock;
1562         v = LK_HOLDER(x);
1563
1564         if ((x & LK_SHARE) == 0) {
1565                 if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1566                         ret = LK_EXCLUSIVE;
1567                 else
1568                         ret = LK_EXCLOTHER;
1569         } else if (x == LK_UNLOCKED)
1570                 ret = 0;
1571
1572         return (ret);
1573 }
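
     /*
      * Example (editor's sketch): callers such as the vnode code reduce
      * "is this locked?" queries to lockstatus(), e.g.:
      *
      *	switch (lockstatus(&lk)) {
      *	case LK_EXCLUSIVE:	(owned by curthread or LK_KERNPROC)
      *	case LK_EXCLOTHER:	(owned exclusively by another thread)
      *	case LK_SHARED:		(held in shared mode)
      *	case 0:			(unlocked)
      *	}
      */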
1574
1575 #ifdef INVARIANT_SUPPORT
1576
1577 FEATURE(invariant_support,
1578     "Support for modules compiled with INVARIANTS option");
1579
1580 #ifndef INVARIANTS
1581 #undef  _lockmgr_assert
1582 #endif
1583
1584 void
1585 _lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
1586 {
1587         int slocked = 0;
1588
1589         if (panicstr != NULL)
1590                 return;
1591         switch (what) {
1592         case KA_SLOCKED:
1593         case KA_SLOCKED | KA_NOTRECURSED:
1594         case KA_SLOCKED | KA_RECURSED:
1595                 slocked = 1;
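                     /* FALLTHROUGH */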
1596         case KA_LOCKED:
1597         case KA_LOCKED | KA_NOTRECURSED:
1598         case KA_LOCKED | KA_RECURSED:
1599 #ifdef WITNESS
1600
1601                 /*
1602                  * We cannot trust WITNESS if the lock is held in exclusive
1603                  * mode and a call to lockmgr_disown() happened.
1604                  * Work around this by skipping the check if the lock is
1605                  * held in exclusive mode, even for the KA_LOCKED case.
1606                  */
1607                 if (slocked || (lk->lk_lock & LK_SHARE)) {
1608                         witness_assert(&lk->lock_object, what, file, line);
1609                         break;
1610                 }
1611 #endif
1612                 if (lk->lk_lock == LK_UNLOCKED ||
1613                     ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1614                     (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1615                         panic("Lock %s not %slocked @ %s:%d\n",
1616                             lk->lock_object.lo_name, slocked ? "share" : "",
1617                             file, line);
1618
1619                 if ((lk->lk_lock & LK_SHARE) == 0) {
1620                         if (lockmgr_recursed(lk)) {
1621                                 if (what & KA_NOTRECURSED)
1622                                         panic("Lock %s recursed @ %s:%d\n",
1623                                             lk->lock_object.lo_name, file,
1624                                             line);
1625                         } else if (what & KA_RECURSED)
1626                                 panic("Lock %s not recursed @ %s:%d\n",
1627                                     lk->lock_object.lo_name, file, line);
1628                 }
1629                 break;
1630         case KA_XLOCKED:
1631         case KA_XLOCKED | KA_NOTRECURSED:
1632         case KA_XLOCKED | KA_RECURSED:
1633                 if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1634                         panic("Lock %s not exclusively locked @ %s:%d\n",
1635                             lk->lock_object.lo_name, file, line);
1636                 if (lockmgr_recursed(lk)) {
1637                         if (what & KA_NOTRECURSED)
1638                                 panic("Lock %s recursed @ %s:%d\n",
1639                                     lk->lock_object.lo_name, file, line);
1640                 } else if (what & KA_RECURSED)
1641                         panic("Lock %s not recursed @ %s:%d\n",
1642                             lk->lock_object.lo_name, file, line);
1643                 break;
1644         case KA_UNLOCKED:
1645                 if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1646                         panic("Lock %s exclusively locked @ %s:%d\n",
1647                             lk->lock_object.lo_name, file, line);
1648                 break;
1649         default:
1650                 panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1651                     line);
1652         }
1653 }
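
     /*
      * Example (editor's sketch): this is normally reached through the
      * lockmgr_assert() wrapper macro, which supplies file and line, e.g.:
      *
      *	lockmgr_assert(&lk, KA_XLOCKED);
      *	lockmgr_assert(&lk, KA_LOCKED | KA_NOTRECURSED);
      */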
1654 #endif
1655
1656 #ifdef DDB
1657 int
1658 lockmgr_chain(struct thread *td, struct thread **ownerp)
1659 {
1660         struct lock *lk;
1661
1662         lk = td->td_wchan;
1663
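             /* Simple test to see if wchan is a lockmgr lock. */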
1664         if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1665                 return (0);
1666         db_printf("blocked on lockmgr %s ", lk->lock_object.lo_name);
1667         if (lk->lk_lock & LK_SHARE)
1668                 db_printf("SHARED (count %ju)\n",
1669                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1670         else
1671                 db_printf("EXCL\n");
1672         *ownerp = lockmgr_xholder(lk);
1673
1674         return (1);
1675 }
1676
1677 static void
1678 db_show_lockmgr(const struct lock_object *lock)
1679 {
1680         struct thread *td;
1681         const struct lock *lk;
1682
1683         lk = (const struct lock *)lock;
1684
1685         db_printf(" state: ");
1686         if (lk->lk_lock == LK_UNLOCKED)
1687                 db_printf("UNLOCKED\n");
1688         else if (lk->lk_lock & LK_SHARE)
1689                 db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1690         else {
1691                 td = lockmgr_xholder(lk);
1692                 if (td == (struct thread *)LK_KERNPROC)
1693                         db_printf("XLOCK: LK_KERNPROC\n");
1694                 else
1695                         db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1696                             td->td_tid, td->td_proc->p_pid,
1697                             td->td_proc->p_comm);
1698                 if (lockmgr_recursed(lk))
1699                         db_printf(" recursed: %d\n", lk->lk_recurse);
1700         }
1701         db_printf(" waiters: ");
1702         switch (lk->lk_lock & LK_ALL_WAITERS) {
1703         case LK_SHARED_WAITERS:
1704                 db_printf("shared\n");
1705                 break;
1706         case LK_EXCLUSIVE_WAITERS:
1707                 db_printf("exclusive\n");
1708                 break;
1709         case LK_ALL_WAITERS:
1710                 db_printf("shared and exclusive\n");
1711                 break;
1712         default:
1713                 db_printf("none\n");
1714         }
1715         db_printf(" spinners: ");
1716         if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1717                 db_printf("exclusive\n");
1718         else
1719                 db_printf("none\n");
1720 }
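
     /*
      * Illustrative "show lock" DDB output for a lockmgr lock (editor's
      * sketch; exact values vary):
      *
      *	 state: XLOCK: 0xfffff80003f5a000 (tid 100042, pid 34, "syncer")
      *	 waiters: exclusive
      *	 spinners: none
      */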
1721 #endif