1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
28  * DAMAGE.
29  */
30
31 #include "opt_ddb.h"
32 #include "opt_hwpmc_hooks.h"
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include <sys/param.h>
38 #include <sys/kdb.h>
39 #include <sys/ktr.h>
40 #include <sys/lock.h>
41 #include <sys/lock_profile.h>
42 #include <sys/lockmgr.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/sleepqueue.h>
46 #ifdef DEBUG_LOCKS
47 #include <sys/stack.h>
48 #endif
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51
52 #include <machine/cpu.h>
53
54 #ifdef DDB
55 #include <ddb/ddb.h>
56 #endif
57
58 #ifdef HWPMC_HOOKS
59 #include <sys/pmckern.h>
60 PMC_SOFT_DECLARE( , , lock, failed);
61 #endif
62
63 CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
64     (LK_ADAPTIVE | LK_NOSHARE));
65 CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
66     ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
67
68 #define SQ_EXCLUSIVE_QUEUE      0
69 #define SQ_SHARED_QUEUE         1
70
71 #ifndef INVARIANTS
72 #define _lockmgr_assert(lk, what, file, line)
73 #endif
74
75 #define TD_SLOCKS_INC(td)       ((td)->td_lk_slocks++)
76 #define TD_SLOCKS_DEC(td)       ((td)->td_lk_slocks--)
77
78 #ifndef DEBUG_LOCKS
79 #define STACK_PRINT(lk)
80 #define STACK_SAVE(lk)
81 #define STACK_ZERO(lk)
82 #else
83 #define STACK_PRINT(lk) stack_print_ddb(&(lk)->lk_stack)
84 #define STACK_SAVE(lk)  stack_save(&(lk)->lk_stack)
85 #define STACK_ZERO(lk)  stack_zero(&(lk)->lk_stack)
86 #endif
87
88 #define LOCK_LOG2(lk, string, arg1, arg2)                               \
89         if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
90                 CTR2(KTR_LOCK, (string), (arg1), (arg2))
91 #define LOCK_LOG3(lk, string, arg1, arg2, arg3)                         \
92         if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
93                 CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
94
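/*
 * Helpers to drop Giant (recording how many times it was held) before
 * sleeping and to re-acquire it afterwards, preserving the WITNESS state.
 */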
95 #define GIANT_DECLARE                                                   \
96         int _i = 0;                                                     \
97         WITNESS_SAVE_DECL(Giant)
98 #define GIANT_RESTORE() do {                                            \
99         if (_i > 0) {                                                   \
100                 while (_i--)                                            \
101                         mtx_lock(&Giant);                               \
102                 WITNESS_RESTORE(&Giant.lock_object, Giant);             \
103         }                                                               \
104 } while (0)
105 #define GIANT_SAVE() do {                                               \
106         if (mtx_owned(&Giant)) {                                        \
107                 WITNESS_SAVE(&Giant.lock_object, Giant);                \
108                 while (mtx_owned(&Giant)) {                             \
109                         _i++;                                           \
110                         mtx_unlock(&Giant);                             \
111                 }                                                       \
112         }                                                               \
113 } while (0)
114
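/*
 * A lock can be shared when it is currently share-locked and either no
 * exclusive waiters or spinners are present, or the caller is allowed to
 * jump ahead of them to avoid a deadlock (it already holds shared locks
 * and LK_NODDLKTREAT was not passed, or TDP_DEADLKTREAT is set).
 */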
115 #define LK_CAN_SHARE(x, flags)                                          \
116         (((x) & LK_SHARE) &&                                            \
117         (((x) & (LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) == 0 || \
118         (curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) ||  \
119         (curthread->td_pflags & TDP_DEADLKTREAT)))
120 #define LK_TRYOP(x)                                                     \
121         ((x) & LK_NOWAIT)
122
123 #define LK_CAN_WITNESS(x)                                               \
124         (((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
125 #define LK_TRYWIT(x)                                                    \
126         (LK_TRYOP(x) ? LOP_TRYLOCK : 0)
127
128 #define LK_CAN_ADAPT(lk, f)                                             \
129         (((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&             \
130         ((f) & LK_SLEEPFAIL) == 0)
131
132 #define lockmgr_disowned(lk)                                            \
133         (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
134
135 #define lockmgr_xlocked(lk)                                             \
136         (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
137
138 static void     assert_lockmgr(const struct lock_object *lock, int how);
139 #ifdef DDB
140 static void     db_show_lockmgr(const struct lock_object *lock);
141 #endif
142 static void     lock_lockmgr(struct lock_object *lock, uintptr_t how);
143 #ifdef KDTRACE_HOOKS
144 static int      owner_lockmgr(const struct lock_object *lock,
145                     struct thread **owner);
146 #endif
147 static uintptr_t unlock_lockmgr(struct lock_object *lock);
148
149 struct lock_class lock_class_lockmgr = {
150         .lc_name = "lockmgr",
151         .lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
152         .lc_assert = assert_lockmgr,
153 #ifdef DDB
154         .lc_ddb_show = db_show_lockmgr,
155 #endif
156         .lc_lock = lock_lockmgr,
157         .lc_unlock = unlock_lockmgr,
158 #ifdef KDTRACE_HOOKS
159         .lc_owner = owner_lockmgr,
160 #endif
161 };
162
163 struct lockmgr_wait {
164         const char *iwmesg;
165         int ipri;
166         int itimo;
167 };
168
169 static bool __always_inline lockmgr_slock_try(struct lock *lk, uintptr_t *xp,
170     int flags);
171 static bool __always_inline lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp);
172
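/*
 * Common exit path: drop the interlock if LK_INTERLOCK was passed and
 * kick proc0 if a swapped-out thread was woken up.
 */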
173 static void
174 lockmgr_exit(u_int flags, struct lock_object *ilk, int wakeup_swapper)
175 {
176         struct lock_class *class;
177
178         if (flags & LK_INTERLOCK) {
179                 class = LOCK_CLASS(ilk);
180                 class->lc_unlock(ilk);
181         }
182
183         if (__predict_false(wakeup_swapper))
184                 kick_proc0();
185 }
186
187 static void
188 lockmgr_note_shared_acquire(struct lock *lk, int contested,
189     uint64_t waittime, const char *file, int line, int flags)
190 {
191
192         lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime,
193             file, line);
194         LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line);
195         WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line);
196         TD_LOCKS_INC(curthread);
197         TD_SLOCKS_INC(curthread);
198         STACK_SAVE(lk);
199 }
200
201 static void
202 lockmgr_note_shared_release(struct lock *lk, const char *file, int line)
203 {
204
205         lock_profile_release_lock(&lk->lock_object);
206         WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
207         LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
208         TD_LOCKS_DEC(curthread);
209         TD_SLOCKS_DEC(curthread);
210 }
211
212 static void
213 lockmgr_note_exclusive_acquire(struct lock *lk, int contested,
214     uint64_t waittime, const char *file, int line, int flags)
215 {
216
217         lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime,
218             file, line);
219         LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
220         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file,
221             line);
222         TD_LOCKS_INC(curthread);
223         STACK_SAVE(lk);
224 }
225
226 static void
227 lockmgr_note_exclusive_release(struct lock *lk, const char *file, int line)
228 {
229
230         lock_profile_release_lock(&lk->lock_object);
231         LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file,
232             line);
233         WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
234         TD_LOCKS_DEC(curthread);
235 }
236
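/*
 * Return the thread holding the lock exclusively, or NULL if the lock is
 * share-locked or unlocked.
 */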
237 static __inline struct thread *
238 lockmgr_xholder(const struct lock *lk)
239 {
240         uintptr_t x;
241
242         x = lk->lk_lock;
243         return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
244 }
245
246 /*
247  * This function assumes the sleepqueue chain lock is held on entry and
248  * returns with it released.  It also assumes the generic interlock is
249  * valid and has been checked by the caller.  If LK_INTERLOCK is
250  * specified, the interlock is not reacquired after the sleep.
251  */
252 static __inline int
253 sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
254     const char *wmesg, int pri, int timo, int queue)
255 {
256         GIANT_DECLARE;
257         struct lock_class *class;
258         int catch, error;
259
260         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
261         catch = pri & PCATCH;
262         pri &= PRIMASK;
263         error = 0;
264
265         LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
266             (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
267
268         if (flags & LK_INTERLOCK)
269                 class->lc_unlock(ilk);
270         if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
271                 lk->lk_exslpfail++;
272         GIANT_SAVE();
273         sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
274             SLEEPQ_INTERRUPTIBLE : 0), queue);
275         if ((flags & LK_TIMELOCK) && timo)
276                 sleepq_set_timeout(&lk->lock_object, timo);
277
278         /*
279          * Decide how to sleep, depending on the timeout and signal-catching requests.
280          */
281         if ((flags & LK_TIMELOCK) && timo && catch)
282                 error = sleepq_timedwait_sig(&lk->lock_object, pri);
283         else if ((flags & LK_TIMELOCK) && timo)
284                 error = sleepq_timedwait(&lk->lock_object, pri);
285         else if (catch)
286                 error = sleepq_wait_sig(&lk->lock_object, pri);
287         else
288                 sleepq_wait(&lk->lock_object, pri);
289         GIANT_RESTORE();
290         if ((flags & LK_SLEEPFAIL) && error == 0)
291                 error = ENOLCK;
292
293         return (error);
294 }
295
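/*
 * Release a shared lock, waking up waiters on the appropriate sleepqueue
 * when the last shared reference is dropped.  Returns non-zero if the
 * swapper needs to be kicked.
 */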
296 static __inline int
297 wakeupshlk(struct lock *lk, const char *file, int line)
298 {
299         uintptr_t v, x, orig_x;
300         u_int realexslp;
301         int queue, wakeup_swapper;
302
303         wakeup_swapper = 0;
304         for (;;) {
305                 x = lk->lk_lock;
306                 if (lockmgr_sunlock_try(lk, &x))
307                         break;
308
309                 /*
310                  * We should have a sharer with waiters, so enter the hard
311                  * path in order to handle wakeups correctly.
312                  */
313                 sleepq_lock(&lk->lock_object);
314                 orig_x = lk->lk_lock;
315 retry_sleepq:
316                 x = orig_x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
317                 v = LK_UNLOCKED;
318
319                 /*
320                  * If the lock has exclusive waiters, give them preference in
321                  * order to avoid a deadlock with shared runners-up.
322                  * If interruptible sleeps left the exclusive queue empty,
323                  * avoid starvation of the threads sleeping on the shared
324                  * queue by giving them precedence and clearing the
325                  * exclusive waiters bit anyway.
326                  * Note that the lk_exslpfail count may overstate the real
327                  * number of waiters with the LK_SLEEPFAIL flag set, because
328                  * LK_SLEEPFAIL may be combined with interruptible sleeps,
329                  * so lk_exslpfail should be treated as an upper bound,
330                  * including the edge cases.
331                  */
332                 realexslp = sleepq_sleepcnt(&lk->lock_object,
333                     SQ_EXCLUSIVE_QUEUE);
334                 if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
335                         if (lk->lk_exslpfail < realexslp) {
336                                 lk->lk_exslpfail = 0;
337                                 queue = SQ_EXCLUSIVE_QUEUE;
338                                 v |= (x & LK_SHARED_WAITERS);
339                         } else {
340                                 lk->lk_exslpfail = 0;
341                                 LOCK_LOG2(lk,
342                                     "%s: %p has only LK_SLEEPFAIL sleepers",
343                                     __func__, lk);
344                                 LOCK_LOG2(lk,
345                             "%s: %p waking up threads on the exclusive queue",
346                                     __func__, lk);
347                                 wakeup_swapper =
348                                     sleepq_broadcast(&lk->lock_object,
349                                     SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
350                                 queue = SQ_SHARED_QUEUE;
351                         }
352
353                 } else {
354
355                         /*
356                          * Exclusive waiters sleeping with LK_SLEEPFAIL on
357                          * and using interruptible sleeps/timeout may have
358                          * left spurious lk_exslpfail counts on, so clean
359                          * it up anyway.
360                          */
361                         lk->lk_exslpfail = 0;
362                         queue = SQ_SHARED_QUEUE;
363                 }
364
365                 if (lockmgr_sunlock_try(lk, &orig_x)) {
366                         sleepq_release(&lk->lock_object);
367                         break;
368                 }
369
370                 x |= LK_SHARERS_LOCK(1);
371                 if (!atomic_fcmpset_rel_ptr(&lk->lk_lock, &x, v)) {
372                         orig_x = x;
373                         goto retry_sleepq;
374                 }
375                 LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
376                     __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
377                     "exclusive");
378                 wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
379                     0, queue);
380                 sleepq_release(&lk->lock_object);
381                 break;
382         }
383
384         lockmgr_note_shared_release(lk, file, line);
385         return (wakeup_swapper);
386 }
387
388 static void
389 assert_lockmgr(const struct lock_object *lock, int what)
390 {
391
392         panic("lockmgr locks do not support assertions");
393 }
394
395 static void
396 lock_lockmgr(struct lock_object *lock, uintptr_t how)
397 {
398
399         panic("lockmgr locks do not support sleep interlocking");
400 }
401
402 static uintptr_t
403 unlock_lockmgr(struct lock_object *lock)
404 {
405
406         panic("lockmgr locks do not support sleep interlocking");
407 }
408
409 #ifdef KDTRACE_HOOKS
410 static int
411 owner_lockmgr(const struct lock_object *lock, struct thread **owner)
412 {
413
414         panic("lockmgr locks do not support owner inquiring");
415 }
416 #endif
417
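/*
 * Initialize a lockmgr lock.  The LK_* initialization flags are translated
 * into the matching LO_* flags before the lock is handed to the generic
 * lock framework.  A minimal usage sketch (hypothetical caller, using the
 * default priority and timeout semantics):
 *
 *	struct lock l;
 *
 *	lockinit(&l, PVFS, "mylock", 0, 0);
 *	lockmgr(&l, LK_EXCLUSIVE, NULL);
 *	...
 *	lockmgr(&l, LK_RELEASE, NULL);
 *	lockdestroy(&l);
 */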
418 void
419 lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
420 {
421         int iflags;
422
423         MPASS((flags & ~LK_INIT_MASK) == 0);
424         ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
425             ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
426             &lk->lk_lock));
427
428         iflags = LO_SLEEPABLE | LO_UPGRADABLE;
429         if (flags & LK_CANRECURSE)
430                 iflags |= LO_RECURSABLE;
431         if ((flags & LK_NODUP) == 0)
432                 iflags |= LO_DUPOK;
433         if (flags & LK_NOPROFILE)
434                 iflags |= LO_NOPROFILE;
435         if ((flags & LK_NOWITNESS) == 0)
436                 iflags |= LO_WITNESS;
437         if (flags & LK_QUIET)
438                 iflags |= LO_QUIET;
439         if (flags & LK_IS_VNODE)
440                 iflags |= LO_IS_VNODE;
441         iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
442
443         lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
444         lk->lk_lock = LK_UNLOCKED;
445         lk->lk_recurse = 0;
446         lk->lk_exslpfail = 0;
447         lk->lk_timo = timo;
448         lk->lk_pri = pri;
449         STACK_ZERO(lk);
450 }
451
452 /*
453  * XXX: Gross hacks to manipulate external lock flags after
454  * initialization.  Used for certain vnode and buf locks.
455  */
456 void
457 lockallowshare(struct lock *lk)
458 {
459
460         lockmgr_assert(lk, KA_XLOCKED);
461         lk->lock_object.lo_flags &= ~LK_NOSHARE;
462 }
463
464 void
465 lockdisableshare(struct lock *lk)
466 {
467
468         lockmgr_assert(lk, KA_XLOCKED);
469         lk->lock_object.lo_flags |= LK_NOSHARE;
470 }
471
472 void
473 lockallowrecurse(struct lock *lk)
474 {
475
476         lockmgr_assert(lk, KA_XLOCKED);
477         lk->lock_object.lo_flags |= LO_RECURSABLE;
478 }
479
480 void
481 lockdisablerecurse(struct lock *lk)
482 {
483
484         lockmgr_assert(lk, KA_XLOCKED);
485         lk->lock_object.lo_flags &= ~LO_RECURSABLE;
486 }
487
488 void
489 lockdestroy(struct lock *lk)
490 {
491
492         KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
493         KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
494         KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
495         lock_destroy(&lk->lock_object);
496 }
497
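/*
 * Fast-path attempt at grabbing a shared reference.  On failure the last
 * observed lock word is left in *xp for the caller.
 */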
498 static bool __always_inline
499 lockmgr_slock_try(struct lock *lk, uintptr_t *xp, int flags)
500 {
501
502         /*
503          * If no other thread has an exclusive lock and
504          * no exclusive waiter is present, bump the count of
505          * sharers.  Since we have to preserve the state of
506          * waiters, if we fail to acquire the shared lock
507          * loop back and retry.
508          */
509         *xp = lk->lk_lock;
510         while (LK_CAN_SHARE(*xp, flags)) {
511                 if (atomic_fcmpset_acq_ptr(&lk->lk_lock, xp,
512                     *xp + LK_ONE_SHARER)) {
513                         return (true);
514                 }
515         }
516         return (false);
517 }
518
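/*
 * Fast-path shared release.  Succeeds when another shared reference
 * remains or when the last one can be dropped with no waiters to wake up;
 * otherwise returns false so the caller can fall back to the hard path.
 */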
519 static bool __always_inline
520 lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp)
521 {
522
523         for (;;) {
524                 /*
525                  * If there is more than one shared lock held, just drop one
526                  * and return.
527                  */
528                 if (LK_SHARERS(*xp) > 1) {
529                         if (atomic_fcmpset_rel_ptr(&lk->lk_lock, xp,
530                             *xp - LK_ONE_SHARER))
531                                 return (true);
532                         continue;
533                 }
534
535                 /*
536                  * If there are no waiters on the exclusive queue, drop the
537                  * lock quickly.
538                  */
539                 if ((*xp & LK_ALL_WAITERS) == 0) {
540                         MPASS((*xp & ~LK_EXCLUSIVE_SPINNERS) ==
541                             LK_SHARERS_LOCK(1));
542                         if (atomic_fcmpset_rel_ptr(&lk->lk_lock, xp,
543                             LK_UNLOCKED))
544                                 return (true);
545                         continue;
546                 }
547                 break;
548         }
549         return (false);
550 }
551
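/*
 * Hard path for shared acquisition: loops until the lock can be shared,
 * setting LK_SHARED_WAITERS and sleeping on the shared queue as needed,
 * while honouring LK_NOWAIT and the optional interlock.
 */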
552 static __noinline int
553 lockmgr_slock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
554     const char *file, int line, struct lockmgr_wait *lwa)
555 {
556         uintptr_t tid, x;
557         int error = 0;
558         const char *iwmesg;
559         int ipri, itimo;
560
561 #ifdef LOCK_PROFILING
562         uint64_t waittime = 0;
563         int contested = 0;
564 #endif
565
566         if (__predict_false(panicstr != NULL))
567                 goto out;
568
569         tid = (uintptr_t)curthread;
570
571         if (LK_CAN_WITNESS(flags))
572                 WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
573                     file, line, flags & LK_INTERLOCK ? ilk : NULL);
574         for (;;) {
575                 if (lockmgr_slock_try(lk, &x, flags))
576                         break;
577 #ifdef HWPMC_HOOKS
578                 PMC_SOFT_CALL( , , lock, failed);
579 #endif
580                 lock_profile_obtain_lock_failed(&lk->lock_object,
581                     &contested, &waittime);
582
583                 /*
584                  * If the lock is already held by curthread in
585                  * an exclusive way, avoid a deadlock.
586                  */
587                 if (LK_HOLDER(x) == tid) {
588                         LOCK_LOG2(lk,
589                             "%s: %p already held in exclusive mode",
590                             __func__, lk);
591                         error = EDEADLK;
592                         break;
593                 }
594
595                 /*
596                  * If the operation is not expected to sleep, just give up
597                  * and return.
598                  */
599                 if (LK_TRYOP(flags)) {
600                         LOCK_LOG2(lk, "%s: %p fails the try operation",
601                             __func__, lk);
602                         error = EBUSY;
603                         break;
604                 }
605
606                 /*
607                  * Acquire the sleepqueue chain lock because we
608                  * probabilly will need to manipulate waiters flags.
609                  */
610                 sleepq_lock(&lk->lock_object);
611                 x = lk->lk_lock;
612 retry_sleepq:
613
614                 /*
615                  * If the lock can be acquired in shared mode, try
616                  * again.
617                  */
618                 if (LK_CAN_SHARE(x, flags)) {
619                         sleepq_release(&lk->lock_object);
620                         continue;
621                 }
622
623                 /*
624                  * Try to set the LK_SHARED_WAITERS flag.  If we fail,
625                  * loop back and retry.
626                  */
627                 if ((x & LK_SHARED_WAITERS) == 0) {
628                         if (!atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
629                             x | LK_SHARED_WAITERS)) {
630                                 goto retry_sleepq;
631                         }
632                         LOCK_LOG2(lk, "%s: %p set shared waiters flag",
633                             __func__, lk);
634                 }
635
636                 if (lwa == NULL) {
637                         iwmesg = lk->lock_object.lo_name;
638                         ipri = lk->lk_pri;
639                         itimo = lk->lk_timo;
640                 } else {
641                         iwmesg = lwa->iwmesg;
642                         ipri = lwa->ipri;
643                         itimo = lwa->itimo;
644                 }
645
646                 /*
647                  * Since we have been unable to acquire the
648                  * shared lock and the shared waiters flag is set,
649                  * we will sleep.
650                  */
651                 error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
652                     SQ_SHARED_QUEUE);
653                 flags &= ~LK_INTERLOCK;
654                 if (error) {
655                         LOCK_LOG3(lk,
656                             "%s: interrupted sleep for %p with %d",
657                             __func__, lk, error);
658                         break;
659                 }
660                 LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
661                     __func__, lk);
662         }
663         if (error == 0) {
664 #ifdef LOCK_PROFILING
665                 lockmgr_note_shared_acquire(lk, contested, waittime,
666                     file, line, flags);
667 #else
668                 lockmgr_note_shared_acquire(lk, 0, 0, file, line,
669                     flags);
670 #endif
671         }
672
673 out:
674         lockmgr_exit(flags, ilk, 0);
675         return (error);
676 }
677
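/*
 * Hard path for exclusive acquisition: handles recursion, then loops
 * setting LK_EXCLUSIVE_WAITERS and sleeping on the exclusive queue until
 * the lock can be claimed, honouring LK_NOWAIT and the optional interlock.
 */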
678 static __noinline int
679 lockmgr_xlock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
680     const char *file, int line, struct lockmgr_wait *lwa)
681 {
682         struct lock_class *class;
683         uintptr_t tid, x, v;
684         int error = 0;
685         const char *iwmesg;
686         int ipri, itimo;
687
688 #ifdef LOCK_PROFILING
689         uint64_t waittime = 0;
690         int contested = 0;
691 #endif
692
693         if (__predict_false(panicstr != NULL))
694                 goto out;
695
696         tid = (uintptr_t)curthread;
697
698         if (LK_CAN_WITNESS(flags))
699                 WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
700                     LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
701                     ilk : NULL);
702
703         /*
704          * If curthread already holds the lock and this one is
705          * allowed to recurse, simply recurse on it.
706          */
707         if (lockmgr_xlocked(lk)) {
708                 if ((flags & LK_CANRECURSE) == 0 &&
709                     (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
710                         /*
711                          * If this is a try operation, give up and
712                          * return EBUSY instead of panicking.
713                          */
714                         if (LK_TRYOP(flags)) {
715                                 LOCK_LOG2(lk,
716                                     "%s: %p fails the try operation",
717                                     __func__, lk);
718                                 error = EBUSY;
719                                 goto out;
720                         }
721                         if (flags & LK_INTERLOCK) {
722                                 class = LOCK_CLASS(ilk);
723                                 class->lc_unlock(ilk);
724                         }
725                         panic("%s: recursing on non recursive lockmgr %p "
726                             "@ %s:%d\n", __func__, lk, file, line);
727                 }
728                 lk->lk_recurse++;
729                 LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
730                 LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
731                     lk->lk_recurse, file, line);
732                 WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
733                     LK_TRYWIT(flags), file, line);
734                 TD_LOCKS_INC(curthread);
735                 goto out;
736         }
737
738         for (;;) {
739                 if (lk->lk_lock == LK_UNLOCKED &&
740                     atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
741                         break;
742 #ifdef HWPMC_HOOKS
743                 PMC_SOFT_CALL( , , lock, failed);
744 #endif
745                 lock_profile_obtain_lock_failed(&lk->lock_object,
746                     &contested, &waittime);
747
748                 /*
749                  * If the operation is not expected to sleep, just give up
750                  * and return.
751                  */
752                 if (LK_TRYOP(flags)) {
753                         LOCK_LOG2(lk, "%s: %p fails the try operation",
754                             __func__, lk);
755                         error = EBUSY;
756                         break;
757                 }
758
759                 /*
760                  * Acquire the sleepqueue chain lock because we
761                  * probably will need to manipulate the waiters flags.
762                  */
763                 sleepq_lock(&lk->lock_object);
764                 x = lk->lk_lock;
765 retry_sleepq:
766
767                 /*
768                  * If the lock has been released while we spun on
769                  * the sleepqueue chain lock, just try again.
770                  */
771                 if (x == LK_UNLOCKED) {
772                         sleepq_release(&lk->lock_object);
773                         continue;
774                 }
775
776                 /*
777                  * The lock can be in the state where there is a
778                  * pending queue of waiters, but still no owner.
779                  * This happens when the lock is contested and an
780                  * owner is going to claim the lock.
781                  * If curthread is the one successfully acquiring it
782                  * claim lock ownership and return, preserving waiters
783                  * flags.
784                  */
785                 v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
786                 if ((x & ~v) == LK_UNLOCKED) {
787                         v &= ~LK_EXCLUSIVE_SPINNERS;
788                         if (atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
789                             tid | v)) {
790                                 sleepq_release(&lk->lock_object);
791                                 LOCK_LOG2(lk,
792                                     "%s: %p claimed by a new writer",
793                                     __func__, lk);
794                                 break;
795                         }
796                         goto retry_sleepq;
797                 }
798
799                 /*
800                  * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
801                  * fail, loop back and retry.
802                  */
803                 if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
804                         if (!atomic_fcmpset_ptr(&lk->lk_lock, &x,
805                             x | LK_EXCLUSIVE_WAITERS)) {
806                                 goto retry_sleepq;
807                         }
808                         LOCK_LOG2(lk, "%s: %p set excl waiters flag",
809                             __func__, lk);
810                 }
811
812                 if (lwa == NULL) {
813                         iwmesg = lk->lock_object.lo_name;
814                         ipri = lk->lk_pri;
815                         itimo = lk->lk_timo;
816                 } else {
817                         iwmesg = lwa->iwmesg;
818                         ipri = lwa->ipri;
819                         itimo = lwa->itimo;
820                 }
821
822                 /*
823                  * Since we have been unable to acquire the
824                  * exclusive lock and the exclusive waiters flag
825                  * is set, we will sleep.
826                  */
827                 error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
828                     SQ_EXCLUSIVE_QUEUE);
829                 flags &= ~LK_INTERLOCK;
830                 if (error) {
831                         LOCK_LOG3(lk,
832                             "%s: interrupted sleep for %p with %d",
833                             __func__, lk, error);
834                         break;
835                 }
836                 LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
837                     __func__, lk);
838         }
839         if (error == 0) {
840 #ifdef LOCK_PROFILING
841                 lockmgr_note_exclusive_acquire(lk, contested, waittime,
842                     file, line, flags);
843 #else
844                 lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
845                     flags);
846 #endif
847         }
848
849 out:
850         lockmgr_exit(flags, ilk, 0);
851         return (error);
852 }
853
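/*
 * Upgrade a shared lock to an exclusive one.  A single atomic swap is
 * attempted first; if that fails, LK_TRYUPGRADE returns EBUSY while
 * LK_UPGRADE drops the shared lock and falls back to a full exclusive
 * acquisition.
 */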
854 static __noinline int
855 lockmgr_upgrade(struct lock *lk, u_int flags, struct lock_object *ilk,
856     const char *file, int line, struct lockmgr_wait *lwa)
857 {
858         uintptr_t tid, x, v;
859         int error = 0;
860         int wakeup_swapper = 0;
861         int op;
862
863         if (__predict_false(panicstr != NULL))
864                 goto out;
865
866         tid = (uintptr_t)curthread;
867
868         _lockmgr_assert(lk, KA_SLOCKED, file, line);
869         v = lk->lk_lock;
870         x = v & LK_ALL_WAITERS;
871         v &= LK_EXCLUSIVE_SPINNERS;
872
873         /*
874          * Try to switch from one shared lock to an exclusive one.
875          * We need to preserve waiters flags during the operation.
876          */
877         if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
878             tid | x)) {
879                 LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
880                     line);
881                 WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
882                     LK_TRYWIT(flags), file, line);
883                 TD_SLOCKS_DEC(curthread);
884                 goto out;
885         }
886
887         op = flags & LK_TYPE_MASK;
888
889         /*
890          * In LK_TRYUPGRADE mode, do not drop the lock,
891          * returning EBUSY instead.
892          */
893         if (op == LK_TRYUPGRADE) {
894                 LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
895                     __func__, lk);
896                 error = EBUSY;
897                 goto out;
898         }
899
900         /*
901          * We have been unable to succeed in upgrading, so just
902          * give up the shared lock.
903          */
904         wakeup_swapper |= wakeupshlk(lk, file, line);
905         error = lockmgr_xlock_hard(lk, flags, ilk, file, line, lwa);
906         flags &= ~LK_INTERLOCK;
907 out:
908         lockmgr_exit(flags, ilk, wakeup_swapper);
909         return (error);
910 }
911
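/*
 * Fast-path lock entry point: tries the uncontested shared or exclusive
 * case inline and defers everything else to the hard paths or to
 * __lockmgr_args().
 */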
912 int
913 lockmgr_lock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk,
914     const char *file, int line)
915 {
916         struct lock_class *class;
917         uintptr_t x, tid;
918         u_int op;
919         bool locked;
920
921         op = flags & LK_TYPE_MASK;
922         locked = false;
923         switch (op) {
924         case LK_SHARED:
925                 if (LK_CAN_WITNESS(flags))
926                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
927                             file, line, flags & LK_INTERLOCK ? ilk : NULL);
928                 if (__predict_false(lk->lock_object.lo_flags & LK_NOSHARE))
929                         break;
930                 if (lockmgr_slock_try(lk, &x, flags)) {
931                         lockmgr_note_shared_acquire(lk, 0, 0,
932                             file, line, flags);
933                         locked = true;
934                 } else {
935                         return (lockmgr_slock_hard(lk, flags, ilk, file, line,
936                             NULL));
937                 }
938                 break;
939         case LK_EXCLUSIVE:
940                 if (LK_CAN_WITNESS(flags))
941                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
942                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
943                             ilk : NULL);
944                 tid = (uintptr_t)curthread;
945                 if (lk->lk_lock == LK_UNLOCKED &&
946                     atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
947                         lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
948                             flags);
949                         locked = true;
950                 } else {
951                         return (lockmgr_xlock_hard(lk, flags, ilk, file, line,
952                             NULL));
953                 }
954                 break;
955         case LK_UPGRADE:
956         case LK_TRYUPGRADE:
957                 return (lockmgr_upgrade(lk, flags, ilk, file, line, NULL));
958         default:
959                 break;
960         }
961         if (__predict_true(locked)) {
962                 if (__predict_false(flags & LK_INTERLOCK)) {
963                         class = LOCK_CLASS(ilk);
964                         class->lc_unlock(ilk);
965                 }
966                 return (0);
967         } else {
968                 return (__lockmgr_args(lk, flags, ilk, LK_WMESG_DEFAULT,
969                     LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, file, line));
970         }
971 }
972
973 static __noinline int
974 lockmgr_sunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
975     const char *file, int line)
976
977 {
978         int wakeup_swapper = 0;
979
980         if (__predict_false(panicstr != NULL))
981                 goto out;
982
983         wakeup_swapper = wakeupshlk(lk, file, line);
984
985 out:
986         lockmgr_exit(flags, ilk, wakeup_swapper);
987         return (0);
988 }
989
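/*
 * Hard path for an exclusive release: handles disowned locks, recursion
 * and waking up threads queued on the sleepqueues.
 */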
990 static __noinline int
991 lockmgr_xunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
992     const char *file, int line)
993 {
994         uintptr_t tid, v;
995         int wakeup_swapper = 0;
996         u_int realexslp;
997         int queue;
998
999         if (__predict_false(panicstr != NULL))
1000                 goto out;
1001
1002         tid = (uintptr_t)curthread;
1003
1004         /*
1005          * As a first option, treat the lock as if it has no
1006          * waiters.
1007          * Fix up the tid variable if the lock has been disowned.
1008          */
1009         if (LK_HOLDER(x) == LK_KERNPROC)
1010                 tid = LK_KERNPROC;
1011         else {
1012                 WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1013                 TD_LOCKS_DEC(curthread);
1014         }
1015         LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
1016
1017         /*
1018          * The lock is held in exclusive mode.
1019          * If the lock is recursed also, then unrecurse it.
1020          */
1021         if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1022                 LOCK_LOG2(lk, "%s: %p unrecursing", __func__, lk);
1023                 lk->lk_recurse--;
1024                 goto out;
1025         }
1026         if (tid != LK_KERNPROC)
1027                 lock_profile_release_lock(&lk->lock_object);
1028
1029         if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED))
1030                 goto out;
1031
1032         sleepq_lock(&lk->lock_object);
1033         x = lk->lk_lock;
1034         v = LK_UNLOCKED;
1035
1036         /*
1037          * If the lock has exclusive waiters, give them
1038          * preference in order to avoid a deadlock with
1039          * shared runners-up.
1040          * If interruptible sleeps left the exclusive queue
1041          * empty, avoid starvation of the threads sleeping
1042          * on the shared queue by giving them precedence
1043          * and clearing the exclusive waiters bit anyway.
1044          * Note that the lk_exslpfail count may overstate
1045          * the real number of waiters with the LK_SLEEPFAIL
1046          * flag set, because LK_SLEEPFAIL may be combined
1047          * with interruptible sleeps, so lk_exslpfail should
1048          * be treated as an upper bound, including the edge
1049          * cases.
1050          */
1051         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1052         realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE);
1053         if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1054                 if (lk->lk_exslpfail < realexslp) {
1055                         lk->lk_exslpfail = 0;
1056                         queue = SQ_EXCLUSIVE_QUEUE;
1057                         v |= (x & LK_SHARED_WAITERS);
1058                 } else {
1059                         lk->lk_exslpfail = 0;
1060                         LOCK_LOG2(lk,
1061                             "%s: %p has only LK_SLEEPFAIL sleepers",
1062                             __func__, lk);
1063                         LOCK_LOG2(lk,
1064                             "%s: %p waking up threads on the exclusive queue",
1065                             __func__, lk);
1066                         wakeup_swapper = sleepq_broadcast(&lk->lock_object,
1067                             SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1068                         queue = SQ_SHARED_QUEUE;
1069                 }
1070         } else {
1071
1072                 /*
1073                  * Exclusive waiters sleeping with LK_SLEEPFAIL
1074                  * on and using interruptible sleeps/timeout
1075                  * may have left spurious lk_exslpfail counts
1076                  * on, so clean it up anyway.
1077                  */
1078                 lk->lk_exslpfail = 0;
1079                 queue = SQ_SHARED_QUEUE;
1080         }
1081
1082         LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
1083             __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1084             "exclusive");
1085         atomic_store_rel_ptr(&lk->lk_lock, v);
1086         wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue);
1087         sleepq_release(&lk->lock_object);
1088
1089 out:
1090         lockmgr_exit(flags, ilk, wakeup_swapper);
1091         return (0);
1092 }
1093
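/*
 * Fast-path unlock entry point: releases an uncontested shared or
 * exclusive lock inline and falls back to the hard paths otherwise.
 */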
1094 int
1095 lockmgr_unlock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk)
1096 {
1097         struct lock_class *class;
1098         uintptr_t x, tid;
1099         bool unlocked;
1100         const char *file;
1101         int line;
1102
1103         file = __FILE__;
1104         line = __LINE__;
1105
1106         _lockmgr_assert(lk, KA_LOCKED, file, line);
1107         unlocked = false;
1108         x = lk->lk_lock;
1109         if (__predict_true(x & LK_SHARE) != 0) {
1110                 if (lockmgr_sunlock_try(lk, &x)) {
1111                         lockmgr_note_shared_release(lk, file, line);
1112                         unlocked = true;
1113                 } else {
1114                         return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
1115                 }
1116         } else {
1117                 tid = (uintptr_t)curthread;
1118                 if (!lockmgr_recursed(lk) &&
1119                     atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) {
1120                         lockmgr_note_exclusive_release(lk, file, line);
1121                         unlocked = true;
1122                 } else {
1123                         return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
1124                 }
1125         }
1126         if (__predict_false(flags & LK_INTERLOCK)) {
1127                 class = LOCK_CLASS(ilk);
1128                 class->lc_unlock(ilk);
1129         }
1130         return (0);
1131 }
1132
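/*
 * Full-featured lockmgr entry point: validates the requested operation,
 * honours LK_NOSHARE, dispatches shared/exclusive/upgrade requests to the
 * dedicated paths and handles LK_DOWNGRADE, LK_RELEASE and LK_DRAIN
 * inline.
 */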
1133 int
1134 __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
1135     const char *wmesg, int pri, int timo, const char *file, int line)
1136 {
1137         GIANT_DECLARE;
1138         struct lockmgr_wait lwa;
1139         struct lock_class *class;
1140         const char *iwmesg;
1141         uintptr_t tid, v, x;
1142         u_int op, realexslp;
1143         int error, ipri, itimo, queue, wakeup_swapper;
1144 #ifdef LOCK_PROFILING
1145         uint64_t waittime = 0;
1146         int contested = 0;
1147 #endif
1148
1149         error = 0;
1150         tid = (uintptr_t)curthread;
1151         op = (flags & LK_TYPE_MASK);
1152         iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
1153         ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
1154         itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
1155
1156         lwa.iwmesg = iwmesg;
1157         lwa.ipri = ipri;
1158         lwa.itimo = itimo;
1159
1160         MPASS((flags & ~LK_TOTAL_MASK) == 0);
1161         KASSERT((op & (op - 1)) == 0,
1162             ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
1163         KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
1164             (op != LK_DOWNGRADE && op != LK_RELEASE),
1165             ("%s: Invalid flags in regard of the operation desired @ %s:%d",
1166             __func__, file, line));
1167         KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
1168             ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
1169             __func__, file, line));
1170         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
1171             ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
1172             lk->lock_object.lo_name, file, line));
1173
1174         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
1175         if (panicstr != NULL) {
1176                 if (flags & LK_INTERLOCK)
1177                         class->lc_unlock(ilk);
1178                 return (0);
1179         }
1180
1181         if (lk->lock_object.lo_flags & LK_NOSHARE) {
1182                 switch (op) {
1183                 case LK_SHARED:
1184                         op = LK_EXCLUSIVE;
1185                         break;
1186                 case LK_UPGRADE:
1187                 case LK_TRYUPGRADE:
1188                 case LK_DOWNGRADE:
1189                         _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
1190                             file, line);
1191                         if (flags & LK_INTERLOCK)
1192                                 class->lc_unlock(ilk);
1193                         return (0);
1194                 }
1195         }
1196
1197         wakeup_swapper = 0;
1198         switch (op) {
1199         case LK_SHARED:
1200                 return (lockmgr_slock_hard(lk, flags, ilk, file, line, &lwa));
1201                 break;
1202         case LK_UPGRADE:
1203         case LK_TRYUPGRADE:
1204                 return (lockmgr_upgrade(lk, flags, ilk, file, line, &lwa));
1205                 break;
1206         case LK_EXCLUSIVE:
1207                 return (lockmgr_xlock_hard(lk, flags, ilk, file, line, &lwa));
1208                 break;
1209         case LK_DOWNGRADE:
1210                 _lockmgr_assert(lk, KA_XLOCKED, file, line);
1211                 LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
1212                 WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
1213
1214                 /*
1215                  * Panic if the lock is recursed.
1216                  */
1217                 if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1218                         if (flags & LK_INTERLOCK)
1219                                 class->lc_unlock(ilk);
1220                         panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
1221                             __func__, iwmesg, file, line);
1222                 }
1223                 TD_SLOCKS_INC(curthread);
1224
1225                 /*
1226                  * In order to preserve waiters flags, just spin.
1227                  */
1228                 for (;;) {
1229                         x = lk->lk_lock;
1230                         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1231                         x &= LK_ALL_WAITERS;
1232                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1233                             LK_SHARERS_LOCK(1) | x))
1234                                 break;
1235                         cpu_spinwait();
1236                 }
1237                 break;
1238         case LK_RELEASE:
1239                 _lockmgr_assert(lk, KA_LOCKED, file, line);
1240                 x = lk->lk_lock;
1241
1242                 if (__predict_true(x & LK_SHARE) != 0) {
1243                         return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
1244                 } else {
1245                         return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
1246                 }
1247                 break;
1248         case LK_DRAIN:
1249                 if (LK_CAN_WITNESS(flags))
1250                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1251                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1252                             ilk : NULL);
1253
1254                 /*
1255                  * Trying to drain a lock we already own will result in a
1256                  * deadlock.
1257                  */
1258                 if (lockmgr_xlocked(lk)) {
1259                         if (flags & LK_INTERLOCK)
1260                                 class->lc_unlock(ilk);
1261                         panic("%s: draining %s with the lock held @ %s:%d\n",
1262                             __func__, iwmesg, file, line);
1263                 }
1264
1265                 for (;;) {
1266                         if (lk->lk_lock == LK_UNLOCKED &&
1267                             atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
1268                                 break;
1269
1270 #ifdef HWPMC_HOOKS
1271                         PMC_SOFT_CALL( , , lock, failed);
1272 #endif
1273                         lock_profile_obtain_lock_failed(&lk->lock_object,
1274                             &contested, &waittime);
1275
1276                         /*
1277                          * If the operation is not expected to sleep, just give up
1278                          * and return.
1279                          */
1280                         if (LK_TRYOP(flags)) {
1281                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
1282                                     __func__, lk);
1283                                 error = EBUSY;
1284                                 break;
1285                         }
1286
1287                         /*
1288                          * Acquire the sleepqueue chain lock because we
1289                          * probably will need to manipulate the waiters flags.
1290                          */
1291                         sleepq_lock(&lk->lock_object);
1292                         x = lk->lk_lock;
1293
1294                         /*
1295                          * If the lock has been released while we spun on
1296                          * the sleepqueue chain lock, just try again.
1297                          */
1298                         if (x == LK_UNLOCKED) {
1299                                 sleepq_release(&lk->lock_object);
1300                                 continue;
1301                         }
1302
1303                         v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1304                         if ((x & ~v) == LK_UNLOCKED) {
1305                                 v = (x & ~LK_EXCLUSIVE_SPINNERS);
1306
1307                                 /*
1308                                  * If interruptible sleeps left the exclusive
1309                                  * queue empty, avoid starvation of the
1310                                  * threads sleeping on the shared queue by
1311                                  * giving them precedence and clearing the
1312                                  * exclusive waiters bit anyway.
1313                                  * Note that the lk_exslpfail count may
1314                                  * overstate the real number of waiters
1315                                  * with the LK_SLEEPFAIL flag set, because
1316                                  * it may be combined with interruptible
1317                                  * sleeps, so lk_exslpfail should be
1318                                  * treated as an upper bound, including
1319                                  * the edge cases.
1320                                  */
1321                                 if (v & LK_EXCLUSIVE_WAITERS) {
1322                                         queue = SQ_EXCLUSIVE_QUEUE;
1323                                         v &= ~LK_EXCLUSIVE_WAITERS;
1324                                 } else {
1325
1326                                         /*
1327                                          * Exclusive waiters sleeping with
1328                                          * LK_SLEEPFAIL on and using
1329                                          * interruptible sleeps/timeout may
1330                                          * have left spurious lk_exslpfail
1331                                          * counts on, so clean it up anyway.
1332                                          */
1333                                         MPASS(v & LK_SHARED_WAITERS);
1334                                         lk->lk_exslpfail = 0;
1335                                         queue = SQ_SHARED_QUEUE;
1336                                         v &= ~LK_SHARED_WAITERS;
1337                                 }
1338                                 if (queue == SQ_EXCLUSIVE_QUEUE) {
1339                                         realexslp =
1340                                             sleepq_sleepcnt(&lk->lock_object,
1341                                             SQ_EXCLUSIVE_QUEUE);
1342                                         if (lk->lk_exslpfail >= realexslp) {
1343                                                 lk->lk_exslpfail = 0;
1344                                                 queue = SQ_SHARED_QUEUE;
1345                                                 v &= ~LK_SHARED_WAITERS;
1346                                                 if (realexslp != 0) {
1347                                                         LOCK_LOG2(lk,
1348                                         "%s: %p has only LK_SLEEPFAIL sleepers",
1349                                                             __func__, lk);
1350                                                         LOCK_LOG2(lk,
1351                         "%s: %p waking up threads on the exclusive queue",
1352                                                             __func__, lk);
1353                                                         wakeup_swapper =
1354                                                             sleepq_broadcast(
1355                                                             &lk->lock_object,
1356                                                             SLEEPQ_LK, 0,
1357                                                             SQ_EXCLUSIVE_QUEUE);
1358                                                 }
1359                                         } else
1360                                                 lk->lk_exslpfail = 0;
1361                                 }
1362                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1363                                         sleepq_release(&lk->lock_object);
1364                                         continue;
1365                                 }
1366                                 LOCK_LOG3(lk,
1367                                 "%s: %p waking up all threads on the %s queue",
1368                                     __func__, lk, queue == SQ_SHARED_QUEUE ?
1369                                     "shared" : "exclusive");
1370                                 wakeup_swapper |= sleepq_broadcast(
1371                                     &lk->lock_object, SLEEPQ_LK, 0, queue);
1372
1373                                 /*
1374                                  * If shared waiters have been woken up, we need
1375                                  * to wait for one of them to acquire the lock
1376                                  * before setting the exclusive waiters flag in
1377                                  * order to avoid a deadlock.
1378                                  */
1379                                 if (queue == SQ_SHARED_QUEUE) {
1380                                         for (v = lk->lk_lock;
1381                                             (v & LK_SHARE) && !LK_SHARERS(v);
1382                                             v = lk->lk_lock)
1383                                                 cpu_spinwait();
1384                                 }
1385                         }
1386
1387                         /*
1388                          * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1389                          * fail, loop back and retry.
1390                          */
1391                         if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1392                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1393                                     x | LK_EXCLUSIVE_WAITERS)) {
1394                                         sleepq_release(&lk->lock_object);
1395                                         continue;
1396                                 }
1397                                 LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1398                                     __func__, lk);
1399                         }
1400
1401                         /*
1402                          * Since we have been unable to acquire the
1403                          * exclusive lock and the exclusive waiters flag
1404                          * is set, we will sleep.
1405                          */
1406                         if (flags & LK_INTERLOCK) {
1407                                 class->lc_unlock(ilk);
1408                                 flags &= ~LK_INTERLOCK;
1409                         }
1410                         GIANT_SAVE();
1411                         sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1412                             SQ_EXCLUSIVE_QUEUE);
1413                         sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1414                         GIANT_RESTORE();
1415                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1416                             __func__, lk);
1417                 }
1418
1419                 if (error == 0) {
1420                         lock_profile_obtain_lock_success(&lk->lock_object,
1421                             contested, waittime, file, line);
1422                         LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1423                             lk->lk_recurse, file, line);
1424                         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1425                             LK_TRYWIT(flags), file, line);
1426                         TD_LOCKS_INC(curthread);
1427                         STACK_SAVE(lk);
1428                 }
1429                 break;
1430         default:
1431                 if (flags & LK_INTERLOCK)
1432                         class->lc_unlock(ilk);
1433                 panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1434         }
1435
1436         if (flags & LK_INTERLOCK)
1437                 class->lc_unlock(ilk);
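        /*
         * A non-zero return from the sleepq_broadcast() calls above means
         * the swapper (proc0) must be awakened; do that here, after the
         * sleepqueue and any interlock have been dropped.
         */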
1438         if (wakeup_swapper)
1439                 kick_proc0();
1440
1441         return (error);
1442 }
1443
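/*
 * Example (illustrative sketch only, not part of this file): a typical
 * teardown path for an object protected by a lockmgr lock.  "struct foo"
 * and its fields are hypothetical; lockinit(), lockmgr() and lockdestroy()
 * are the lockmgr(9) entry points.  LK_DRAIN waits until no thread holds
 * or waits for the lock and returns with the caller as exclusive owner,
 * so the lock can then be released and destroyed.
 */
#if 0
struct foo {
        struct lock     foo_lock;
        /* ... other fields ... */
};

static void
foo_init(struct foo *fp)
{

        lockinit(&fp->foo_lock, PVFS, "foolk", 0, 0);
}

static void
foo_destroy(struct foo *fp)
{

        lockmgr(&fp->foo_lock, LK_DRAIN, NULL);
        /* No thread can be holding or sleeping on foo_lock at this point. */
        lockmgr(&fp->foo_lock, LK_RELEASE, NULL);
        lockdestroy(&fp->foo_lock);
}
#endif
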
1444 void
1445 _lockmgr_disown(struct lock *lk, const char *file, int line)
1446 {
1447         uintptr_t tid, x;
1448
1449         if (SCHEDULER_STOPPED())
1450                 return;
1451
1452         tid = (uintptr_t)curthread;
1453         _lockmgr_assert(lk, KA_XLOCKED, file, line);
1454
1455         /*
1456          * Panic if the lock is recursed.
1457          */
1458         if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1459                 panic("%s: disown a recursed lockmgr @ %s:%d\n",
1460                     __func__,  file, line);
1461
1462         /*
1463          * If the owner is already LK_KERNPROC, just skip the whole operation.
1464          */
1465         if (LK_HOLDER(lk->lk_lock) != tid)
1466                 return;
1467         lock_profile_release_lock(&lk->lock_object);
1468         LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1469         WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1470         TD_LOCKS_DEC(curthread);
1471         STACK_SAVE(lk);
1472
1473         /*
1474          * In order to preserve waiters flags, just spin.
1475          */
1476         for (;;) {
1477                 x = lk->lk_lock;
1478                 MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1479                 x &= LK_ALL_WAITERS;
1480                 if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1481                     LK_KERNPROC | x))
1482                         return;
1483                 cpu_spinwait();
1484         }
1485 }
1486
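/*
 * Example (illustrative sketch only): handing an exclusively held lock over
 * to LK_KERNPROC with the lockmgr_disown() wrapper from <sys/lockmgr.h>, so
 * that a different thread may later release it.  This mirrors the
 * buffer-cache style of ownership transfer; the function names below are
 * hypothetical.
 */
#if 0
static void
foo_start_io(struct lock *lk)
{

        lockmgr(lk, LK_EXCLUSIVE, NULL);
        /* The completion path, running in another thread, will release. */
        lockmgr_disown(lk);
}

static void
foo_complete_io(struct lock *lk)
{

        /* Valid even though this thread never acquired the lock itself. */
        lockmgr(lk, LK_RELEASE, NULL);
}
#endif
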
1487 void
1488 lockmgr_printinfo(const struct lock *lk)
1489 {
1490         struct thread *td;
1491         uintptr_t x;
1492
1493         if (lk->lk_lock == LK_UNLOCKED)
1494                 printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1495         else if (lk->lk_lock & LK_SHARE)
1496                 printf("lock type %s: SHARED (count %ju)\n",
1497                     lk->lock_object.lo_name,
1498                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1499         else {
1500                 td = lockmgr_xholder(lk);
1501                 if (td == (struct thread *)LK_KERNPROC)
1502                         printf("lock type %s: EXCL by KERNPROC\n",
1503                             lk->lock_object.lo_name);
1504                 else
1505                         printf("lock type %s: EXCL by thread %p "
1506                             "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
1507                             td, td->td_proc->p_pid, td->td_proc->p_comm,
1508                             td->td_tid);
1509         }
1510
1511         x = lk->lk_lock;
1512         if (x & LK_EXCLUSIVE_WAITERS)
1513                 printf(" with exclusive waiters pending\n");
1514         if (x & LK_SHARED_WAITERS)
1515                 printf(" with shared waiters pending\n");
1516         if (x & LK_EXCLUSIVE_SPINNERS)
1517                 printf(" with exclusive spinners pending\n");
1518
1519         STACK_PRINT(lk);
1520 }
1521
1522 int
1523 lockstatus(const struct lock *lk)
1524 {
1525         uintptr_t v, x;
1526         int ret;
1527
1528         ret = LK_SHARED;
1529         x = lk->lk_lock;
1530         v = LK_HOLDER(x);
1531
1532         if ((x & LK_SHARE) == 0) {
1533                 if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1534                         ret = LK_EXCLUSIVE;
1535                 else
1536                         ret = LK_EXCLOTHER;
1537         } else if (x == LK_UNLOCKED)
1538                 ret = 0;
1539
1540         return (ret);
1541 }
1542
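/*
 * Example (illustrative sketch only): interpreting lockstatus() return
 * values.  The helper below is hypothetical; LK_EXCLUSIVE, LK_SHARED,
 * LK_EXCLOTHER and 0 (unlocked) are the values produced above.
 */
#if 0
static void
foo_require_self_xlock(const struct lock *lk)
{

        switch (lockstatus(lk)) {
        case LK_EXCLUSIVE:
                /* Held by curthread (or disowned to LK_KERNPROC). */
                break;
        case LK_EXCLOTHER:
                panic("%s exclusively held by another thread",
                    lk->lock_object.lo_name);
        case LK_SHARED:
                panic("%s only share locked", lk->lock_object.lo_name);
        default:
                panic("%s not locked", lk->lock_object.lo_name);
        }
}
#endif
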
1543 #ifdef INVARIANT_SUPPORT
1544
1545 FEATURE(invariant_support,
1546     "Support for modules compiled with INVARIANTS option");
1547
1548 #ifndef INVARIANTS
1549 #undef  _lockmgr_assert
1550 #endif
1551
1552 void
1553 _lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
1554 {
1555         int slocked = 0;
1556
1557         if (panicstr != NULL)
1558                 return;
1559         switch (what) {
1560         case KA_SLOCKED:
1561         case KA_SLOCKED | KA_NOTRECURSED:
1562         case KA_SLOCKED | KA_RECURSED:
1563                 slocked = 1;
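                /* FALLTHROUGH */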
1564         case KA_LOCKED:
1565         case KA_LOCKED | KA_NOTRECURSED:
1566         case KA_LOCKED | KA_RECURSED:
1567 #ifdef WITNESS
1568
1569                 /*
1570                  * We cannot trust WITNESS if the lock is held in exclusive
1571                  * mode and a call to lockmgr_disown() happened.
1572                  * Work around this by skipping the check if the lock is
1573                  * held in exclusive mode, even for the KA_LOCKED case.
1574                  */
1575                 if (slocked || (lk->lk_lock & LK_SHARE)) {
1576                         witness_assert(&lk->lock_object, what, file, line);
1577                         break;
1578                 }
1579 #endif
1580                 if (lk->lk_lock == LK_UNLOCKED ||
1581                     ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1582                     (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1583                         panic("Lock %s not %slocked @ %s:%d\n",
1584                             lk->lock_object.lo_name, slocked ? "share" : "",
1585                             file, line);
1586
1587                 if ((lk->lk_lock & LK_SHARE) == 0) {
1588                         if (lockmgr_recursed(lk)) {
1589                                 if (what & KA_NOTRECURSED)
1590                                         panic("Lock %s recursed @ %s:%d\n",
1591                                             lk->lock_object.lo_name, file,
1592                                             line);
1593                         } else if (what & KA_RECURSED)
1594                                 panic("Lock %s not recursed @ %s:%d\n",
1595                                     lk->lock_object.lo_name, file, line);
1596                 }
1597                 break;
1598         case KA_XLOCKED:
1599         case KA_XLOCKED | KA_NOTRECURSED:
1600         case KA_XLOCKED | KA_RECURSED:
1601                 if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1602                         panic("Lock %s not exclusively locked @ %s:%d\n",
1603                             lk->lock_object.lo_name, file, line);
1604                 if (lockmgr_recursed(lk)) {
1605                         if (what & KA_NOTRECURSED)
1606                                 panic("Lock %s recursed @ %s:%d\n",
1607                                     lk->lock_object.lo_name, file, line);
1608                 } else if (what & KA_RECURSED)
1609                         panic("Lock %s not recursed @ %s:%d\n",
1610                             lk->lock_object.lo_name, file, line);
1611                 break;
1612         case KA_UNLOCKED:
1613                 if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1614                         panic("Lock %s exclusively locked @ %s:%d\n",
1615                             lk->lock_object.lo_name, file, line);
1616                 break;
1617         default:
1618                 panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1619                     line);
1620         }
1621 }
1622 #endif
1623
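/*
 * Example (illustrative sketch only): a routine that requires its caller to
 * hold the lock exclusively can document and enforce that with the
 * lockmgr_assert() wrapper from <sys/lockmgr.h>, which compiles away when
 * the kernel is built without INVARIANTS.  The function name is
 * hypothetical.
 */
#if 0
static void
foo_modify_locked(struct lock *lk)
{

        lockmgr_assert(lk, KA_XLOCKED);
        /* ... modify state protected by the lock ... */
}
#endif
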
1624 #ifdef DDB
1625 int
1626 lockmgr_chain(struct thread *td, struct thread **ownerp)
1627 {
1628         struct lock *lk;
1629
1630         lk = td->td_wchan;
1631
1632         if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1633                 return (0);
1634         db_printf("blocked on lockmgr %s ", lk->lock_object.lo_name);
1635         if (lk->lk_lock & LK_SHARE)
1636                 db_printf("SHARED (count %ju)\n",
1637                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1638         else
1639                 db_printf("EXCL\n");
1640         *ownerp = lockmgr_xholder(lk);
1641
1642         return (1);
1643 }
1644
1645 static void
1646 db_show_lockmgr(const struct lock_object *lock)
1647 {
1648         struct thread *td;
1649         const struct lock *lk;
1650
1651         lk = (const struct lock *)lock;
1652
1653         db_printf(" state: ");
1654         if (lk->lk_lock == LK_UNLOCKED)
1655                 db_printf("UNLOCKED\n");
1656         else if (lk->lk_lock & LK_SHARE)
1657                 db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1658         else {
1659                 td = lockmgr_xholder(lk);
1660                 if (td == (struct thread *)LK_KERNPROC)
1661                         db_printf("XLOCK: LK_KERNPROC\n");
1662                 else
1663                         db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1664                             td->td_tid, td->td_proc->p_pid,
1665                             td->td_proc->p_comm);
1666                 if (lockmgr_recursed(lk))
1667                         db_printf(" recursed: %d\n", lk->lk_recurse);
1668         }
1669         db_printf(" waiters: ");
1670         switch (lk->lk_lock & LK_ALL_WAITERS) {
1671         case LK_SHARED_WAITERS:
1672                 db_printf("shared\n");
1673                 break;
1674         case LK_EXCLUSIVE_WAITERS:
1675                 db_printf("exclusive\n");
1676                 break;
1677         case LK_ALL_WAITERS:
1678                 db_printf("shared and exclusive\n");
1679                 break;
1680         default:
1681                 db_printf("none\n");
1682         }
1683         db_printf(" spinners: ");
1684         if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1685                 db_printf("exclusive\n");
1686         else
1687                 db_printf("none\n");
1688 }
1689 #endif