1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
28  * DAMAGE.
29  */
30
31 #include "opt_ddb.h"
32 #include "opt_hwpmc_hooks.h"
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include <sys/param.h>
38 #include <sys/kdb.h>
39 #include <sys/ktr.h>
40 #include <sys/lock.h>
41 #include <sys/lock_profile.h>
42 #include <sys/lockmgr.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/sleepqueue.h>
46 #ifdef DEBUG_LOCKS
47 #include <sys/stack.h>
48 #endif
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51
52 #include <machine/cpu.h>
53
54 #ifdef DDB
55 #include <ddb/ddb.h>
56 #endif
57
58 #ifdef HWPMC_HOOKS
59 #include <sys/pmckern.h>
60 PMC_SOFT_DECLARE( , , lock, failed);
61 #endif
62
63 CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
64     (LK_ADAPTIVE | LK_NOSHARE));
65 CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
66     ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
67
68 #define SQ_EXCLUSIVE_QUEUE      0
69 #define SQ_SHARED_QUEUE         1
70
71 #ifndef INVARIANTS
72 #define _lockmgr_assert(lk, what, file, line)
73 #endif
74
75 #define TD_SLOCKS_INC(td)       ((td)->td_lk_slocks++)
76 #define TD_SLOCKS_DEC(td)       ((td)->td_lk_slocks--)
77
78 #ifndef DEBUG_LOCKS
79 #define STACK_PRINT(lk)
80 #define STACK_SAVE(lk)
81 #define STACK_ZERO(lk)
82 #else
83 #define STACK_PRINT(lk) stack_print_ddb(&(lk)->lk_stack)
84 #define STACK_SAVE(lk)  stack_save(&(lk)->lk_stack)
85 #define STACK_ZERO(lk)  stack_zero(&(lk)->lk_stack)
86 #endif
87
88 #define LOCK_LOG2(lk, string, arg1, arg2)                               \
89         if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
90                 CTR2(KTR_LOCK, (string), (arg1), (arg2))
91 #define LOCK_LOG3(lk, string, arg1, arg2, arg3)                         \
92         if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
93                 CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
94
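/*
 * Giant is fully released (remembering its recursion depth in _i) before a
 * lockmgr sleep and reacquired the same number of times afterwards, so a
 * thread never sleeps on a lockmgr lock while holding Giant.
 */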
95 #define GIANT_DECLARE                                                   \
96         int _i = 0;                                                     \
97         WITNESS_SAVE_DECL(Giant)
98 #define GIANT_RESTORE() do {                                            \
99         if (_i > 0) {                                                   \
100                 while (_i--)                                            \
101                         mtx_lock(&Giant);                               \
102                 WITNESS_RESTORE(&Giant.lock_object, Giant);             \
103         }                                                               \
104 } while (0)
105 #define GIANT_SAVE() do {                                               \
106         if (mtx_owned(&Giant)) {                                        \
107                 WITNESS_SAVE(&Giant.lock_object, Giant);                \
108                 while (mtx_owned(&Giant)) {                             \
109                         _i++;                                           \
110                         mtx_unlock(&Giant);                             \
111                 }                                                       \
112         }                                                               \
113 } while (0)
114
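/*
 * A shared request can be granted only if the lock is currently held in
 * shared mode and either no exclusive waiters or spinners are queued, or
 * the requesting thread already holds shared lockmgr locks (and did not
 * pass LK_NODDLKTREAT), or it has TDP_DEADLKTREAT set; the exceptions
 * exist to avoid deadlocks on shared re-acquisition.
 */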
115 #define LK_CAN_SHARE(x, flags)                                          \
116         (((x) & LK_SHARE) &&                                            \
117         (((x) & (LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) == 0 || \
118         (curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) ||  \
119         (curthread->td_pflags & TDP_DEADLKTREAT)))
120 #define LK_TRYOP(x)                                                     \
121         ((x) & LK_NOWAIT)
122
123 #define LK_CAN_WITNESS(x)                                               \
124         (((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
125 #define LK_TRYWIT(x)                                                    \
126         (LK_TRYOP(x) ? LOP_TRYLOCK : 0)
127
128 #define LK_CAN_ADAPT(lk, f)                                             \
129         (((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&             \
130         ((f) & LK_SLEEPFAIL) == 0)
131
132 #define lockmgr_disowned(lk)                                            \
133         (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
134
135 #define lockmgr_xlocked(lk)                                             \
136         (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
137
138 static void     assert_lockmgr(const struct lock_object *lock, int how);
139 #ifdef DDB
140 static void     db_show_lockmgr(const struct lock_object *lock);
141 #endif
142 static void     lock_lockmgr(struct lock_object *lock, uintptr_t how);
143 #ifdef KDTRACE_HOOKS
144 static int      owner_lockmgr(const struct lock_object *lock,
145                     struct thread **owner);
146 #endif
147 static uintptr_t unlock_lockmgr(struct lock_object *lock);
148
149 struct lock_class lock_class_lockmgr = {
150         .lc_name = "lockmgr",
151         .lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
152         .lc_assert = assert_lockmgr,
153 #ifdef DDB
154         .lc_ddb_show = db_show_lockmgr,
155 #endif
156         .lc_lock = lock_lockmgr,
157         .lc_unlock = unlock_lockmgr,
158 #ifdef KDTRACE_HOOKS
159         .lc_owner = owner_lockmgr,
160 #endif
161 };
162
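/*
 * Per-call sleep parameters (wait message, priority and timeout) handed
 * down from __lockmgr_args() to the hard-path handlers.
 */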
163 struct lockmgr_wait {
164         const char *iwmesg;
165         int ipri;
166         int itimo;
167 };
168
169 static bool __always_inline lockmgr_slock_try(struct lock *lk, uintptr_t *xp,
170     int flags);
171 static bool __always_inline lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp);
172
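/*
 * Common exit path: release the interlock if LK_INTERLOCK was passed and
 * kick proc0 if a swapped-out thread was made runnable while waking waiters.
 */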
173 static void
174 lockmgr_exit(u_int flags, struct lock_object *ilk, int wakeup_swapper)
175 {
176         struct lock_class *class;
177
178         if (flags & LK_INTERLOCK) {
179                 class = LOCK_CLASS(ilk);
180                 class->lc_unlock(ilk);
181         }
182
183         if (__predict_false(wakeup_swapper))
184                 kick_proc0();
185 }
186
187 static void
188 lockmgr_note_shared_acquire(struct lock *lk, int contested,
189     uint64_t waittime, const char *file, int line, int flags)
190 {
191
192         lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime,
193             file, line);
194         LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line);
195         WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line);
196         TD_LOCKS_INC(curthread);
197         TD_SLOCKS_INC(curthread);
198         STACK_SAVE(lk);
199 }
200
201 static void
202 lockmgr_note_shared_release(struct lock *lk, const char *file, int line)
203 {
204
205         lock_profile_release_lock(&lk->lock_object);
206         WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
207         LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
208         TD_LOCKS_DEC(curthread);
209         TD_SLOCKS_DEC(curthread);
210 }
211
212 static void
213 lockmgr_note_exclusive_acquire(struct lock *lk, int contested,
214     uint64_t waittime, const char *file, int line, int flags)
215 {
216
217         lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime,
218             file, line);
219         LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
220         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file,
221             line);
222         TD_LOCKS_INC(curthread);
223         STACK_SAVE(lk);
224 }
225
226 static void
227 lockmgr_note_exclusive_release(struct lock *lk, const char *file, int line)
228 {
229
230         lock_profile_release_lock(&lk->lock_object);
231         LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file,
232             line);
233         WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
234         TD_LOCKS_DEC(curthread);
235 }
236
237 static __inline struct thread *
238 lockmgr_xholder(const struct lock *lk)
239 {
240         uintptr_t x;
241
242         x = lk->lk_lock;
243         return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
244 }
245
246 /*
247  * Sleep on the lock's sleepqueue.  Assumes sleepq_lock is held on entry
248  * and returns with it released.  It also assumes the generic interlock is
249  * sane and has been checked by the caller.  If LK_INTERLOCK is specified
250  * the interlock is not reacquired after the sleep.
251  */
252 static __inline int
253 sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
254     const char *wmesg, int pri, int timo, int queue)
255 {
256         GIANT_DECLARE;
257         struct lock_class *class;
258         int catch, error;
259
260         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
261         catch = pri & PCATCH;
262         pri &= PRIMASK;
263         error = 0;
264
265         LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
266             (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
267
268         if (flags & LK_INTERLOCK)
269                 class->lc_unlock(ilk);
270         if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
271                 lk->lk_exslpfail++;
272         GIANT_SAVE();
273         sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
274             SLEEPQ_INTERRUPTIBLE : 0), queue);
275         if ((flags & LK_TIMELOCK) && timo)
276                 sleepq_set_timeout(&lk->lock_object, timo);
277
278         /*
279          * Choose the sleep primitive according to the timeout and PCATCH.
280          */
281         if ((flags & LK_TIMELOCK) && timo && catch)
282                 error = sleepq_timedwait_sig(&lk->lock_object, pri);
283         else if ((flags & LK_TIMELOCK) && timo)
284                 error = sleepq_timedwait(&lk->lock_object, pri);
285         else if (catch)
286                 error = sleepq_wait_sig(&lk->lock_object, pri);
287         else
288                 sleepq_wait(&lk->lock_object, pri);
289         GIANT_RESTORE();
290         if ((flags & LK_SLEEPFAIL) && error == 0)
291                 error = ENOLCK;
292
293         return (error);
294 }
295
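/*
 * Release a shared lock.  In the common case the sharer count is simply
 * dropped; if waiters are present, the sleepqueue is taken and either the
 * exclusive or the shared queue is awakened.  Returns non-zero if the
 * swapper process needs to be kicked.
 */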
296 static __inline int
297 wakeupshlk(struct lock *lk, const char *file, int line)
298 {
299         uintptr_t v, x, orig_x;
300         u_int realexslp;
301         int queue, wakeup_swapper;
302
303         wakeup_swapper = 0;
304         for (;;) {
305                 x = lk->lk_lock;
306                 if (lockmgr_sunlock_try(lk, &x))
307                         break;
308
309                 /*
310                  * We should have a sharer with waiters, so enter the hard
311                  * path in order to handle wakeups correctly.
312                  */
313                 sleepq_lock(&lk->lock_object);
314                 orig_x = lk->lk_lock;
315 retry_sleepq:
316                 x = orig_x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
317                 v = LK_UNLOCKED;
318
319                 /*
320                  * If the lock has exclusive waiters, give them preference in
321                  * order to avoid deadlock with shared runners-up.
322                  * If interruptible sleeps left the exclusive queue empty,
323                  * avoid starving the threads sleeping on the shared queue by
324                  * giving them precedence and clearing the exclusive waiters
325                  * bit anyway.
326                  * Note that the lk_exslpfail count may overstate the real
327                  * number of waiters with the LK_SLEEPFAIL flag set, because
328                  * such waiters may also be using interruptible sleeps, so
329                  * lk_exslpfail should be treated as an upper bound, edge
330                  * cases included.
331                  */
332                 realexslp = sleepq_sleepcnt(&lk->lock_object,
333                     SQ_EXCLUSIVE_QUEUE);
334                 if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
335                         if (lk->lk_exslpfail < realexslp) {
336                                 lk->lk_exslpfail = 0;
337                                 queue = SQ_EXCLUSIVE_QUEUE;
338                                 v |= (x & LK_SHARED_WAITERS);
339                         } else {
340                                 lk->lk_exslpfail = 0;
341                                 LOCK_LOG2(lk,
342                                     "%s: %p has only LK_SLEEPFAIL sleepers",
343                                     __func__, lk);
344                                 LOCK_LOG2(lk,
345                             "%s: %p waking up threads on the exclusive queue",
346                                     __func__, lk);
347                                 wakeup_swapper =
348                                     sleepq_broadcast(&lk->lock_object,
349                                     SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
350                                 queue = SQ_SHARED_QUEUE;
351                         }
352
353                 } else {
354
355                         /*
356                          * Exclusive waiters sleeping with LK_SLEEPFAIL on
357                          * and using interruptible sleeps/timeout may have
358                          * left spurious lk_exslpfail counts on, so clean
359                          * it up anyway.
360                          */
361                         lk->lk_exslpfail = 0;
362                         queue = SQ_SHARED_QUEUE;
363                 }
364
365                 if (lockmgr_sunlock_try(lk, &orig_x)) {
366                         sleepq_release(&lk->lock_object);
367                         break;
368                 }
369
370                 x |= LK_SHARERS_LOCK(1);
371                 if (!atomic_fcmpset_rel_ptr(&lk->lk_lock, &x, v)) {
372                         orig_x = x;
373                         goto retry_sleepq;
374                 }
375                 LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
376                     __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
377                     "exclusive");
378                 wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
379                     0, queue);
380                 sleepq_release(&lk->lock_object);
381                 break;
382         }
383
384         lockmgr_note_shared_release(lk, file, line);
385         return (wakeup_swapper);
386 }
387
388 static void
389 assert_lockmgr(const struct lock_object *lock, int what)
390 {
391
392         panic("lockmgr locks do not support assertions");
393 }
394
395 static void
396 lock_lockmgr(struct lock_object *lock, uintptr_t how)
397 {
398
399         panic("lockmgr locks do not support sleep interlocking");
400 }
401
402 static uintptr_t
403 unlock_lockmgr(struct lock_object *lock)
404 {
405
406         panic("lockmgr locks do not support sleep interlocking");
407 }
408
409 #ifdef KDTRACE_HOOKS
410 static int
411 owner_lockmgr(const struct lock_object *lock, struct thread **owner)
412 {
413
414         panic("lockmgr locks do not support owner inquiring");
415 }
416 #endif
417
418 void
419 lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
420 {
421         int iflags;
422
423         MPASS((flags & ~LK_INIT_MASK) == 0);
424         ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
425             ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
426             &lk->lk_lock));
427
428         iflags = LO_SLEEPABLE | LO_UPGRADABLE;
429         if (flags & LK_CANRECURSE)
430                 iflags |= LO_RECURSABLE;
431         if ((flags & LK_NODUP) == 0)
432                 iflags |= LO_DUPOK;
433         if (flags & LK_NOPROFILE)
434                 iflags |= LO_NOPROFILE;
435         if ((flags & LK_NOWITNESS) == 0)
436                 iflags |= LO_WITNESS;
437         if (flags & LK_QUIET)
438                 iflags |= LO_QUIET;
439         if (flags & LK_IS_VNODE)
440                 iflags |= LO_IS_VNODE;
441         iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
442
443         lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
444         lk->lk_lock = LK_UNLOCKED;
445         lk->lk_recurse = 0;
446         lk->lk_exslpfail = 0;
447         lk->lk_timo = timo;
448         lk->lk_pri = pri;
449         STACK_ZERO(lk);
450 }
451
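/*
 * Example usage (a minimal sketch; the priority, wait message and flags
 * below are illustrative only, not a recommendation):
 *
 *	struct lock slock;
 *
 *	lockinit(&slock, PVFS, "examplelk", 0, 0);
 *	lockmgr(&slock, LK_EXCLUSIVE, NULL);
 *	...
 *	lockmgr(&slock, LK_RELEASE, NULL);
 *	lockdestroy(&slock);
 */
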
452 /*
453  * XXX: Gross hacks to manipulate external lock flags after
454  * initialization.  Used for certain vnode and buf locks.
455  */
456 void
457 lockallowshare(struct lock *lk)
458 {
459
460         lockmgr_assert(lk, KA_XLOCKED);
461         lk->lock_object.lo_flags &= ~LK_NOSHARE;
462 }
463
464 void
465 lockdisableshare(struct lock *lk)
466 {
467
468         lockmgr_assert(lk, KA_XLOCKED);
469         lk->lock_object.lo_flags |= LK_NOSHARE;
470 }
471
472 void
473 lockallowrecurse(struct lock *lk)
474 {
475
476         lockmgr_assert(lk, KA_XLOCKED);
477         lk->lock_object.lo_flags |= LO_RECURSABLE;
478 }
479
480 void
481 lockdisablerecurse(struct lock *lk)
482 {
483
484         lockmgr_assert(lk, KA_XLOCKED);
485         lk->lock_object.lo_flags &= ~LO_RECURSABLE;
486 }
487
488 void
489 lockdestroy(struct lock *lk)
490 {
491
492         KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
493         KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
494         KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
495         lock_destroy(&lk->lock_object);
496 }
497
498 static bool __always_inline
499 lockmgr_slock_try(struct lock *lk, uintptr_t *xp, int flags)
500 {
501
502         /*
503          * If no other thread has an exclusive lock, or
504          * no exclusive waiter is present, bump the count of
505          * sharers.  Since we have to preserve the state of
506          * waiters, if we fail to acquire the shared lock
507          * loop back and retry.
508          */
509         *xp = lk->lk_lock;
510         while (LK_CAN_SHARE(*xp, flags)) {
511                 if (atomic_fcmpset_acq_ptr(&lk->lk_lock, xp,
512                     *xp + LK_ONE_SHARER)) {
513                         return (true);
514                 }
515         }
516         return (false);
517 }
518
519 static bool __always_inline
520 lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp)
521 {
522
523         for (;;) {
524                 /*
525                  * If there is more than one shared lock held, just drop one
526                  * and return.
527                  */
528                 if (LK_SHARERS(*xp) > 1) {
529                         if (atomic_fcmpset_rel_ptr(&lk->lk_lock, xp,
530                             *xp - LK_ONE_SHARER))
531                                 return (true);
532                         continue;
533                 }
534
535                 /*
536                  * If there are no waiters on the exclusive queue, drop the
537                  * lock quickly.
538                  */
539                 if ((*xp & LK_ALL_WAITERS) == 0) {
540                         MPASS((*xp & ~LK_EXCLUSIVE_SPINNERS) ==
541                             LK_SHARERS_LOCK(1));
542                         if (atomic_fcmpset_rel_ptr(&lk->lk_lock, xp,
543                             LK_UNLOCKED))
544                                 return (true);
545                         continue;
546                 }
547                 break;
548         }
549         return (false);
550 }
551
552 static __noinline int
553 lockmgr_slock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
554     const char *file, int line, struct lockmgr_wait *lwa)
555 {
556         uintptr_t tid, x;
557         int error = 0;
558         const char *iwmesg;
559         int ipri, itimo;
560
561 #ifdef LOCK_PROFILING
562         uint64_t waittime = 0;
563         int contested = 0;
564 #endif
565
566         if (__predict_false(panicstr != NULL))
567                 goto out;
568
569         tid = (uintptr_t)curthread;
570
571         if (LK_CAN_WITNESS(flags))
572                 WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
573                     file, line, flags & LK_INTERLOCK ? ilk : NULL);
574         for (;;) {
575                 if (lockmgr_slock_try(lk, &x, flags))
576                         break;
577 #ifdef HWPMC_HOOKS
578                 PMC_SOFT_CALL( , , lock, failed);
579 #endif
580                 lock_profile_obtain_lock_failed(&lk->lock_object,
581                     &contested, &waittime);
582
583                 /*
584                  * If the lock is already held by curthread in
585                  * exclusive mode, fail with EDEADLK to avoid a deadlock.
586                  */
587                 if (LK_HOLDER(x) == tid) {
588                         LOCK_LOG2(lk,
589                             "%s: %p already held in exclusive mode",
590                             __func__, lk);
591                         error = EDEADLK;
592                         break;
593                 }
594
595                 /*
596                  * If the caller does not want to sleep (LK_NOWAIT),
597                  * just give up and return.
598                  */
599                 if (LK_TRYOP(flags)) {
600                         LOCK_LOG2(lk, "%s: %p fails the try operation",
601                             __func__, lk);
602                         error = EBUSY;
603                         break;
604                 }
605
606                 /*
607                  * Acquire the sleepqueue chain lock because we
608                  * probably will need to manipulate waiters flags.
609                  */
610                 sleepq_lock(&lk->lock_object);
611                 x = lk->lk_lock;
612 retry_sleepq:
613
614                 /*
615                  * If the lock can be acquired in shared mode, try
616                  * again.
617                  */
618                 if (LK_CAN_SHARE(x, flags)) {
619                         sleepq_release(&lk->lock_object);
620                         continue;
621                 }
622
623                 /*
624                  * Try to set the LK_SHARED_WAITERS flag.  If we fail,
625                  * loop back and retry.
626                  */
627                 if ((x & LK_SHARED_WAITERS) == 0) {
628                         if (!atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
629                             x | LK_SHARED_WAITERS)) {
630                                 goto retry_sleepq;
631                         }
632                         LOCK_LOG2(lk, "%s: %p set shared waiters flag",
633                             __func__, lk);
634                 }
635
636                 if (lwa == NULL) {
637                         iwmesg = lk->lock_object.lo_name;
638                         ipri = lk->lk_pri;
639                         itimo = lk->lk_timo;
640                 } else {
641                         iwmesg = lwa->iwmesg;
642                         ipri = lwa->ipri;
643                         itimo = lwa->itimo;
644                 }
645
646                 /*
647                  * Since we have been unable to acquire the
648                  * shared lock and the shared waiters flag is set,
649                  * we will sleep.
650                  */
651                 error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
652                     SQ_SHARED_QUEUE);
653                 flags &= ~LK_INTERLOCK;
654                 if (error) {
655                         LOCK_LOG3(lk,
656                             "%s: interrupted sleep for %p with %d",
657                             __func__, lk, error);
658                         break;
659                 }
660                 LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
661                     __func__, lk);
662         }
663         if (error == 0) {
664 #ifdef LOCK_PROFILING
665                 lockmgr_note_shared_acquire(lk, contested, waittime,
666                     file, line, flags);
667 #else
668                 lockmgr_note_shared_acquire(lk, 0, 0, file, line,
669                     flags);
670 #endif
671         }
672
673 out:
674         lockmgr_exit(flags, ilk, 0);
675         return (error);
676 }
677
678 static __noinline int
679 lockmgr_xlock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
680     const char *file, int line, struct lockmgr_wait *lwa)
681 {
682         struct lock_class *class;
683         uintptr_t tid, x, v;
684         int error = 0;
685         const char *iwmesg;
686         int ipri, itimo;
687
688 #ifdef LOCK_PROFILING
689         uint64_t waittime = 0;
690         int contested = 0;
691 #endif
692
693         if (__predict_false(panicstr != NULL))
694                 goto out;
695
696         tid = (uintptr_t)curthread;
697
698         if (LK_CAN_WITNESS(flags))
699                 WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
700                     LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
701                     ilk : NULL);
702
703         /*
704          * If curthread already holds the lock and this one is
705          * allowed to recurse, simply recurse on it.
706          */
707         if (lockmgr_xlocked(lk)) {
708                 if ((flags & LK_CANRECURSE) == 0 &&
709                     (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
710                         /*
711                          * For a try operation, return EBUSY instead
712                          * of panicking.
713                          */
714                         if (LK_TRYOP(flags)) {
715                                 LOCK_LOG2(lk,
716                                     "%s: %p fails the try operation",
717                                     __func__, lk);
718                                 error = EBUSY;
719                                 goto out;
720                         }
721                         if (flags & LK_INTERLOCK) {
722                                 class = LOCK_CLASS(ilk);
723                                 class->lc_unlock(ilk);
724                         }
725                         panic("%s: recursing on non recursive lockmgr %p "
726                             "@ %s:%d\n", __func__, lk, file, line);
727                 }
728                 lk->lk_recurse++;
729                 LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
730                 LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
731                     lk->lk_recurse, file, line);
732                 WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
733                     LK_TRYWIT(flags), file, line);
734                 TD_LOCKS_INC(curthread);
735                 goto out;
736         }
737
738         for (;;) {
739                 if (lk->lk_lock == LK_UNLOCKED &&
740                     atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
741                         break;
742 #ifdef HWPMC_HOOKS
743                 PMC_SOFT_CALL( , , lock, failed);
744 #endif
745                 lock_profile_obtain_lock_failed(&lk->lock_object,
746                     &contested, &waittime);
747
748                 /*
749                  * If the caller does not want to sleep (LK_NOWAIT),
750                  * just give up and return.
751                  */
752                 if (LK_TRYOP(flags)) {
753                         LOCK_LOG2(lk, "%s: %p fails the try operation",
754                             __func__, lk);
755                         error = EBUSY;
756                         break;
757                 }
758
759                 /*
760                  * Acquire the sleepqueue chain lock because we
761                  * probably will need to manipulate waiters flags.
762                  */
763                 sleepq_lock(&lk->lock_object);
764                 x = lk->lk_lock;
765 retry_sleepq:
766
767                 /*
768                  * If the lock has been released while we spun on
769                  * the sleepqueue chain lock, just try again.
770                  */
771                 if (x == LK_UNLOCKED) {
772                         sleepq_release(&lk->lock_object);
773                         continue;
774                 }
775
776                 /*
777                  * The lock can be in the state where there is a
778                  * pending queue of waiters, but still no owner.
779                  * This happens when the lock is contested and an
780                  * owner is going to claim the lock.
781                  * If curthread is the one successfully acquiring it
782                  * claim lock ownership and return, preserving waiters
783                  * flags.
784                  */
785                 v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
786                 if ((x & ~v) == LK_UNLOCKED) {
787                         v &= ~LK_EXCLUSIVE_SPINNERS;
788                         if (atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
789                             tid | v)) {
790                                 sleepq_release(&lk->lock_object);
791                                 LOCK_LOG2(lk,
792                                     "%s: %p claimed by a new writer",
793                                     __func__, lk);
794                                 break;
795                         }
796                         goto retry_sleepq;
797                 }
798
799                 /*
800                  * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
801                  * fail, loop back and retry.
802                  */
803                 if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
804                         if (!atomic_fcmpset_ptr(&lk->lk_lock, &x,
805                             x | LK_EXCLUSIVE_WAITERS)) {
806                                 goto retry_sleepq;
807                         }
808                         LOCK_LOG2(lk, "%s: %p set excl waiters flag",
809                             __func__, lk);
810                 }
811
812                 if (lwa == NULL) {
813                         iwmesg = lk->lock_object.lo_name;
814                         ipri = lk->lk_pri;
815                         itimo = lk->lk_timo;
816                 } else {
817                         iwmesg = lwa->iwmesg;
818                         ipri = lwa->ipri;
819                         itimo = lwa->itimo;
820                 }
821
822                 /*
823                  * Since we have been unable to acquire the
824                  * exclusive lock and the exclusive waiters flag
825                  * is set, we will sleep.
826                  */
827                 error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
828                     SQ_EXCLUSIVE_QUEUE);
829                 flags &= ~LK_INTERLOCK;
830                 if (error) {
831                         LOCK_LOG3(lk,
832                             "%s: interrupted sleep for %p with %d",
833                             __func__, lk, error);
834                         break;
835                 }
836                 LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
837                     __func__, lk);
838         }
839         if (error == 0) {
840 #ifdef LOCK_PROFILING
841                 lockmgr_note_exclusive_acquire(lk, contested, waittime,
842                     file, line, flags);
843 #else
844                 lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
845                     flags);
846 #endif
847         }
848
849 out:
850         lockmgr_exit(flags, ilk, 0);
851         return (error);
852 }
853
854 static __noinline int
855 lockmgr_upgrade(struct lock *lk, u_int flags, struct lock_object *ilk,
856     const char *file, int line, struct lockmgr_wait *lwa)
857 {
858         uintptr_t tid, x, v;
859         int error = 0;
860         int wakeup_swapper = 0;
861         int op;
862
863         if (__predict_false(panicstr != NULL))
864                 goto out;
865
866         tid = (uintptr_t)curthread;
867
868         _lockmgr_assert(lk, KA_SLOCKED, file, line);
869         v = lk->lk_lock;
870         x = v & LK_ALL_WAITERS;
871         v &= LK_EXCLUSIVE_SPINNERS;
872
873         /*
874          * Try to switch from one shared lock to an exclusive one.
875          * We need to preserve waiters flags during the operation.
876          */
877         if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
878             tid | x)) {
879                 LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
880                     line);
881                 WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
882                     LK_TRYWIT(flags), file, line);
883                 TD_SLOCKS_DEC(curthread);
884                 goto out;
885         }
886
887         op = flags & LK_TYPE_MASK;
888
889         /*
890          * In LK_TRYUPGRADE mode, do not drop the lock,
891          * returning EBUSY instead.
892          */
893         if (op == LK_TRYUPGRADE) {
894                 LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
895                     __func__, lk);
896                 error = EBUSY;
897                 goto out;
898         }
899
900         /*
901          * We have been unable to succeed in upgrading, so just
902          * give up the shared lock.
903          */
904         wakeup_swapper |= wakeupshlk(lk, file, line);
905         error = lockmgr_xlock_hard(lk, flags, ilk, file, line, lwa);
906         flags &= ~LK_INTERLOCK;
907 out:
908         lockmgr_exit(flags, ilk, wakeup_swapper);
909         return (error);
910 }
911
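/*
 * Fast path for lock acquisition: attempt the uncontended shared or
 * exclusive transition inline and fall back to the hard-path handlers
 * (or to __lockmgr_args()) when the lock is contended or the request
 * is an upgrade.
 */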
912 int
913 lockmgr_lock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk,
914     const char *file, int line)
915 {
916         struct lock_class *class;
917         uintptr_t x, tid;
918         u_int op;
919         bool locked;
920
921         if (__predict_false(panicstr != NULL))
922                 return (0);
923
924         op = flags & LK_TYPE_MASK;
925         locked = false;
926         switch (op) {
927         case LK_SHARED:
928                 if (LK_CAN_WITNESS(flags))
929                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
930                             file, line, flags & LK_INTERLOCK ? ilk : NULL);
931                 if (__predict_false(lk->lock_object.lo_flags & LK_NOSHARE))
932                         break;
933                 if (lockmgr_slock_try(lk, &x, flags)) {
934                         lockmgr_note_shared_acquire(lk, 0, 0,
935                             file, line, flags);
936                         locked = true;
937                 } else {
938                         return (lockmgr_slock_hard(lk, flags, ilk, file, line,
939                             NULL));
940                 }
941                 break;
942         case LK_EXCLUSIVE:
943                 if (LK_CAN_WITNESS(flags))
944                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
945                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
946                             ilk : NULL);
947                 tid = (uintptr_t)curthread;
948                 if (lk->lk_lock == LK_UNLOCKED &&
949                     atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
950                         lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
951                             flags);
952                         locked = true;
953                 } else {
954                         return (lockmgr_xlock_hard(lk, flags, ilk, file, line,
955                             NULL));
956                 }
957                 break;
958         case LK_UPGRADE:
959         case LK_TRYUPGRADE:
960                 return (lockmgr_upgrade(lk, flags, ilk, file, line, NULL));
961         default:
962                 break;
963         }
964         if (__predict_true(locked)) {
965                 if (__predict_false(flags & LK_INTERLOCK)) {
966                         class = LOCK_CLASS(ilk);
967                         class->lc_unlock(ilk);
968                 }
969                 return (0);
970         } else {
971                 return (__lockmgr_args(lk, flags, ilk, LK_WMESG_DEFAULT,
972                     LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, file, line));
973         }
974 }
975
976 static __noinline int
977 lockmgr_sunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
978     const char *file, int line)
979
980 {
981         int wakeup_swapper = 0;
982
983         if (__predict_false(panicstr != NULL))
984                 goto out;
985
986         wakeup_swapper = wakeupshlk(lk, file, line);
987
988 out:
989         lockmgr_exit(flags, ilk, wakeup_swapper);
990         return (0);
991 }
992
993 static __noinline int
994 lockmgr_xunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
995     const char *file, int line)
996 {
997         uintptr_t tid, v;
998         int wakeup_swapper = 0;
999         u_int realexslp;
1000         int queue;
1001
1002         if (__predict_false(panicstr != NULL))
1003                 goto out;
1004
1005         tid = (uintptr_t)curthread;
1006
1007         /*
1008          * As a first attempt, treat the lock as if it has no
1009          * waiters.
1010          * Fix up the tid variable if the lock has been disowned.
1011          */
1012         if (LK_HOLDER(x) == LK_KERNPROC)
1013                 tid = LK_KERNPROC;
1014         else {
1015                 WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1016                 TD_LOCKS_DEC(curthread);
1017         }
1018         LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
1019
1020         /*
1021          * The lock is held in exclusive mode.
1022          * If the lock is recursed also, then unrecurse it.
1023          */
1024         if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1025                 LOCK_LOG2(lk, "%s: %p unrecursing", __func__, lk);
1026                 lk->lk_recurse--;
1027                 goto out;
1028         }
1029         if (tid != LK_KERNPROC)
1030                 lock_profile_release_lock(&lk->lock_object);
1031
1032         if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED))
1033                 goto out;
1034
1035         sleepq_lock(&lk->lock_object);
1036         x = lk->lk_lock;
1037         v = LK_UNLOCKED;
1038
1039         /*
1040          * If the lock has exclusive waiters, give them
1041          * preference in order to avoid deadlock with
1042          * shared runners-up.
1043          * If interruptible sleeps left the exclusive queue
1044          * empty, avoid starving the threads sleeping on
1045          * the shared queue by giving them precedence and
1046          * clearing the exclusive waiters bit anyway.
1047          * Note that the lk_exslpfail count may overstate
1048          * the real number of waiters with the LK_SLEEPFAIL
1049          * flag set, because such waiters may also be using
1050          * interruptible sleeps, so lk_exslpfail should be
1051          * treated as an upper bound on that count,
1052          * edge cases included.
1053          */
1054         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1055         realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE);
1056         if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1057                 if (lk->lk_exslpfail < realexslp) {
1058                         lk->lk_exslpfail = 0;
1059                         queue = SQ_EXCLUSIVE_QUEUE;
1060                         v |= (x & LK_SHARED_WAITERS);
1061                 } else {
1062                         lk->lk_exslpfail = 0;
1063                         LOCK_LOG2(lk,
1064                             "%s: %p has only LK_SLEEPFAIL sleepers",
1065                             __func__, lk);
1066                         LOCK_LOG2(lk,
1067                             "%s: %p waking up threads on the exclusive queue",
1068                             __func__, lk);
1069                         wakeup_swapper = sleepq_broadcast(&lk->lock_object,
1070                             SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1071                         queue = SQ_SHARED_QUEUE;
1072                 }
1073         } else {
1074
1075                 /*
1076                  * Exclusive waiters sleeping with LK_SLEEPFAIL
1077                  * on and using interruptible sleeps/timeout
1078                  * may have left spurious lk_exslpfail counts
1079                  * on, so clean it up anyway.
1080                  */
1081                 lk->lk_exslpfail = 0;
1082                 queue = SQ_SHARED_QUEUE;
1083         }
1084
1085         LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
1086             __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1087             "exclusive");
1088         atomic_store_rel_ptr(&lk->lk_lock, v);
1089         wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue);
1090         sleepq_release(&lk->lock_object);
1091
1092 out:
1093         lockmgr_exit(flags, ilk, wakeup_swapper);
1094         return (0);
1095 }
1096
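/*
 * Fast path for lock release: drop an uncontended shared or exclusive lock
 * inline and defer to the hard-path handlers when waiters are present or
 * the lock is recursed.
 */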
1097 int
1098 lockmgr_unlock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk)
1099 {
1100         struct lock_class *class;
1101         uintptr_t x, tid;
1102         bool unlocked;
1103         const char *file;
1104         int line;
1105
1106         if (__predict_false(panicstr != NULL))
1107                 return (0);
1108
1109         file = __FILE__;
1110         line = __LINE__;
1111
1112         _lockmgr_assert(lk, KA_LOCKED, file, line);
1113         unlocked = false;
1114         x = lk->lk_lock;
1115         if (__predict_true((x & LK_SHARE) != 0)) {
1116                 if (lockmgr_sunlock_try(lk, &x)) {
1117                         lockmgr_note_shared_release(lk, file, line);
1118                         unlocked = true;
1119                 } else {
1120                         return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
1121                 }
1122         } else {
1123                 tid = (uintptr_t)curthread;
1124                 if (!lockmgr_recursed(lk) &&
1125                     atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) {
1126                         lockmgr_note_exclusive_release(lk, file, line);
1127                         unlocked = true;
1128                 } else {
1129                         return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
1130                 }
1131         }
1132         if (__predict_false(flags & LK_INTERLOCK)) {
1133                 class = LOCK_CLASS(ilk);
1134                 class->lc_unlock(ilk);
1135         }
1136         return (0);
1137 }
1138
1139 int
1140 __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
1141     const char *wmesg, int pri, int timo, const char *file, int line)
1142 {
1143         GIANT_DECLARE;
1144         struct lockmgr_wait lwa;
1145         struct lock_class *class;
1146         const char *iwmesg;
1147         uintptr_t tid, v, x;
1148         u_int op, realexslp;
1149         int error, ipri, itimo, queue, wakeup_swapper;
1150 #ifdef LOCK_PROFILING
1151         uint64_t waittime = 0;
1152         int contested = 0;
1153 #endif
1154
1155         if (panicstr != NULL)
1156                 return (0);
1157
1158         error = 0;
1159         tid = (uintptr_t)curthread;
1160         op = (flags & LK_TYPE_MASK);
1161         iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
1162         ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
1163         itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
1164
1165         lwa.iwmesg = iwmesg;
1166         lwa.ipri = ipri;
1167         lwa.itimo = itimo;
1168
1169         MPASS((flags & ~LK_TOTAL_MASK) == 0);
1170         KASSERT((op & (op - 1)) == 0,
1171             ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
1172         KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
1173             (op != LK_DOWNGRADE && op != LK_RELEASE),
1174             ("%s: Invalid flags in regard of the operation desired @ %s:%d",
1175             __func__, file, line));
1176         KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
1177             ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
1178             __func__, file, line));
1179         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
1180             ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
1181             lk->lock_object.lo_name, file, line));
1182
1183         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
1184
1185         if (lk->lock_object.lo_flags & LK_NOSHARE) {
1186                 switch (op) {
1187                 case LK_SHARED:
1188                         op = LK_EXCLUSIVE;
1189                         break;
1190                 case LK_UPGRADE:
1191                 case LK_TRYUPGRADE:
1192                 case LK_DOWNGRADE:
1193                         _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
1194                             file, line);
1195                         if (flags & LK_INTERLOCK)
1196                                 class->lc_unlock(ilk);
1197                         return (0);
1198                 }
1199         }
1200
1201         wakeup_swapper = 0;
1202         switch (op) {
1203         case LK_SHARED:
1204                 return (lockmgr_slock_hard(lk, flags, ilk, file, line, &lwa));
1205                 break;
1206         case LK_UPGRADE:
1207         case LK_TRYUPGRADE:
1208                 return (lockmgr_upgrade(lk, flags, ilk, file, line, &lwa));
1209                 break;
1210         case LK_EXCLUSIVE:
1211                 return (lockmgr_xlock_hard(lk, flags, ilk, file, line, &lwa));
1212                 break;
1213         case LK_DOWNGRADE:
1214                 _lockmgr_assert(lk, KA_XLOCKED, file, line);
1215                 LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
1216                 WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
1217
1218                 /*
1219                  * Panic if the lock is recursed.
1220                  */
1221                 if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1222                         if (flags & LK_INTERLOCK)
1223                                 class->lc_unlock(ilk);
1224                         panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
1225                             __func__, iwmesg, file, line);
1226                 }
1227                 TD_SLOCKS_INC(curthread);
1228
1229                 /*
1230                  * In order to preserve waiters flags, just spin.
1231                  */
1232                 for (;;) {
1233                         x = lk->lk_lock;
1234                         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1235                         x &= LK_ALL_WAITERS;
1236                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1237                             LK_SHARERS_LOCK(1) | x))
1238                                 break;
1239                         cpu_spinwait();
1240                 }
1241                 break;
1242         case LK_RELEASE:
1243                 _lockmgr_assert(lk, KA_LOCKED, file, line);
1244                 x = lk->lk_lock;
1245
1246                 if (__predict_true((x & LK_SHARE) != 0)) {
1247                         return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
1248                 } else {
1249                         return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
1250                 }
1251                 break;
1252         case LK_DRAIN:
1253                 if (LK_CAN_WITNESS(flags))
1254                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1255                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1256                             ilk : NULL);
1257
1258                 /*
1259                  * Trying to drain a lock we already own will result in a
1260                  * deadlock.
1261                  */
1262                 if (lockmgr_xlocked(lk)) {
1263                         if (flags & LK_INTERLOCK)
1264                                 class->lc_unlock(ilk);
1265                         panic("%s: draining %s with the lock held @ %s:%d\n",
1266                             __func__, iwmesg, file, line);
1267                 }
1268
1269                 for (;;) {
1270                         if (lk->lk_lock == LK_UNLOCKED &&
1271                             atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
1272                                 break;
1273
1274 #ifdef HWPMC_HOOKS
1275                         PMC_SOFT_CALL( , , lock, failed);
1276 #endif
1277                         lock_profile_obtain_lock_failed(&lk->lock_object,
1278                             &contested, &waittime);
1279
1280                         /*
1281                          * If the caller does not want to sleep
1282                          * (LK_NOWAIT), just give up and return.
1283                          */
1284                         if (LK_TRYOP(flags)) {
1285                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
1286                                     __func__, lk);
1287                                 error = EBUSY;
1288                                 break;
1289                         }
1290
1291                         /*
1292                          * Acquire the sleepqueue chain lock because we
1293                          * probably will need to manipulate waiters flags.
1294                          */
1295                         sleepq_lock(&lk->lock_object);
1296                         x = lk->lk_lock;
1297
1298                         /*
1299                          * If the lock has been released while we spun on
1300                          * the sleepqueue chain lock, just try again.
1301                          */
1302                         if (x == LK_UNLOCKED) {
1303                                 sleepq_release(&lk->lock_object);
1304                                 continue;
1305                         }
1306
1307                         v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1308                         if ((x & ~v) == LK_UNLOCKED) {
1309                                 v = (x & ~LK_EXCLUSIVE_SPINNERS);
1310
1311                                 /*
1312                                  * If interruptible sleeps left the exclusive
1313                                  * queue empty, avoid starving the threads
1314                                  * sleeping on the shared queue by giving
1315                                  * them precedence and clearing the
1316                                  * exclusive waiters bit anyway.
1317                                  * Note that the lk_exslpfail count may
1318                                  * overstate the real number of waiters with
1319                                  * the LK_SLEEPFAIL flag set, because such
1320                                  * waiters may also be using interruptible
1321                                  * sleeps, so lk_exslpfail should be treated
1322                                  * as an upper bound on that count, edge
1323                                  * cases included.
1324                                  */
1325                                 if (v & LK_EXCLUSIVE_WAITERS) {
1326                                         queue = SQ_EXCLUSIVE_QUEUE;
1327                                         v &= ~LK_EXCLUSIVE_WAITERS;
1328                                 } else {
1329
1330                                         /*
1331                                          * Exclusive waiters sleeping with
1332                                          * LK_SLEEPFAIL on and using
1333                                          * interruptible sleeps/timeout may
1334                                          * have left spurious lk_exslpfail
1335                                          * counts on, so clean it up anyway.
1336                                          */
1337                                         MPASS(v & LK_SHARED_WAITERS);
1338                                         lk->lk_exslpfail = 0;
1339                                         queue = SQ_SHARED_QUEUE;
1340                                         v &= ~LK_SHARED_WAITERS;
1341                                 }
1342                                 if (queue == SQ_EXCLUSIVE_QUEUE) {
1343                                         realexslp =
1344                                             sleepq_sleepcnt(&lk->lock_object,
1345                                             SQ_EXCLUSIVE_QUEUE);
1346                                         if (lk->lk_exslpfail >= realexslp) {
1347                                                 lk->lk_exslpfail = 0;
1348                                                 queue = SQ_SHARED_QUEUE;
1349                                                 v &= ~LK_SHARED_WAITERS;
1350                                                 if (realexslp != 0) {
1351                                                         LOCK_LOG2(lk,
1352                                         "%s: %p has only LK_SLEEPFAIL sleepers",
1353                                                             __func__, lk);
1354                                                         LOCK_LOG2(lk,
1355                         "%s: %p waking up threads on the exclusive queue",
1356                                                             __func__, lk);
1357                                                         wakeup_swapper =
1358                                                             sleepq_broadcast(
1359                                                             &lk->lock_object,
1360                                                             SLEEPQ_LK, 0,
1361                                                             SQ_EXCLUSIVE_QUEUE);
1362                                                 }
1363                                         } else
1364                                                 lk->lk_exslpfail = 0;
1365                                 }
1366                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1367                                         sleepq_release(&lk->lock_object);
1368                                         continue;
1369                                 }
1370                                 LOCK_LOG3(lk,
1371                                 "%s: %p waking up all threads on the %s queue",
1372                                     __func__, lk, queue == SQ_SHARED_QUEUE ?
1373                                     "shared" : "exclusive");
1374                                 wakeup_swapper |= sleepq_broadcast(
1375                                     &lk->lock_object, SLEEPQ_LK, 0, queue);
1376
1377                                 /*
1378                                  * If shared waiters have been woken up, we
1379                                  * need to wait for one of them to acquire
1380                                  * the lock before setting the exclusive
1381                                  * waiters flag, in order to avoid a deadlock.
1382                                  */
1383                                 if (queue == SQ_SHARED_QUEUE) {
1384                                         for (v = lk->lk_lock;
1385                                             (v & LK_SHARE) && !LK_SHARERS(v);
1386                                             v = lk->lk_lock)
1387                                                 cpu_spinwait();
1388                                 }
1389                         }
1390
1391                         /*
1392                          * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1393                          * fail, loop back and retry.
1394                          */
1395                         if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1396                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1397                                     x | LK_EXCLUSIVE_WAITERS)) {
1398                                         sleepq_release(&lk->lock_object);
1399                                         continue;
1400                                 }
1401                                 LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1402                                     __func__, lk);
1403                         }
1404
1405                         /*
1406                          * As long as we have been unable to acquire the
1407                          * exclusive lock and the exclusive waiters flag
1408                          * is set, we will sleep.
1409                          */
1410                         if (flags & LK_INTERLOCK) {
1411                                 class->lc_unlock(ilk);
1412                                 flags &= ~LK_INTERLOCK;
1413                         }
1414                         GIANT_SAVE();
1415                         sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1416                             SQ_EXCLUSIVE_QUEUE);
1417                         sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1418                         GIANT_RESTORE();
1419                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1420                             __func__, lk);
1421                 }
1422
1423                 if (error == 0) {
1424                         lock_profile_obtain_lock_success(&lk->lock_object,
1425                             contested, waittime, file, line);
1426                         LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1427                             lk->lk_recurse, file, line);
1428                         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1429                             LK_TRYWIT(flags), file, line);
1430                         TD_LOCKS_INC(curthread);
1431                         STACK_SAVE(lk);
1432                 }
1433                 break;
1434         default:
1435                 if (flags & LK_INTERLOCK)
1436                         class->lc_unlock(ilk);
1437                 panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1438         }
1439
1440         if (flags & LK_INTERLOCK)
1441                 class->lc_unlock(ilk);
1442         if (wakeup_swapper)
1443                 kick_proc0();
1444
1445         return (error);
1446 }
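
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the LK_DRAIN handling above is what makes the common teardown pattern
 * below safe.  The structure and function names are hypothetical; LK_DRAIN
 * returns with the lock held exclusively once every holder and waiter has
 * gone away.
 */
#if 0
struct example_object {
        struct lock     eo_lock;        /* hypothetical embedded lockmgr lock */
        /* ... fields protected by eo_lock ... */
};

static void
example_object_destroy(struct example_object *obj)
{

        /* Wait until no thread holds or waits for the lock. */
        lockmgr(&obj->eo_lock, LK_DRAIN, NULL);
        /* LK_DRAIN left us as the exclusive owner; drop and destroy it. */
        lockmgr(&obj->eo_lock, LK_RELEASE, NULL);
        lockdestroy(&obj->eo_lock);
}
#endif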
1447
1448 void
1449 _lockmgr_disown(struct lock *lk, const char *file, int line)
1450 {
1451         uintptr_t tid, x;
1452
1453         if (SCHEDULER_STOPPED())
1454                 return;
1455
1456         tid = (uintptr_t)curthread;
1457         _lockmgr_assert(lk, KA_XLOCKED, file, line);
1458
1459         /*
1460          * Panic if the lock is recursed.
1461          */
1462         if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1463                 panic("%s: disown a recursed lockmgr @ %s:%d\n",
1464                     __func__,  file, line);
1465
1466         /*
1467          * If the owner is already LK_KERNPROC just skip the whole operation.
1468          */
1469         if (LK_HOLDER(lk->lk_lock) != tid)
1470                 return;
1471         lock_profile_release_lock(&lk->lock_object);
1472         LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1473         WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1474         TD_LOCKS_DEC(curthread);
1475         STACK_SAVE(lk);
1476
1477         /*
1478          * In order to preserve waiters flags, just spin.
1479          */
1480         for (;;) {
1481                 x = lk->lk_lock;
1482                 MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1483                 x &= LK_ALL_WAITERS;
1484                 if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1485                     LK_KERNPROC | x))
1486                         return;
1487                 cpu_spinwait();
1488         }
1489 }
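
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a typical use of _lockmgr_disown() through the lockmgr_disown() macro
 * from sys/lockmgr.h, similar in spirit to BUF_KERNPROC() in the buffer
 * cache.  The helper name below is hypothetical.
 */
#if 0
static void
example_start_async_work(struct lock *lk)
{

        lockmgr(lk, LK_EXCLUSIVE, NULL);
        /* ... hand the locked object off to another thread ... */
        lockmgr_disown(lk);
        /*
         * The lock is now owned by LK_KERNPROC, so the thread that
         * completes the work may release it with
         * lockmgr(lk, LK_RELEASE, NULL) even though it never acquired it.
         */
}
#endif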
1490
1491 void
1492 lockmgr_printinfo(const struct lock *lk)
1493 {
1494         struct thread *td;
1495         uintptr_t x;
1496
1497         if (lk->lk_lock == LK_UNLOCKED)
1498                 printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1499         else if (lk->lk_lock & LK_SHARE)
1500                 printf("lock type %s: SHARED (count %ju)\n",
1501                     lk->lock_object.lo_name,
1502                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1503         else {
1504                 td = lockmgr_xholder(lk);
1505                 if (td == (struct thread *)LK_KERNPROC)
1506                         printf("lock type %s: EXCL by KERNPROC\n",
1507                             lk->lock_object.lo_name);
1508                 else
1509                         printf("lock type %s: EXCL by thread %p "
1510                             "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
1511                             td, td->td_proc->p_pid, td->td_proc->p_comm,
1512                             td->td_tid);
1513         }
1514
1515         x = lk->lk_lock;
1516         if (x & LK_EXCLUSIVE_WAITERS)
1517                 printf(" with exclusive waiters pending\n");
1518         if (x & LK_SHARED_WAITERS)
1519                 printf(" with shared waiters pending\n");
1520         if (x & LK_EXCLUSIVE_SPINNERS)
1521                 printf(" with exclusive spinners pending\n");
1522
1523         STACK_PRINT(lk);
1524 }
1525
1526 int
1527 lockstatus(const struct lock *lk)
1528 {
1529         uintptr_t v, x;
1530         int ret;
1531
1532         ret = LK_SHARED;
1533         x = lk->lk_lock;
1534         v = LK_HOLDER(x);
1535
1536         if ((x & LK_SHARE) == 0) {
1537                 if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1538                         ret = LK_EXCLUSIVE;
1539                 else
1540                         ret = LK_EXCLOTHER;
1541         } else if (x == LK_UNLOCKED)
1542                 ret = 0;
1543
1544         return (ret);
1545 }
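
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * interpreting the lockstatus() return values; VOP_ISLOCKED() reports
 * vnode lock state the same way.  The helper name below is hypothetical.
 */
#if 0
static void
example_report_status(struct lock *lk)
{

        switch (lockstatus(lk)) {
        case LK_EXCLUSIVE:
                printf("%s: exclusively locked by curthread or LK_KERNPROC\n",
                    lk->lock_object.lo_name);
                break;
        case LK_EXCLOTHER:
                printf("%s: exclusively locked by another thread\n",
                    lk->lock_object.lo_name);
                break;
        case LK_SHARED:
                printf("%s: share locked\n", lk->lock_object.lo_name);
                break;
        default:
                printf("%s: unlocked\n", lk->lock_object.lo_name);
                break;
        }
}
#endif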
1546
1547 #ifdef INVARIANT_SUPPORT
1548
1549 FEATURE(invariant_support,
1550     "Support for modules compiled with INVARIANTS option");
1551
1552 #ifndef INVARIANTS
1553 #undef  _lockmgr_assert
1554 #endif
1555
1556 void
1557 _lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
1558 {
1559         int slocked = 0;
1560
1561         if (panicstr != NULL)
1562                 return;
1563         switch (what) {
1564         case KA_SLOCKED:
1565         case KA_SLOCKED | KA_NOTRECURSED:
1566         case KA_SLOCKED | KA_RECURSED:
1567                 slocked = 1;
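                /* FALLTHROUGH */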
1568         case KA_LOCKED:
1569         case KA_LOCKED | KA_NOTRECURSED:
1570         case KA_LOCKED | KA_RECURSED:
1571 #ifdef WITNESS
1572
1573                 /*
1574                  * We cannot trust WITNESS if the lock is held in exclusive
1575                  * mode and a call to lockmgr_disown() happened.
1576                  * Work around this by skipping the check if the lock is held in
1577                  * exclusive mode even for the KA_LOCKED case.
1578                  */
1579                 if (slocked || (lk->lk_lock & LK_SHARE)) {
1580                         witness_assert(&lk->lock_object, what, file, line);
1581                         break;
1582                 }
1583 #endif
1584                 if (lk->lk_lock == LK_UNLOCKED ||
1585                     ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1586                     (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1587                         panic("Lock %s not %slocked @ %s:%d\n",
1588                             lk->lock_object.lo_name, slocked ? "share" : "",
1589                             file, line);
1590
1591                 if ((lk->lk_lock & LK_SHARE) == 0) {
1592                         if (lockmgr_recursed(lk)) {
1593                                 if (what & KA_NOTRECURSED)
1594                                         panic("Lock %s recursed @ %s:%d\n",
1595                                             lk->lock_object.lo_name, file,
1596                                             line);
1597                         } else if (what & KA_RECURSED)
1598                                 panic("Lock %s not recursed @ %s:%d\n",
1599                                     lk->lock_object.lo_name, file, line);
1600                 }
1601                 break;
1602         case KA_XLOCKED:
1603         case KA_XLOCKED | KA_NOTRECURSED:
1604         case KA_XLOCKED | KA_RECURSED:
1605                 if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1606                         panic("Lock %s not exclusively locked @ %s:%d\n",
1607                             lk->lock_object.lo_name, file, line);
1608                 if (lockmgr_recursed(lk)) {
1609                         if (what & KA_NOTRECURSED)
1610                                 panic("Lock %s recursed @ %s:%d\n",
1611                                     lk->lock_object.lo_name, file, line);
1612                 } else if (what & KA_RECURSED)
1613                         panic("Lock %s not recursed @ %s:%d\n",
1614                             lk->lock_object.lo_name, file, line);
1615                 break;
1616         case KA_UNLOCKED:
1617                 if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1618                         panic("Lock %s exclusively locked @ %s:%d\n",
1619                             lk->lock_object.lo_name, file, line);
1620                 break;
1621         default:
1622                 panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1623                     line);
1624         }
1625 }
1626 #endif
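
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a caller documenting its locking contract through the lockmgr_assert()
 * macro from sys/lockmgr.h, which expands to _lockmgr_assert() only when
 * INVARIANTS is enabled.  The structure and function names are
 * hypothetical.
 */
#if 0
static void
example_object_update(struct example_object *obj)
{

        /* The caller must hold eo_lock exclusively and not recursed. */
        lockmgr_assert(&obj->eo_lock, KA_XLOCKED | KA_NOTRECURSED);
        /* ... modify fields protected by eo_lock ... */
}
#endif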
1627
1628 #ifdef DDB
1629 int
1630 lockmgr_chain(struct thread *td, struct thread **ownerp)
1631 {
1632         struct lock *lk;
1633
1634         lk = td->td_wchan;
1635
1636         if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1637                 return (0);
1638         db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1639         if (lk->lk_lock & LK_SHARE)
1640                 db_printf("SHARED (count %ju)\n",
1641                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1642         else
1643                 db_printf("EXCL\n");
1644         *ownerp = lockmgr_xholder(lk);
1645
1646         return (1);
1647 }
1648
1649 static void
1650 db_show_lockmgr(const struct lock_object *lock)
1651 {
1652         struct thread *td;
1653         const struct lock *lk;
1654
1655         lk = (const struct lock *)lock;
1656
1657         db_printf(" state: ");
1658         if (lk->lk_lock == LK_UNLOCKED)
1659                 db_printf("UNLOCKED\n");
1660         else if (lk->lk_lock & LK_SHARE)
1661                 db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1662         else {
1663                 td = lockmgr_xholder(lk);
1664                 if (td == (struct thread *)LK_KERNPROC)
1665                         db_printf("XLOCK: LK_KERNPROC\n");
1666                 else
1667                         db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1668                             td->td_tid, td->td_proc->p_pid,
1669                             td->td_proc->p_comm);
1670                 if (lockmgr_recursed(lk))
1671                         db_printf(" recursed: %d\n", lk->lk_recurse);
1672         }
1673         db_printf(" waiters: ");
1674         switch (lk->lk_lock & LK_ALL_WAITERS) {
1675         case LK_SHARED_WAITERS:
1676                 db_printf("shared\n");
1677                 break;
1678         case LK_EXCLUSIVE_WAITERS:
1679                 db_printf("exclusive\n");
1680                 break;
1681         case LK_ALL_WAITERS:
1682                 db_printf("shared and exclusive\n");
1683                 break;
1684         default:
1685                 db_printf("none\n");
1686         }
1687         db_printf(" spinners: ");
1688         if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1689                 db_printf("exclusive\n");
1690         else
1691                 db_printf("none\n");
1692 }
1693 #endif
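
/*
 * Editorial note: db_show_lockmgr() above is the lc_ddb_show method of
 * lock_class_lockmgr and is reached from the ddb(4) prompt through the
 * generic "show lock <address>" command, while lockmgr_chain() lets ddb's
 * chain-walking output name the owner of a lockmgr lock that a blocked
 * thread is sleeping on.
 */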