1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
28  * DAMAGE.
29  */
30
31 #include "opt_ddb.h"
32 #include "opt_hwpmc_hooks.h"
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include <sys/param.h>
38 #include <sys/kdb.h>
39 #include <sys/ktr.h>
40 #include <sys/lock.h>
41 #include <sys/lock_profile.h>
42 #include <sys/lockmgr.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/sleepqueue.h>
46 #ifdef DEBUG_LOCKS
47 #include <sys/stack.h>
48 #endif
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51
52 #include <machine/cpu.h>
53
54 #ifdef DDB
55 #include <ddb/ddb.h>
56 #endif
57
58 #ifdef HWPMC_HOOKS
59 #include <sys/pmckern.h>
60 PMC_SOFT_DECLARE( , , lock, failed);
61 #endif
62
63 CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
64     (LK_ADAPTIVE | LK_NOSHARE));
65 CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
66     ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
67
68 #define SQ_EXCLUSIVE_QUEUE      0
69 #define SQ_SHARED_QUEUE         1
70
71 #ifndef INVARIANTS
72 #define _lockmgr_assert(lk, what, file, line)
73 #endif
74
75 #define TD_SLOCKS_INC(td)       ((td)->td_lk_slocks++)
76 #define TD_SLOCKS_DEC(td)       ((td)->td_lk_slocks--)
77
78 #ifndef DEBUG_LOCKS
79 #define STACK_PRINT(lk)
80 #define STACK_SAVE(lk)
81 #define STACK_ZERO(lk)
82 #else
83 #define STACK_PRINT(lk) stack_print_ddb(&(lk)->lk_stack)
84 #define STACK_SAVE(lk)  stack_save(&(lk)->lk_stack)
85 #define STACK_ZERO(lk)  stack_zero(&(lk)->lk_stack)
86 #endif
87
88 #define LOCK_LOG2(lk, string, arg1, arg2)                               \
89         if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
90                 CTR2(KTR_LOCK, (string), (arg1), (arg2))
91 #define LOCK_LOG3(lk, string, arg1, arg2, arg3)                         \
92         if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
93                 CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
94
95 #define GIANT_DECLARE                                                   \
96         int _i = 0;                                                     \
97         WITNESS_SAVE_DECL(Giant)
98 #define GIANT_RESTORE() do {                                            \
99         if (_i > 0) {                                                   \
100                 while (_i--)                                            \
101                         mtx_lock(&Giant);                               \
102                 WITNESS_RESTORE(&Giant.lock_object, Giant);             \
103         }                                                               \
104 } while (0)
105 #define GIANT_SAVE() do {                                               \
106         if (mtx_owned(&Giant)) {                                        \
107                 WITNESS_SAVE(&Giant.lock_object, Giant);                \
108                 while (mtx_owned(&Giant)) {                             \
109                         _i++;                                           \
110                         mtx_unlock(&Giant);                             \
111                 }                                                       \
112         }                                                               \
113 } while (0)
114
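/*
 * A shared request can proceed when the lock is already shared and either no
 * exclusive waiters or spinners are queued, or the requesting thread gets
 * deadlock-avoidance treatment (it already holds shared locks and
 * LK_NODDLKTREAT was not passed, or TDP_DEADLKTREAT is set on it).
 */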
115 #define LK_CAN_SHARE(x, flags)                                          \
116         (((x) & LK_SHARE) &&                                            \
117         (((x) & (LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) == 0 || \
118         (curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) ||  \
119         (curthread->td_pflags & TDP_DEADLKTREAT)))
120 #define LK_TRYOP(x)                                                     \
121         ((x) & LK_NOWAIT)
122
123 #define LK_CAN_WITNESS(x)                                               \
124         (((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
125 #define LK_TRYWIT(x)                                                    \
126         (LK_TRYOP(x) ? LOP_TRYLOCK : 0)
127
128 #define LK_CAN_ADAPT(lk, f)                                             \
129         (((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&             \
130         ((f) & LK_SLEEPFAIL) == 0)
131
132 #define lockmgr_disowned(lk)                                            \
133         (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
134
135 #define lockmgr_xlocked_v(v)                                            \
136         (((v) & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
137
138 #define lockmgr_xlocked(lk) lockmgr_xlocked_v((lk)->lk_lock)
139
140 static void     assert_lockmgr(const struct lock_object *lock, int how);
141 #ifdef DDB
142 static void     db_show_lockmgr(const struct lock_object *lock);
143 #endif
144 static void     lock_lockmgr(struct lock_object *lock, uintptr_t how);
145 #ifdef KDTRACE_HOOKS
146 static int      owner_lockmgr(const struct lock_object *lock,
147                     struct thread **owner);
148 #endif
149 static uintptr_t unlock_lockmgr(struct lock_object *lock);
150
151 struct lock_class lock_class_lockmgr = {
152         .lc_name = "lockmgr",
153         .lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
154         .lc_assert = assert_lockmgr,
155 #ifdef DDB
156         .lc_ddb_show = db_show_lockmgr,
157 #endif
158         .lc_lock = lock_lockmgr,
159         .lc_unlock = unlock_lockmgr,
160 #ifdef KDTRACE_HOOKS
161         .lc_owner = owner_lockmgr,
162 #endif
163 };
164
165 struct lockmgr_wait {
166         const char *iwmesg;
167         int ipri;
168         int itimo;
169 };
170
171 static bool __always_inline lockmgr_slock_try(struct lock *lk, uintptr_t *xp,
172     int flags);
173 static bool __always_inline lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp);
174
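/*
 * Common exit path: drop the interlock if the caller passed LK_INTERLOCK and
 * kick the swapper (proc0) if a swapped-out thread was woken up.
 */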
175 static void
176 lockmgr_exit(u_int flags, struct lock_object *ilk, int wakeup_swapper)
177 {
178         struct lock_class *class;
179
180         if (flags & LK_INTERLOCK) {
181                 class = LOCK_CLASS(ilk);
182                 class->lc_unlock(ilk);
183         }
184
185         if (__predict_false(wakeup_swapper))
186                 kick_proc0();
187 }
188
189 static void
190 lockmgr_note_shared_acquire(struct lock *lk, int contested,
191     uint64_t waittime, const char *file, int line, int flags)
192 {
193
194         lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime,
195             file, line);
196         LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line);
197         WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line);
198         TD_LOCKS_INC(curthread);
199         TD_SLOCKS_INC(curthread);
200         STACK_SAVE(lk);
201 }
202
203 static void
204 lockmgr_note_shared_release(struct lock *lk, const char *file, int line)
205 {
206
207         lock_profile_release_lock(&lk->lock_object);
208         WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
209         LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
210         TD_LOCKS_DEC(curthread);
211         TD_SLOCKS_DEC(curthread);
212 }
213
214 static void
215 lockmgr_note_exclusive_acquire(struct lock *lk, int contested,
216     uint64_t waittime, const char *file, int line, int flags)
217 {
218
219         lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime,
220             file, line);
221         LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
222         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file,
223             line);
224         TD_LOCKS_INC(curthread);
225         STACK_SAVE(lk);
226 }
227
228 static void
229 lockmgr_note_exclusive_release(struct lock *lk, const char *file, int line)
230 {
231
232         lock_profile_release_lock(&lk->lock_object);
233         LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file,
234             line);
235         WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
236         TD_LOCKS_DEC(curthread);
237 }
238
239 static __inline struct thread *
240 lockmgr_xholder(const struct lock *lk)
241 {
242         uintptr_t x;
243
244         x = lk->lk_lock;
245         return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
246 }
247
248 /*
249  * Assumes the sleepqueue chain lock is held and returns with it released.
250  * It also assumes the generic interlock is sane and has been checked by the
251  * caller.  If LK_INTERLOCK is specified the interlock is not reacquired
252  * after the sleep.
253  */
254 static __inline int
255 sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
256     const char *wmesg, int pri, int timo, int queue)
257 {
258         GIANT_DECLARE;
259         struct lock_class *class;
260         int catch, error;
261
262         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
263         catch = pri & PCATCH;
264         pri &= PRIMASK;
265         error = 0;
266
267         LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
268             (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
269
270         if (flags & LK_INTERLOCK)
271                 class->lc_unlock(ilk);
272         if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
273                 lk->lk_exslpfail++;
274         GIANT_SAVE();
275         sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
276             SLEEPQ_INTERRUPTIBLE : 0), queue);
277         if ((flags & LK_TIMELOCK) && timo)
278                 sleepq_set_timeout(&lk->lock_object, timo);
279
280         /*
281          * Pick the appropriate way to sleep depending on the flags.
282          */
283         if ((flags & LK_TIMELOCK) && timo && catch)
284                 error = sleepq_timedwait_sig(&lk->lock_object, pri);
285         else if ((flags & LK_TIMELOCK) && timo)
286                 error = sleepq_timedwait(&lk->lock_object, pri);
287         else if (catch)
288                 error = sleepq_wait_sig(&lk->lock_object, pri);
289         else
290                 sleepq_wait(&lk->lock_object, pri);
291         GIANT_RESTORE();
292         if ((flags & LK_SLEEPFAIL) && error == 0)
293                 error = ENOLCK;
294
295         return (error);
296 }
297
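/*
 * Release a shared lock, waking up any waiters that can now proceed.  Returns
 * non-zero if the caller needs to kick the swapper.
 */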
298 static __inline int
299 wakeupshlk(struct lock *lk, const char *file, int line)
300 {
301         uintptr_t v, x, orig_x;
302         u_int realexslp;
303         int queue, wakeup_swapper;
304
305         wakeup_swapper = 0;
306         for (;;) {
307                 x = lk->lk_lock;
308                 if (lockmgr_sunlock_try(lk, &x))
309                         break;
310
311                 /*
312                  * We should have a sharer with waiters, so enter the hard
313                  * path in order to handle wakeups correctly.
314                  */
315                 sleepq_lock(&lk->lock_object);
316                 orig_x = lk->lk_lock;
317 retry_sleepq:
318                 x = orig_x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
319                 v = LK_UNLOCKED;
320
321                 /*
322                  * If the lock has exclusive waiters, give them preference in
323                  * order to avoid a deadlock with shared runners-up.
324                  * If interruptible sleeps left the exclusive queue empty,
325                  * avoid starvation of the threads sleeping on the shared
326                  * queue by giving them precedence and clearing the
327                  * exclusive waiters bit anyway.
328                  * Note that the lk_exslpfail count may overstate the real
329                  * number of waiters with the LK_SLEEPFAIL flag set, because
330                  * such waiters may also be using interruptible sleeps, so
331                  * lk_exslpfail should be treated as an upper bound,
332                  * including the edge cases.
333                  */
334                 realexslp = sleepq_sleepcnt(&lk->lock_object,
335                     SQ_EXCLUSIVE_QUEUE);
336                 if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
337                         if (lk->lk_exslpfail < realexslp) {
338                                 lk->lk_exslpfail = 0;
339                                 queue = SQ_EXCLUSIVE_QUEUE;
340                                 v |= (x & LK_SHARED_WAITERS);
341                         } else {
342                                 lk->lk_exslpfail = 0;
343                                 LOCK_LOG2(lk,
344                                     "%s: %p has only LK_SLEEPFAIL sleepers",
345                                     __func__, lk);
346                                 LOCK_LOG2(lk,
347                             "%s: %p waking up threads on the exclusive queue",
348                                     __func__, lk);
349                                 wakeup_swapper =
350                                     sleepq_broadcast(&lk->lock_object,
351                                     SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
352                                 queue = SQ_SHARED_QUEUE;
353                         }
354
355                 } else {
356
357                         /*
358                          * Exclusive waiters sleeping with LK_SLEEPFAIL on
359                          * and using interruptible sleeps/timeouts may have
360                          * left spurious lk_exslpfail counts behind, so
361                          * clean them up anyway.
362                          */
363                         lk->lk_exslpfail = 0;
364                         queue = SQ_SHARED_QUEUE;
365                 }
366
367                 if (lockmgr_sunlock_try(lk, &orig_x)) {
368                         sleepq_release(&lk->lock_object);
369                         break;
370                 }
371
372                 x |= LK_SHARERS_LOCK(1);
373                 if (!atomic_fcmpset_rel_ptr(&lk->lk_lock, &x, v)) {
374                         orig_x = x;
375                         goto retry_sleepq;
376                 }
377                 LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
378                     __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
379                     "exclusive");
380                 wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
381                     0, queue);
382                 sleepq_release(&lk->lock_object);
383                 break;
384         }
385
386         lockmgr_note_shared_release(lk, file, line);
387         return (wakeup_swapper);
388 }
389
390 static void
391 assert_lockmgr(const struct lock_object *lock, int what)
392 {
393
394         panic("lockmgr locks do not support assertions");
395 }
396
397 static void
398 lock_lockmgr(struct lock_object *lock, uintptr_t how)
399 {
400
401         panic("lockmgr locks do not support sleep interlocking");
402 }
403
404 static uintptr_t
405 unlock_lockmgr(struct lock_object *lock)
406 {
407
408         panic("lockmgr locks do not support sleep interlocking");
409 }
410
411 #ifdef KDTRACE_HOOKS
412 static int
413 owner_lockmgr(const struct lock_object *lock, struct thread **owner)
414 {
415
416         panic("lockmgr locks do not support owner inquiring");
417 }
418 #endif
419
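/*
 * Initialize a lockmgr lock.  The pri and timo arguments become the defaults
 * used when a later lockmgr() request does not override them, and the flags
 * select per-lock behaviour such as LK_CANRECURSE or LK_NOSHARE.  A minimal
 * usage sketch (illustrative only; the lock name and chosen priority below
 * are arbitrary and not part of this file):
 *
 *	struct lock example_lk;
 *
 *	lockinit(&example_lk, PVFS, "examplelk", 0, 0);
 *	lockmgr(&example_lk, LK_EXCLUSIVE, NULL);
 *	...
 *	lockmgr(&example_lk, LK_RELEASE, NULL);
 *	lockdestroy(&example_lk);
 */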
420 void
421 lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
422 {
423         int iflags;
424
425         MPASS((flags & ~LK_INIT_MASK) == 0);
426         ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
427             ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
428             &lk->lk_lock));
429
430         iflags = LO_SLEEPABLE | LO_UPGRADABLE;
431         if (flags & LK_CANRECURSE)
432                 iflags |= LO_RECURSABLE;
433         if ((flags & LK_NODUP) == 0)
434                 iflags |= LO_DUPOK;
435         if (flags & LK_NOPROFILE)
436                 iflags |= LO_NOPROFILE;
437         if ((flags & LK_NOWITNESS) == 0)
438                 iflags |= LO_WITNESS;
439         if (flags & LK_QUIET)
440                 iflags |= LO_QUIET;
441         if (flags & LK_IS_VNODE)
442                 iflags |= LO_IS_VNODE;
443         iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
444
445         lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
446         lk->lk_lock = LK_UNLOCKED;
447         lk->lk_recurse = 0;
448         lk->lk_exslpfail = 0;
449         lk->lk_timo = timo;
450         lk->lk_pri = pri;
451         STACK_ZERO(lk);
452 }
453
454 /*
455  * XXX: Gross hacks to manipulate external lock flags after
456  * initialization.  Used for certain vnode and buf locks.
457  */
458 void
459 lockallowshare(struct lock *lk)
460 {
461
462         lockmgr_assert(lk, KA_XLOCKED);
463         lk->lock_object.lo_flags &= ~LK_NOSHARE;
464 }
465
466 void
467 lockdisableshare(struct lock *lk)
468 {
469
470         lockmgr_assert(lk, KA_XLOCKED);
471         lk->lock_object.lo_flags |= LK_NOSHARE;
472 }
473
474 void
475 lockallowrecurse(struct lock *lk)
476 {
477
478         lockmgr_assert(lk, KA_XLOCKED);
479         lk->lock_object.lo_flags |= LO_RECURSABLE;
480 }
481
482 void
483 lockdisablerecurse(struct lock *lk)
484 {
485
486         lockmgr_assert(lk, KA_XLOCKED);
487         lk->lock_object.lo_flags &= ~LO_RECURSABLE;
488 }
489
490 void
491 lockdestroy(struct lock *lk)
492 {
493
494         KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
495         KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
496         KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
497         lock_destroy(&lk->lock_object);
498 }
499
500 static bool __always_inline
501 lockmgr_slock_try(struct lock *lk, uintptr_t *xp, int flags)
502 {
503
504         /*
505          * If no other thread has an exclusive lock, or
506          * no exclusive waiter is present, bump the count of
507          * sharers.  Since we have to preserve the state of
508          * waiters, if we fail to acquire the shared lock
509          * loop back and retry.
510          */
511         *xp = lk->lk_lock;
512         while (LK_CAN_SHARE(*xp, flags)) {
513                 if (atomic_fcmpset_acq_ptr(&lk->lk_lock, xp,
514                     *xp + LK_ONE_SHARER)) {
515                         return (true);
516                 }
517         }
518         return (false);
519 }
520
521 static bool __always_inline
522 lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp)
523 {
524
525         for (;;) {
526                 /*
527                  * If there is more than one shared lock held, just drop one
528                  * and return.
529                  */
530                 if (LK_SHARERS(*xp) > 1) {
531                         if (atomic_fcmpset_rel_ptr(&lk->lk_lock, xp,
532                             *xp - LK_ONE_SHARER))
533                                 return (true);
534                         continue;
535                 }
536
537                 /*
538                  * If there are no waiters on the exclusive queue, drop the
539                  * lock quickly.
540                  */
541                 if ((*xp & LK_ALL_WAITERS) == 0) {
542                         MPASS((*xp & ~LK_EXCLUSIVE_SPINNERS) ==
543                             LK_SHARERS_LOCK(1));
544                         if (atomic_fcmpset_rel_ptr(&lk->lk_lock, xp,
545                             LK_UNLOCKED))
546                                 return (true);
547                         continue;
548                 }
549                 break;
550         }
551         return (false);
552 }
553
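/*
 * Slow path for shared acquisition: invoked when the fast path fails, it
 * loops attempting the acquisition, detects self-deadlock (EDEADLK), honours
 * LK_NOWAIT (EBUSY) and otherwise sets the shared waiters flag and sleeps on
 * the shared queue until the lock can be shared.
 */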
554 static __noinline int
555 lockmgr_slock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
556     const char *file, int line, struct lockmgr_wait *lwa)
557 {
558         uintptr_t tid, x;
559         int error = 0;
560         const char *iwmesg;
561         int ipri, itimo;
562
563 #ifdef LOCK_PROFILING
564         uint64_t waittime = 0;
565         int contested = 0;
566 #endif
567
568         if (__predict_false(panicstr != NULL))
569                 goto out;
570
571         tid = (uintptr_t)curthread;
572
573         if (LK_CAN_WITNESS(flags))
574                 WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
575                     file, line, flags & LK_INTERLOCK ? ilk : NULL);
576         for (;;) {
577                 if (lockmgr_slock_try(lk, &x, flags))
578                         break;
579 #ifdef HWPMC_HOOKS
580                 PMC_SOFT_CALL( , , lock, failed);
581 #endif
582                 lock_profile_obtain_lock_failed(&lk->lock_object,
583                     &contested, &waittime);
584
585                 /*
586                  * If the lock is already held by curthread in
587                  * exclusive mode, avoid a deadlock.
588                  */
589                 if (LK_HOLDER(x) == tid) {
590                         LOCK_LOG2(lk,
591                             "%s: %p already held in exclusive mode",
592                             __func__, lk);
593                         error = EDEADLK;
594                         break;
595                 }
596
597                 /*
598                  * If the operation must not sleep, just give up
599                  * and return.
600                  */
601                 if (LK_TRYOP(flags)) {
602                         LOCK_LOG2(lk, "%s: %p fails the try operation",
603                             __func__, lk);
604                         error = EBUSY;
605                         break;
606                 }
607
608                 /*
609                  * Acquire the sleepqueue chain lock because we
610                  * probably will need to manipulate waiters flags.
611                  */
612                 sleepq_lock(&lk->lock_object);
613                 x = lk->lk_lock;
614 retry_sleepq:
615
616                 /*
617                  * If the lock can be acquired in shared mode, try
618                  * again.
619                  */
620                 if (LK_CAN_SHARE(x, flags)) {
621                         sleepq_release(&lk->lock_object);
622                         continue;
623                 }
624
625                 /*
626                  * Try to set the LK_SHARED_WAITERS flag.  If we fail,
627                  * loop back and retry.
628                  */
629                 if ((x & LK_SHARED_WAITERS) == 0) {
630                         if (!atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
631                             x | LK_SHARED_WAITERS)) {
632                                 goto retry_sleepq;
633                         }
634                         LOCK_LOG2(lk, "%s: %p set shared waiters flag",
635                             __func__, lk);
636                 }
637
638                 if (lwa == NULL) {
639                         iwmesg = lk->lock_object.lo_name;
640                         ipri = lk->lk_pri;
641                         itimo = lk->lk_timo;
642                 } else {
643                         iwmesg = lwa->iwmesg;
644                         ipri = lwa->ipri;
645                         itimo = lwa->itimo;
646                 }
647
648                 /*
649                  * Since we have been unable to acquire the
650                  * shared lock and the shared waiters flag is set,
651                  * we will sleep.
652                  */
653                 error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
654                     SQ_SHARED_QUEUE);
655                 flags &= ~LK_INTERLOCK;
656                 if (error) {
657                         LOCK_LOG3(lk,
658                             "%s: interrupted sleep for %p with %d",
659                             __func__, lk, error);
660                         break;
661                 }
662                 LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
663                     __func__, lk);
664         }
665         if (error == 0) {
666 #ifdef LOCK_PROFILING
667                 lockmgr_note_shared_acquire(lk, contested, waittime,
668                     file, line, flags);
669 #else
670                 lockmgr_note_shared_acquire(lk, 0, 0, file, line,
671                     flags);
672 #endif
673         }
674
675 out:
676         lockmgr_exit(flags, ilk, 0);
677         return (error);
678 }
679
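/*
 * Slow path for exclusive acquisition: handles recursion on recursable locks,
 * honours LK_NOWAIT, and otherwise sets the exclusive waiters flag and sleeps
 * on the exclusive queue until ownership can be claimed.
 */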
680 static __noinline int
681 lockmgr_xlock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
682     const char *file, int line, struct lockmgr_wait *lwa)
683 {
684         struct lock_class *class;
685         uintptr_t tid, x, v;
686         int error = 0;
687         const char *iwmesg;
688         int ipri, itimo;
689
690 #ifdef LOCK_PROFILING
691         uint64_t waittime = 0;
692         int contested = 0;
693 #endif
694
695         if (__predict_false(panicstr != NULL))
696                 goto out;
697
698         tid = (uintptr_t)curthread;
699
700         if (LK_CAN_WITNESS(flags))
701                 WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
702                     LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
703                     ilk : NULL);
704
705         /*
706          * If curthread already holds the lock and this one is
707          * allowed to recurse, simply recurse on it.
708          */
709         if (lockmgr_xlocked(lk)) {
710                 if ((flags & LK_CANRECURSE) == 0 &&
711                     (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
712                         /*
713                          * If this is a try operation, give up and
714                          * return instead of panicking.
715                          */
716                         if (LK_TRYOP(flags)) {
717                                 LOCK_LOG2(lk,
718                                     "%s: %p fails the try operation",
719                                     __func__, lk);
720                                 error = EBUSY;
721                                 goto out;
722                         }
723                         if (flags & LK_INTERLOCK) {
724                                 class = LOCK_CLASS(ilk);
725                                 class->lc_unlock(ilk);
726                         }
727                         panic("%s: recursing on non recursive lockmgr %p "
728                             "@ %s:%d\n", __func__, lk, file, line);
729                 }
730                 lk->lk_recurse++;
731                 LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
732                 LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
733                     lk->lk_recurse, file, line);
734                 WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
735                     LK_TRYWIT(flags), file, line);
736                 TD_LOCKS_INC(curthread);
737                 goto out;
738         }
739
740         for (;;) {
741                 if (lk->lk_lock == LK_UNLOCKED &&
742                     atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
743                         break;
744 #ifdef HWPMC_HOOKS
745                 PMC_SOFT_CALL( , , lock, failed);
746 #endif
747                 lock_profile_obtain_lock_failed(&lk->lock_object,
748                     &contested, &waittime);
749
750                 /*
751                  * If the operation must not sleep, just give up
752                  * and return.
753                  */
754                 if (LK_TRYOP(flags)) {
755                         LOCK_LOG2(lk, "%s: %p fails the try operation",
756                             __func__, lk);
757                         error = EBUSY;
758                         break;
759                 }
760
761                 /*
762                  * Acquire the sleepqueue chain lock because we
763                  * probably will need to manipulate waiters flags.
764                  */
765                 sleepq_lock(&lk->lock_object);
766                 x = lk->lk_lock;
767 retry_sleepq:
768
769                 /*
770                  * If the lock has been released while we spun on
771                  * the sleepqueue chain lock, just try again.
772                  */
773                 if (x == LK_UNLOCKED) {
774                         sleepq_release(&lk->lock_object);
775                         continue;
776                 }
777
778                 /*
779                  * The lock can be in a state where there is a
780                  * pending queue of waiters but still no owner.
781                  * This happens when the lock is contested and an
782                  * owner is about to claim it.
783                  * If curthread is the one that successfully acquires
784                  * it, claim lock ownership and return, preserving
785                  * the waiters flags.
786                  */
787                 v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
788                 if ((x & ~v) == LK_UNLOCKED) {
789                         v &= ~LK_EXCLUSIVE_SPINNERS;
790                         if (atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
791                             tid | v)) {
792                                 sleepq_release(&lk->lock_object);
793                                 LOCK_LOG2(lk,
794                                     "%s: %p claimed by a new writer",
795                                     __func__, lk);
796                                 break;
797                         }
798                         goto retry_sleepq;
799                 }
800
801                 /*
802                  * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
803                  * fail, loop back and retry.
804                  */
805                 if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
806                         if (!atomic_fcmpset_ptr(&lk->lk_lock, &x,
807                             x | LK_EXCLUSIVE_WAITERS)) {
808                                 goto retry_sleepq;
809                         }
810                         LOCK_LOG2(lk, "%s: %p set excl waiters flag",
811                             __func__, lk);
812                 }
813
814                 if (lwa == NULL) {
815                         iwmesg = lk->lock_object.lo_name;
816                         ipri = lk->lk_pri;
817                         itimo = lk->lk_timo;
818                 } else {
819                         iwmesg = lwa->iwmesg;
820                         ipri = lwa->ipri;
821                         itimo = lwa->itimo;
822                 }
823
824                 /*
825                  * Since we have been unable to acquire the
826                  * exclusive lock and the exclusive waiters flag
827                  * is set, we will sleep.
828                  */
829                 error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
830                     SQ_EXCLUSIVE_QUEUE);
831                 flags &= ~LK_INTERLOCK;
832                 if (error) {
833                         LOCK_LOG3(lk,
834                             "%s: interrupted sleep for %p with %d",
835                             __func__, lk, error);
836                         break;
837                 }
838                 LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
839                     __func__, lk);
840         }
841         if (error == 0) {
842 #ifdef LOCK_PROFILING
843                 lockmgr_note_exclusive_acquire(lk, contested, waittime,
844                     file, line, flags);
845 #else
846                 lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
847                     flags);
848 #endif
849         }
850
851 out:
852         lockmgr_exit(flags, ilk, 0);
853         return (error);
854 }
855
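/*
 * Upgrade a shared lock to an exclusive one.  The single-sharer case is
 * attempted atomically while preserving the waiters flags; on failure,
 * LK_TRYUPGRADE returns EBUSY while LK_UPGRADE drops the shared lock and
 * falls back to a full exclusive acquisition.
 */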
856 static __noinline int
857 lockmgr_upgrade(struct lock *lk, u_int flags, struct lock_object *ilk,
858     const char *file, int line, struct lockmgr_wait *lwa)
859 {
860         uintptr_t tid, x, v;
861         int error = 0;
862         int wakeup_swapper = 0;
863         int op;
864
865         if (__predict_false(panicstr != NULL))
866                 goto out;
867
868         tid = (uintptr_t)curthread;
869
870         _lockmgr_assert(lk, KA_SLOCKED, file, line);
871         v = lk->lk_lock;
872         x = v & LK_ALL_WAITERS;
873         v &= LK_EXCLUSIVE_SPINNERS;
874
875         /*
876          * Try to switch from one shared lock to an exclusive one.
877          * We need to preserve waiters flags during the operation.
878          */
879         if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
880             tid | x)) {
881                 LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
882                     line);
883                 WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
884                     LK_TRYWIT(flags), file, line);
885                 TD_SLOCKS_DEC(curthread);
886                 goto out;
887         }
888
889         op = flags & LK_TYPE_MASK;
890
891         /*
892          * In LK_TRYUPGRADE mode, do not drop the lock,
893          * returning EBUSY instead.
894          */
895         if (op == LK_TRYUPGRADE) {
896                 LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
897                     __func__, lk);
898                 error = EBUSY;
899                 goto out;
900         }
901
902         /*
903          * We were unable to upgrade, so give up the shared
904          * lock and fall back to a full exclusive acquisition.
905          */
906         wakeup_swapper |= wakeupshlk(lk, file, line);
907         error = lockmgr_xlock_hard(lk, flags, ilk, file, line, lwa);
908         flags &= ~LK_INTERLOCK;
909 out:
910         lockmgr_exit(flags, ilk, wakeup_swapper);
911         return (error);
912 }
913
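/*
 * Fast path entry point for lock acquisition.  Uncontended LK_SHARED and
 * LK_EXCLUSIVE requests are satisfied with a single atomic operation;
 * everything else is handed off to the corresponding hard path or to
 * __lockmgr_args().
 */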
914 int
915 lockmgr_lock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk,
916     const char *file, int line)
917 {
918         struct lock_class *class;
919         uintptr_t x, tid;
920         u_int op;
921         bool locked;
922
923         if (__predict_false(panicstr != NULL))
924                 return (0);
925
926         op = flags & LK_TYPE_MASK;
927         locked = false;
928         switch (op) {
929         case LK_SHARED:
930                 if (LK_CAN_WITNESS(flags))
931                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
932                             file, line, flags & LK_INTERLOCK ? ilk : NULL);
933                 if (__predict_false(lk->lock_object.lo_flags & LK_NOSHARE))
934                         break;
935                 if (lockmgr_slock_try(lk, &x, flags)) {
936                         lockmgr_note_shared_acquire(lk, 0, 0,
937                             file, line, flags);
938                         locked = true;
939                 } else {
940                         return (lockmgr_slock_hard(lk, flags, ilk, file, line,
941                             NULL));
942                 }
943                 break;
944         case LK_EXCLUSIVE:
945                 if (LK_CAN_WITNESS(flags))
946                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
947                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
948                             ilk : NULL);
949                 tid = (uintptr_t)curthread;
950                 if (lk->lk_lock == LK_UNLOCKED &&
951                     atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
952                         lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
953                             flags);
954                         locked = true;
955                 } else {
956                         return (lockmgr_xlock_hard(lk, flags, ilk, file, line,
957                             NULL));
958                 }
959                 break;
960         case LK_UPGRADE:
961         case LK_TRYUPGRADE:
962                 return (lockmgr_upgrade(lk, flags, ilk, file, line, NULL));
963         default:
964                 break;
965         }
966         if (__predict_true(locked)) {
967                 if (__predict_false(flags & LK_INTERLOCK)) {
968                         class = LOCK_CLASS(ilk);
969                         class->lc_unlock(ilk);
970                 }
971                 return (0);
972         } else {
973                 return (__lockmgr_args(lk, flags, ilk, LK_WMESG_DEFAULT,
974                     LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, file, line));
975         }
976 }
977
978 static __noinline int
979 lockmgr_sunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
980     const char *file, int line)
981
982 {
983         int wakeup_swapper = 0;
984
985         if (__predict_false(panicstr != NULL))
986                 goto out;
987
988         wakeup_swapper = wakeupshlk(lk, file, line);
989
990 out:
991         lockmgr_exit(flags, ilk, wakeup_swapper);
992         return (0);
993 }
994
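/*
 * Slow path for exclusive release: handles disowned locks, recursion and
 * waiter wakeups, giving exclusive waiters preference over shared ones.
 */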
995 static __noinline int
996 lockmgr_xunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
997     const char *file, int line)
998 {
999         uintptr_t tid, v;
1000         int wakeup_swapper = 0;
1001         u_int realexslp;
1002         int queue;
1003
1004         if (__predict_false(panicstr != NULL))
1005                 goto out;
1006
1007         tid = (uintptr_t)curthread;
1008
1009         /*
1010          * As a first option, treat the lock as if it has no
1011          * waiters.
1012          * Fix up the tid variable if the lock has been disowned.
1013          */
1014         if (LK_HOLDER(x) == LK_KERNPROC)
1015                 tid = LK_KERNPROC;
1016         else {
1017                 WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1018                 TD_LOCKS_DEC(curthread);
1019         }
1020         LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
1021
1022         /*
1023          * The lock is held in exclusive mode.
1024          * If the lock is recursed also, then unrecurse it.
1025          */
1026         if (lockmgr_xlocked_v(x) && lockmgr_recursed(lk)) {
1027                 LOCK_LOG2(lk, "%s: %p unrecursing", __func__, lk);
1028                 lk->lk_recurse--;
1029                 goto out;
1030         }
1031         if (tid != LK_KERNPROC)
1032                 lock_profile_release_lock(&lk->lock_object);
1033
1034         if (x == tid && atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED))
1035                 goto out;
1036
1037         sleepq_lock(&lk->lock_object);
1038         x = lk->lk_lock;
1039         v = LK_UNLOCKED;
1040
1041         /*
1042          * If the lock has exclusive waiters, give them
1043          * preference in order to avoid a deadlock with
1044          * shared runners-up.
1045          * If interruptible sleeps left the exclusive queue
1046          * empty, avoid starvation of the threads sleeping
1047          * on the shared queue by giving them precedence
1048          * and clearing the exclusive waiters bit anyway.
1049          * Note that the lk_exslpfail count may overstate
1050          * the real number of waiters with the LK_SLEEPFAIL
1051          * flag set, because such waiters may also be using
1052          * interruptible sleeps, so lk_exslpfail should be
1053          * treated as an upper bound, including the edge
1054          * cases.
1055          */
1056         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1057         realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE);
1058         if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1059                 if (lk->lk_exslpfail < realexslp) {
1060                         lk->lk_exslpfail = 0;
1061                         queue = SQ_EXCLUSIVE_QUEUE;
1062                         v |= (x & LK_SHARED_WAITERS);
1063                 } else {
1064                         lk->lk_exslpfail = 0;
1065                         LOCK_LOG2(lk,
1066                             "%s: %p has only LK_SLEEPFAIL sleepers",
1067                             __func__, lk);
1068                         LOCK_LOG2(lk,
1069                             "%s: %p waking up threads on the exclusive queue",
1070                             __func__, lk);
1071                         wakeup_swapper = sleepq_broadcast(&lk->lock_object,
1072                             SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1073                         queue = SQ_SHARED_QUEUE;
1074                 }
1075         } else {
1076
1077                 /*
1078                  * Exclusive waiters sleeping with LK_SLEEPFAIL
1079                  * on and using interruptible sleeps/timeouts
1080                  * may have left spurious lk_exslpfail counts
1081                  * behind, so clean them up anyway.
1082                  */
1083                 lk->lk_exslpfail = 0;
1084                 queue = SQ_SHARED_QUEUE;
1085         }
1086
1087         LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
1088             __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1089             "exclusive");
1090         atomic_store_rel_ptr(&lk->lk_lock, v);
1091         wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue);
1092         sleepq_release(&lk->lock_object);
1093
1094 out:
1095         lockmgr_exit(flags, ilk, wakeup_swapper);
1096         return (0);
1097 }
1098
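/*
 * Fast path entry point for lock release.  Uncontended releases are done with
 * a single atomic operation; releases that require waiter handling are passed
 * to the shared or exclusive hard path.
 */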
1099 int
1100 lockmgr_unlock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk)
1101 {
1102         struct lock_class *class;
1103         uintptr_t x, tid;
1104         const char *file;
1105         int line;
1106
1107         if (__predict_false(panicstr != NULL))
1108                 return (0);
1109
1110         file = __FILE__;
1111         line = __LINE__;
1112
1113         _lockmgr_assert(lk, KA_LOCKED, file, line);
1114         x = lk->lk_lock;
1115         if (__predict_true((x & LK_SHARE) != 0)) {
1116                 if (lockmgr_sunlock_try(lk, &x)) {
1117                         lockmgr_note_shared_release(lk, file, line);
1118                 } else {
1119                         return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
1120                 }
1121         } else {
1122                 tid = (uintptr_t)curthread;
1123                 if (!lockmgr_recursed(lk) &&
1124                     atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) {
1125                         lockmgr_note_exclusive_release(lk, file, line);
1126                 } else {
1127                         return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
1128                 }
1129         }
1130         if (__predict_false(flags & LK_INTERLOCK)) {
1131                 class = LOCK_CLASS(ilk);
1132                 class->lc_unlock(ilk);
1133         }
1134         return (0);
1135 }
1136
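/*
 * Full-featured lockmgr entry point.  Besides the basic operations it
 * implements LK_DOWNGRADE, LK_RELEASE and LK_DRAIN, accepts per-call
 * overrides of the sleep message, priority and timeout, and honours the
 * optional interlock passed with LK_INTERLOCK.
 */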
1137 int
1138 __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
1139     const char *wmesg, int pri, int timo, const char *file, int line)
1140 {
1141         GIANT_DECLARE;
1142         struct lockmgr_wait lwa;
1143         struct lock_class *class;
1144         const char *iwmesg;
1145         uintptr_t tid, v, x;
1146         u_int op, realexslp;
1147         int error, ipri, itimo, queue, wakeup_swapper;
1148 #ifdef LOCK_PROFILING
1149         uint64_t waittime = 0;
1150         int contested = 0;
1151 #endif
1152
1153         if (panicstr != NULL)
1154                 return (0);
1155
1156         error = 0;
1157         tid = (uintptr_t)curthread;
1158         op = (flags & LK_TYPE_MASK);
1159         iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
1160         ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
1161         itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
1162
1163         lwa.iwmesg = iwmesg;
1164         lwa.ipri = ipri;
1165         lwa.itimo = itimo;
1166
1167         MPASS((flags & ~LK_TOTAL_MASK) == 0);
1168         KASSERT((op & (op - 1)) == 0,
1169             ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
1170         KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
1171             (op != LK_DOWNGRADE && op != LK_RELEASE),
1172             ("%s: Invalid flags in regard of the operation desired @ %s:%d",
1173             __func__, file, line));
1174         KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
1175             ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
1176             __func__, file, line));
1177         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
1178             ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
1179             lk->lock_object.lo_name, file, line));
1180
1181         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
1182
1183         if (lk->lock_object.lo_flags & LK_NOSHARE) {
1184                 switch (op) {
1185                 case LK_SHARED:
1186                         op = LK_EXCLUSIVE;
1187                         break;
1188                 case LK_UPGRADE:
1189                 case LK_TRYUPGRADE:
1190                 case LK_DOWNGRADE:
1191                         _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
1192                             file, line);
1193                         if (flags & LK_INTERLOCK)
1194                                 class->lc_unlock(ilk);
1195                         return (0);
1196                 }
1197         }
1198
1199         wakeup_swapper = 0;
1200         switch (op) {
1201         case LK_SHARED:
1202                 return (lockmgr_slock_hard(lk, flags, ilk, file, line, &lwa));
1203                 break;
1204         case LK_UPGRADE:
1205         case LK_TRYUPGRADE:
1206                 return (lockmgr_upgrade(lk, flags, ilk, file, line, &lwa));
1207                 break;
1208         case LK_EXCLUSIVE:
1209                 return (lockmgr_xlock_hard(lk, flags, ilk, file, line, &lwa));
1210                 break;
1211         case LK_DOWNGRADE:
1212                 _lockmgr_assert(lk, KA_XLOCKED, file, line);
1213                 LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
1214                 WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
1215
1216                 /*
1217                  * Panic if the lock is recursed.
1218                  */
1219                 if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1220                         if (flags & LK_INTERLOCK)
1221                                 class->lc_unlock(ilk);
1222                         panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
1223                             __func__, iwmesg, file, line);
1224                 }
1225                 TD_SLOCKS_INC(curthread);
1226
1227                 /*
1228                  * In order to preserve waiters flags, just spin.
1229                  */
1230                 for (;;) {
1231                         x = lk->lk_lock;
1232                         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1233                         x &= LK_ALL_WAITERS;
1234                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1235                             LK_SHARERS_LOCK(1) | x))
1236                                 break;
1237                         cpu_spinwait();
1238                 }
1239                 break;
1240         case LK_RELEASE:
1241                 _lockmgr_assert(lk, KA_LOCKED, file, line);
1242                 x = lk->lk_lock;
1243
1244                 if (__predict_true((x & LK_SHARE) != 0)) {
1245                         return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
1246                 } else {
1247                         return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
1248                 }
1249                 break;
1250         case LK_DRAIN:
1251                 if (LK_CAN_WITNESS(flags))
1252                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1253                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1254                             ilk : NULL);
1255
1256                 /*
1257                  * Trying to drain a lock we already own will result in a
1258                  * deadlock.
1259                  */
1260                 if (lockmgr_xlocked(lk)) {
1261                         if (flags & LK_INTERLOCK)
1262                                 class->lc_unlock(ilk);
1263                         panic("%s: draining %s with the lock held @ %s:%d\n",
1264                             __func__, iwmesg, file, line);
1265                 }
1266
1267                 for (;;) {
1268                         if (lk->lk_lock == LK_UNLOCKED &&
1269                             atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
1270                                 break;
1271
1272 #ifdef HWPMC_HOOKS
1273                         PMC_SOFT_CALL( , , lock, failed);
1274 #endif
1275                         lock_profile_obtain_lock_failed(&lk->lock_object,
1276                             &contested, &waittime);
1277
1278                         /*
1279                          * If the operation must not sleep, just give up
1280                          * and return.
1281                          */
1282                         if (LK_TRYOP(flags)) {
1283                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
1284                                     __func__, lk);
1285                                 error = EBUSY;
1286                                 break;
1287                         }
1288
1289                         /*
1290                          * Acquire the sleepqueue chain lock because we
1291                          * probably will need to manipulate waiters flags.
1292                          */
1293                         sleepq_lock(&lk->lock_object);
1294                         x = lk->lk_lock;
1295
1296                         /*
1297                          * If the lock has been released while we spun on
1298                          * the sleepqueue chain lock, just try again.
1299                          */
1300                         if (x == LK_UNLOCKED) {
1301                                 sleepq_release(&lk->lock_object);
1302                                 continue;
1303                         }
1304
1305                         v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1306                         if ((x & ~v) == LK_UNLOCKED) {
1307                                 v = (x & ~LK_EXCLUSIVE_SPINNERS);
1308
1309                                 /*
1310                                  * If interruptible sleeps left the exclusive
1311                                  * queue empty, avoid starvation of the
1312                                  * threads sleeping on the shared queue by
1313                                  * giving them precedence and clearing the
1314                                  * exclusive waiters bit anyway.
1315                                  * Note that the lk_exslpfail count may
1316                                  * overstate the real number of waiters with
1317                                  * the LK_SLEEPFAIL flag set, because such
1318                                  * waiters may also be using interruptible
1319                                  * sleeps, so lk_exslpfail should be treated
1320                                  * as an upper bound, including the edge
1321                                  * cases.
1322                                  */
1323                                 if (v & LK_EXCLUSIVE_WAITERS) {
1324                                         queue = SQ_EXCLUSIVE_QUEUE;
1325                                         v &= ~LK_EXCLUSIVE_WAITERS;
1326                                 } else {
1327
1328                                         /*
1329                                          * Exclusive waiters sleeping with
1330                                          * LK_SLEEPFAIL on and using
1331                                          * interruptible sleeps/timeouts may
1332                                          * have left spurious lk_exslpfail
1333                                          * counts behind, so clean them up.
1334                                          */
1335                                         MPASS(v & LK_SHARED_WAITERS);
1336                                         lk->lk_exslpfail = 0;
1337                                         queue = SQ_SHARED_QUEUE;
1338                                         v &= ~LK_SHARED_WAITERS;
1339                                 }
1340                                 if (queue == SQ_EXCLUSIVE_QUEUE) {
1341                                         realexslp =
1342                                             sleepq_sleepcnt(&lk->lock_object,
1343                                             SQ_EXCLUSIVE_QUEUE);
1344                                         if (lk->lk_exslpfail >= realexslp) {
1345                                                 lk->lk_exslpfail = 0;
1346                                                 queue = SQ_SHARED_QUEUE;
1347                                                 v &= ~LK_SHARED_WAITERS;
1348                                                 if (realexslp != 0) {
1349                                                         LOCK_LOG2(lk,
1350                                         "%s: %p has only LK_SLEEPFAIL sleepers",
1351                                                             __func__, lk);
1352                                                         LOCK_LOG2(lk,
1353                         "%s: %p waking up threads on the exclusive queue",
1354                                                             __func__, lk);
1355                                                         wakeup_swapper =
1356                                                             sleepq_broadcast(
1357                                                             &lk->lock_object,
1358                                                             SLEEPQ_LK, 0,
1359                                                             SQ_EXCLUSIVE_QUEUE);
1360                                                 }
1361                                         } else
1362                                                 lk->lk_exslpfail = 0;
1363                                 }
1364                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1365                                         sleepq_release(&lk->lock_object);
1366                                         continue;
1367                                 }
1368                                 LOCK_LOG3(lk,
1369                                 "%s: %p waking up all threads on the %s queue",
1370                                     __func__, lk, queue == SQ_SHARED_QUEUE ?
1371                                     "shared" : "exclusive");
1372                                 wakeup_swapper |= sleepq_broadcast(
1373                                     &lk->lock_object, SLEEPQ_LK, 0, queue);
1374
1375                                 /*
1376                                  * If shared waiters have been woken up, we
1377                                  * need to wait for one of them to acquire
1378                                  * the lock before setting the exclusive
1379                                  * waiters flag in order to avoid a deadlock.
1380                                  */
1381                                 if (queue == SQ_SHARED_QUEUE) {
1382                                         for (v = lk->lk_lock;
1383                                             (v & LK_SHARE) && !LK_SHARERS(v);
1384                                             v = lk->lk_lock)
1385                                                 cpu_spinwait();
1386                                 }
1387                         }
1388
1389                         /*
1390                          * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1391                          * fail, loop back and retry.
1392                          */
1393                         if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1394                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1395                                     x | LK_EXCLUSIVE_WAITERS)) {
1396                                         sleepq_release(&lk->lock_object);
1397                                         continue;
1398                                 }
1399                                 LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1400                                     __func__, lk);
1401                         }
1402
1403                         /*
1404                          * Since we have been unable to acquire the
1405                          * exclusive lock and the exclusive waiters flag
1406                          * is set, we will sleep.
1407                          */
1408                         if (flags & LK_INTERLOCK) {
1409                                 class->lc_unlock(ilk);
1410                                 flags &= ~LK_INTERLOCK;
1411                         }
1412                         GIANT_SAVE();
1413                         sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1414                             SQ_EXCLUSIVE_QUEUE);
1415                         sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1416                         GIANT_RESTORE();
1417                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1418                             __func__, lk);
1419                 }
1420
1421                 if (error == 0) {
1422                         lock_profile_obtain_lock_success(&lk->lock_object,
1423                             contested, waittime, file, line);
1424                         LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1425                             lk->lk_recurse, file, line);
1426                         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1427                             LK_TRYWIT(flags), file, line);
1428                         TD_LOCKS_INC(curthread);
1429                         STACK_SAVE(lk);
1430                 }
1431                 break;
1432         default:
1433                 if (flags & LK_INTERLOCK)
1434                         class->lc_unlock(ilk);
1435                 panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1436         }
1437
1438         if (flags & LK_INTERLOCK)
1439                 class->lc_unlock(ilk);
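        /*
         * The sleepq_broadcast() calls above may have made a swapped-out
         * thread runnable; now that every lock is dropped, wake the swapper
         * (proc0) so it can bring that thread back in.
         */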
1440         if (wakeup_swapper)
1441                 kick_proc0();
1442
1443         return (error);
1444 }
1445
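/*
 * Hand an exclusively held lock over to LK_KERNPROC so that any context may
 * release it later, for example an I/O completion path running in another
 * thread.  A minimal, hypothetical usage sketch; "lkp" is an illustrative
 * lock pointer, not something defined in this file:
 *
 *	lockmgr(lkp, LK_EXCLUSIVE, NULL);
 *	lockmgr_disown(lkp);
 *	...
 *	lockmgr(lkp, LK_RELEASE, NULL);	(possibly from a different thread)
 */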
1446 void
1447 _lockmgr_disown(struct lock *lk, const char *file, int line)
1448 {
1449         uintptr_t tid, x;
1450
1451         if (SCHEDULER_STOPPED())
1452                 return;
1453
1454         tid = (uintptr_t)curthread;
1455         _lockmgr_assert(lk, KA_XLOCKED, file, line);
1456
1457         /*
1458          * Panic if the lock is recursed.
1459          */
1460         if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1461                 panic("%s: disown a recursed lockmgr @ %s:%d\n",
1462                     __func__,  file, line);
1463
1464         /*
1465          * If the owner is already LK_KERNPROC, just skip the whole operation.
1466          */
1467         if (LK_HOLDER(lk->lk_lock) != tid)
1468                 return;
1469         lock_profile_release_lock(&lk->lock_object);
1470         LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1471         WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1472         TD_LOCKS_DEC(curthread);
1473         STACK_SAVE(lk);
1474
1475         /*
1476          * Spin, preserving the waiters flags, until the cmpset succeeds.
1477          */
1478         for (;;) {
1479                 x = lk->lk_lock;
1480                 MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1481                 x &= LK_ALL_WAITERS;
1482                 if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1483                     LK_KERNPROC | x))
1484                         return;
1485                 cpu_spinwait();
1486         }
1487 }
1488
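/*
 * Dump a human-readable description of the lock state (owner, share count,
 * pending waiters and spinners) to the console; meant for informational and
 * debugging output rather than for programmatic use.
 */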
1489 void
1490 lockmgr_printinfo(const struct lock *lk)
1491 {
1492         struct thread *td;
1493         uintptr_t x;
1494
1495         if (lk->lk_lock == LK_UNLOCKED)
1496                 printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1497         else if (lk->lk_lock & LK_SHARE)
1498                 printf("lock type %s: SHARED (count %ju)\n",
1499                     lk->lock_object.lo_name,
1500                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1501         else {
1502                 td = lockmgr_xholder(lk);
1503                 if (td == (struct thread *)LK_KERNPROC)
1504                         printf("lock type %s: EXCL by KERNPROC\n",
1505                             lk->lock_object.lo_name);
1506                 else
1507                         printf("lock type %s: EXCL by thread %p "
1508                             "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
1509                             td, td->td_proc->p_pid, td->td_proc->p_comm,
1510                             td->td_tid);
1511         }
1512
1513         x = lk->lk_lock;
1514         if (x & LK_EXCLUSIVE_WAITERS)
1515                 printf(" with exclusive waiters pending\n");
1516         if (x & LK_SHARED_WAITERS)
1517                 printf(" with shared waiters pending\n");
1518         if (x & LK_EXCLUSIVE_SPINNERS)
1519                 printf(" with exclusive spinners pending\n");
1520
1521         STACK_PRINT(lk);
1522 }
1523
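/*
 * Report how the lock is held relative to the calling thread: LK_EXCLUSIVE
 * if curthread (or LK_KERNPROC) owns it exclusively, LK_EXCLOTHER if another
 * thread does, LK_SHARED if it is share-locked, and 0 if it is unlocked.
 * A hypothetical caller sketch, assuming "lkp" points at some lockmgr lock:
 *
 *	if (lockstatus(lkp) == LK_EXCLUSIVE)
 *		(curthread already holds lkp exclusively)
 */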
1524 int
1525 lockstatus(const struct lock *lk)
1526 {
1527         uintptr_t v, x;
1528         int ret;
1529
1530         ret = LK_SHARED;
1531         x = lk->lk_lock;
1532         v = LK_HOLDER(x);
1533
1534         if ((x & LK_SHARE) == 0) {
1535                 if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1536                         ret = LK_EXCLUSIVE;
1537                 else
1538                         ret = LK_EXCLOTHER;
1539         } else if (x == LK_UNLOCKED)
1540                 ret = 0;
1541
1542         return (ret);
1543 }
1544
1545 #ifdef INVARIANT_SUPPORT
1546
1547 FEATURE(invariant_support,
1548     "Support for modules compiled with INVARIANTS option");
1549
1550 #ifndef INVARIANTS
1551 #undef  _lockmgr_assert
1552 #endif
1553
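/*
 * Assertion helper behind the lockmgr_assert() macro; panics if the lock
 * state does not satisfy the requested KA_* condition.  A hedged usage
 * sketch, assuming "lkp" points at some lockmgr lock:
 *
 *	lockmgr_assert(lkp, KA_XLOCKED);
 */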
1554 void
1555 _lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
1556 {
1557         int slocked = 0;
1558
1559         if (panicstr != NULL)
1560                 return;
1561         switch (what) {
1562         case KA_SLOCKED:
1563         case KA_SLOCKED | KA_NOTRECURSED:
1564         case KA_SLOCKED | KA_RECURSED:
1565                 slocked = 1;
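                /* FALLTHROUGH into the shared KA_LOCKED handling below. */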
1566         case KA_LOCKED:
1567         case KA_LOCKED | KA_NOTRECURSED:
1568         case KA_LOCKED | KA_RECURSED:
1569 #ifdef WITNESS
1570
1571                 /*
1572                  * We cannot trust WITNESS if the lock is held in exclusive
1573                  * mode and a call to lockmgr_disown() happened.
1574                  * Work around this by skipping the check if the lock is
1575                  * held in exclusive mode, even for the KA_LOCKED case.
1576                  */
1577                 if (slocked || (lk->lk_lock & LK_SHARE)) {
1578                         witness_assert(&lk->lock_object, what, file, line);
1579                         break;
1580                 }
1581 #endif
1582                 if (lk->lk_lock == LK_UNLOCKED ||
1583                     ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1584                     (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1585                         panic("Lock %s not %slocked @ %s:%d\n",
1586                             lk->lock_object.lo_name, slocked ? "share" : "",
1587                             file, line);
1588
1589                 if ((lk->lk_lock & LK_SHARE) == 0) {
1590                         if (lockmgr_recursed(lk)) {
1591                                 if (what & KA_NOTRECURSED)
1592                                         panic("Lock %s recursed @ %s:%d\n",
1593                                             lk->lock_object.lo_name, file,
1594                                             line);
1595                         } else if (what & KA_RECURSED)
1596                                 panic("Lock %s not recursed @ %s:%d\n",
1597                                     lk->lock_object.lo_name, file, line);
1598                 }
1599                 break;
1600         case KA_XLOCKED:
1601         case KA_XLOCKED | KA_NOTRECURSED:
1602         case KA_XLOCKED | KA_RECURSED:
1603                 if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1604                         panic("Lock %s not exclusively locked @ %s:%d\n",
1605                             lk->lock_object.lo_name, file, line);
1606                 if (lockmgr_recursed(lk)) {
1607                         if (what & KA_NOTRECURSED)
1608                                 panic("Lock %s recursed @ %s:%d\n",
1609                                     lk->lock_object.lo_name, file, line);
1610                 } else if (what & KA_RECURSED)
1611                         panic("Lock %s not recursed @ %s:%d\n",
1612                             lk->lock_object.lo_name, file, line);
1613                 break;
1614         case KA_UNLOCKED:
1615                 if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1616                         panic("Lock %s exclusively locked @ %s:%d\n",
1617                             lk->lock_object.lo_name, file, line);
1618                 break;
1619         default:
1620                 panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1621                     line);
1622         }
1623 }
1624 #endif
1625
1626 #ifdef DDB
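/*
 * DDB helper: report whether the given thread is blocked on a lockmgr lock
 * and, if so, which thread owns that lock, so lock-chain commands can keep
 * walking the chain.
 */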
1627 int
1628 lockmgr_chain(struct thread *td, struct thread **ownerp)
1629 {
1630         struct lock *lk;
1631
1632         lk = td->td_wchan;
1633
1634         if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1635                 return (0);
1636         db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1637         if (lk->lk_lock & LK_SHARE)
1638                 db_printf("SHARED (count %ju)\n",
1639                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1640         else
1641                 db_printf("EXCL\n");
1642         *ownerp = lockmgr_xholder(lk);
1643
1644         return (1);
1645 }
1646
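/*
 * lc_ddb_show callback for lockmgr locks; pretty-prints the lock state for
 * the DDB "show lock" command.
 */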
1647 static void
1648 db_show_lockmgr(const struct lock_object *lock)
1649 {
1650         struct thread *td;
1651         const struct lock *lk;
1652
1653         lk = (const struct lock *)lock;
1654
1655         db_printf(" state: ");
1656         if (lk->lk_lock == LK_UNLOCKED)
1657                 db_printf("UNLOCKED\n");
1658         else if (lk->lk_lock & LK_SHARE)
1659                 db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1660         else {
1661                 td = lockmgr_xholder(lk);
1662                 if (td == (struct thread *)LK_KERNPROC)
1663                         db_printf("XLOCK: LK_KERNPROC\n");
1664                 else
1665                         db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1666                             td->td_tid, td->td_proc->p_pid,
1667                             td->td_proc->p_comm);
1668                 if (lockmgr_recursed(lk))
1669                         db_printf(" recursed: %d\n", lk->lk_recurse);
1670         }
1671         db_printf(" waiters: ");
1672         switch (lk->lk_lock & LK_ALL_WAITERS) {
1673         case LK_SHARED_WAITERS:
1674                 db_printf("shared\n");
1675                 break;
1676         case LK_EXCLUSIVE_WAITERS:
1677                 db_printf("exclusive\n");
1678                 break;
1679         case LK_ALL_WAITERS:
1680                 db_printf("shared and exclusive\n");
1681                 break;
1682         default:
1683                 db_printf("none\n");
1684         }
1685         db_printf(" spinners: ");
1686         if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1687                 db_printf("exclusive\n");
1688         else
1689                 db_printf("none\n");
1690 }
1691 #endif