sys/kern/kern_lock.c
1 /*-
2  * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice(s), this list of conditions and the following disclaimer as
10  *    the first lines of this file unmodified other than the possible
11  *    addition of one or more copyright notices.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice(s), this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26  * DAMAGE.
27  */
28
29 #include "opt_adaptive_lockmgrs.h"
30 #include "opt_ddb.h"
31 #include "opt_hwpmc_hooks.h"
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #include <sys/param.h>
37 #include <sys/kdb.h>
38 #include <sys/ktr.h>
39 #include <sys/lock.h>
40 #include <sys/lock_profile.h>
41 #include <sys/lockmgr.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/sleepqueue.h>
45 #ifdef DEBUG_LOCKS
46 #include <sys/stack.h>
47 #endif
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50
51 #include <machine/cpu.h>
52
53 #ifdef DDB
54 #include <ddb/ddb.h>
55 #endif
56
57 #ifdef HWPMC_HOOKS
58 #include <sys/pmckern.h>
59 PMC_SOFT_DECLARE( , , lock, failed);
60 #endif
61
62 CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
63     (LK_ADAPTIVE | LK_NOSHARE));
64 CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
65     ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
66
67 #define SQ_EXCLUSIVE_QUEUE      0
68 #define SQ_SHARED_QUEUE         1
69
70 #ifndef INVARIANTS
71 #define _lockmgr_assert(lk, what, file, line)
72 #endif
73
74 #define TD_SLOCKS_INC(td)       ((td)->td_lk_slocks++)
75 #define TD_SLOCKS_DEC(td)       ((td)->td_lk_slocks--)
76
77 #ifndef DEBUG_LOCKS
78 #define STACK_PRINT(lk)
79 #define STACK_SAVE(lk)
80 #define STACK_ZERO(lk)
81 #else
82 #define STACK_PRINT(lk) stack_print_ddb(&(lk)->lk_stack)
83 #define STACK_SAVE(lk)  stack_save(&(lk)->lk_stack)
84 #define STACK_ZERO(lk)  stack_zero(&(lk)->lk_stack)
85 #endif
86
87 #define LOCK_LOG2(lk, string, arg1, arg2)                               \
88         if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
89                 CTR2(KTR_LOCK, (string), (arg1), (arg2))
90 #define LOCK_LOG3(lk, string, arg1, arg2, arg3)                         \
91         if (LOCK_LOG_TEST(&(lk)->lock_object, 0))                       \
92                 CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
93
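/*
 * Giant handling helpers: GIANT_SAVE() fully drops Giant (remembering in _i
 * how many times it was held) before a sleep or spin below, and
 * GIANT_RESTORE() reacquires it the same number of times afterwards.
 */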
94 #define GIANT_DECLARE                                                   \
95         int _i = 0;                                                     \
96         WITNESS_SAVE_DECL(Giant)
97 #define GIANT_RESTORE() do {                                            \
98         if (_i > 0) {                                                   \
99                 while (_i--)                                            \
100                         mtx_lock(&Giant);                               \
101                 WITNESS_RESTORE(&Giant.lock_object, Giant);             \
102         }                                                               \
103 } while (0)
104 #define GIANT_SAVE() do {                                               \
105         if (mtx_owned(&Giant)) {                                        \
106                 WITNESS_SAVE(&Giant.lock_object, Giant);                \
107                 while (mtx_owned(&Giant)) {                             \
108                         _i++;                                           \
109                         mtx_unlock(&Giant);                             \
110                 }                                                       \
111         }                                                               \
112 } while (0)
113
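/*
 * Sharing policy: LK_CAN_SHARE() allows a shared acquisition only while the
 * lock is unlocked or already share-held and no exclusive waiter or spinner
 * is pending, unless the requesting thread already holds shared lockmgr
 * locks (and LK_NODDLKTREAT was not passed) or has TDP_DEADLKTREAT set;
 * those exceptions avoid self-deadlock at the cost of possibly delaying
 * writers.
 */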
114 #define LK_CAN_SHARE(x, flags)                                          \
115         (((x) & LK_SHARE) &&                                            \
116         (((x) & (LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) == 0 || \
117         (curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) ||  \
118         (curthread->td_pflags & TDP_DEADLKTREAT)))
119 #define LK_TRYOP(x)                                                     \
120         ((x) & LK_NOWAIT)
121
122 #define LK_CAN_WITNESS(x)                                               \
123         (((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
124 #define LK_TRYWIT(x)                                                    \
125         (LK_TRYOP(x) ? LOP_TRYLOCK : 0)
126
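/*
 * Adaptive spinning is only attempted when the lock was initialized with
 * LK_ADAPTIVE and the caller did not request LK_SLEEPFAIL.
 */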
127 #define LK_CAN_ADAPT(lk, f)                                             \
128         (((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&             \
129         ((f) & LK_SLEEPFAIL) == 0)
130
131 #define lockmgr_disowned(lk)                                            \
132         (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
133
134 #define lockmgr_xlocked(lk)                                             \
135         (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
136
137 static void     assert_lockmgr(const struct lock_object *lock, int how);
138 #ifdef DDB
139 static void     db_show_lockmgr(const struct lock_object *lock);
140 #endif
141 static void     lock_lockmgr(struct lock_object *lock, uintptr_t how);
142 #ifdef KDTRACE_HOOKS
143 static int      owner_lockmgr(const struct lock_object *lock,
144                     struct thread **owner);
145 #endif
146 static uintptr_t unlock_lockmgr(struct lock_object *lock);
147
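/*
 * Glue for the generic lock_class framework.  lockmgr locks cannot be used
 * as sleep interlocks and do not support generic assertions or owner
 * queries, so the corresponding methods below simply panic.
 */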
148 struct lock_class lock_class_lockmgr = {
149         .lc_name = "lockmgr",
150         .lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
151         .lc_assert = assert_lockmgr,
152 #ifdef DDB
153         .lc_ddb_show = db_show_lockmgr,
154 #endif
155         .lc_lock = lock_lockmgr,
156         .lc_unlock = unlock_lockmgr,
157 #ifdef KDTRACE_HOOKS
158         .lc_owner = owner_lockmgr,
159 #endif
160 };
161
162 #ifdef ADAPTIVE_LOCKMGRS
163 static u_int alk_retries = 10;
164 static u_int alk_loops = 10000;
165 static SYSCTL_NODE(_debug, OID_AUTO, lockmgr, CTLFLAG_RD, NULL,
166     "lockmgr debugging");
167 SYSCTL_UINT(_debug_lockmgr, OID_AUTO, retries, CTLFLAG_RW, &alk_retries, 0, "");
168 SYSCTL_UINT(_debug_lockmgr, OID_AUTO, loops, CTLFLAG_RW, &alk_loops, 0, "");
169 #endif
170
171 static bool __always_inline lockmgr_slock_try(struct lock *lk, uintptr_t *xp,
172     int flags);
173 static bool __always_inline lockmgr_sunlock_try(struct lock *lk, uintptr_t x);
174
175 static void
176 lockmgr_note_shared_acquire(struct lock *lk, int contested,
177     uint64_t waittime, const char *file, int line, int flags)
178 {
179
180         lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime,
181             file, line);
182         LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line);
183         WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line);
184         TD_LOCKS_INC(curthread);
185         TD_SLOCKS_INC(curthread);
186         STACK_SAVE(lk);
187 }
188
189 static void
190 lockmgr_note_shared_release(struct lock *lk, const char *file, int line)
191 {
192
193         lock_profile_release_lock(&lk->lock_object);
194         WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
195         LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
196         TD_LOCKS_DEC(curthread);
197         TD_SLOCKS_DEC(curthread);
198 }
199
200 static void
201 lockmgr_note_exclusive_acquire(struct lock *lk, int contested,
202     uint64_t waittime, const char *file, int line, int flags)
203 {
204
205         lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime,
206             file, line);
207         LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
208         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file,
209             line);
210         TD_LOCKS_INC(curthread);
211         STACK_SAVE(lk);
212 }
213
214 static void
215 lockmgr_note_exclusive_release(struct lock *lk, const char *file, int line)
216 {
217
218         lock_profile_release_lock(&lk->lock_object);
219         LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file,
220             line);
221         WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
222         TD_LOCKS_DEC(curthread);
223 }
224
225 static void
226 lockmgr_note_exclusive_upgrade(struct lock *lk, const char *file, int line,
227     int flags)
228 {
229
230         LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
231             line);
232         WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
233             LK_TRYWIT(flags), file, line);
234         TD_SLOCKS_DEC(curthread);
235 }
236
237 static __inline struct thread *
238 lockmgr_xholder(const struct lock *lk)
239 {
240         uintptr_t x;
241
242         x = lk->lk_lock;
243         return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
244 }
245
246 /*
247  * Assumes the sleepqueue chain lock is held on entry and returns with it
248  * released.  It also assumes the generic interlock is sane and previously
249  * checked.  If LK_INTERLOCK is specified the interlock is not reacquired
250  * after the sleep.
251  */
252 static __inline int
253 sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
254     const char *wmesg, int pri, int timo, int queue)
255 {
256         GIANT_DECLARE;
257         struct lock_class *class;
258         int catch, error;
259
260         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
261         catch = pri & PCATCH;
262         pri &= PRIMASK;
263         error = 0;
264
265         LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
266             (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
267
268         if (flags & LK_INTERLOCK)
269                 class->lc_unlock(ilk);
270         if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
271                 lk->lk_exslpfail++;
272         GIANT_SAVE();
273         sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
274             SLEEPQ_INTERRUPTIBLE : 0), queue);
275         if ((flags & LK_TIMELOCK) && timo)
276                 sleepq_set_timeout(&lk->lock_object, timo);
277
278         /*
279          * Choose the sleep primitive based on the timeout and catch settings.
280          */
281         if ((flags & LK_TIMELOCK) && timo && catch)
282                 error = sleepq_timedwait_sig(&lk->lock_object, pri);
283         else if ((flags & LK_TIMELOCK) && timo)
284                 error = sleepq_timedwait(&lk->lock_object, pri);
285         else if (catch)
286                 error = sleepq_wait_sig(&lk->lock_object, pri);
287         else
288                 sleepq_wait(&lk->lock_object, pri);
289         GIANT_RESTORE();
290         if ((flags & LK_SLEEPFAIL) && error == 0)
291                 error = ENOLCK;
292
293         return (error);
294 }
295
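/*
 * Release one shared hold; when the last sharer leaves and waiters are
 * queued, wake them up.  The return value is nonzero when the caller must
 * wake up the swapper (as reported by sleepq_broadcast()) once every
 * sleepqueue lock has been dropped.
 */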
296 static __inline int
297 wakeupshlk(struct lock *lk, const char *file, int line)
298 {
299         uintptr_t v, x;
300         u_int realexslp;
301         int queue, wakeup_swapper;
302
303         wakeup_swapper = 0;
304         for (;;) {
305                 x = lk->lk_lock;
306                 if (lockmgr_sunlock_try(lk, x))
307                         break;
308
309                 /*
310                  * We should have a sharer with waiters, so enter the hard
311                  * path in order to handle wakeups correctly.
312                  */
313                 sleepq_lock(&lk->lock_object);
314                 x = lk->lk_lock & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
315                 v = LK_UNLOCKED;
316
317                 /*
318                  * If the lock has exclusive waiters, give them preference in
319                  * order to avoid a deadlock with shared runners-up.
320                  * If interruptible sleeps left the exclusive queue empty,
321                  * avoid starving the threads sleeping on the shared queue by
322                  * giving them precedence and clearing the exclusive waiters
323                  * bit anyway.
324                  * Note that the lk_exslpfail count may overstate the real
325                  * number of waiters with the LK_SLEEPFAIL flag set, because
326                  * LK_SLEEPFAIL may be combined with interruptible sleeps, so
327                  * lk_exslpfail should be treated as an upper bound, including
328                  * in the edge cases.
329                  */
330                 realexslp = sleepq_sleepcnt(&lk->lock_object,
331                     SQ_EXCLUSIVE_QUEUE);
332                 if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
333                         if (lk->lk_exslpfail < realexslp) {
334                                 lk->lk_exslpfail = 0;
335                                 queue = SQ_EXCLUSIVE_QUEUE;
336                                 v |= (x & LK_SHARED_WAITERS);
337                         } else {
338                                 lk->lk_exslpfail = 0;
339                                 LOCK_LOG2(lk,
340                                     "%s: %p has only LK_SLEEPFAIL sleepers",
341                                     __func__, lk);
342                                 LOCK_LOG2(lk,
343                             "%s: %p waking up threads on the exclusive queue",
344                                     __func__, lk);
345                                 wakeup_swapper =
346                                     sleepq_broadcast(&lk->lock_object,
347                                     SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
348                                 queue = SQ_SHARED_QUEUE;
349                         }
350
351                 } else {
352
353                         /*
354                          * Exclusive waiters sleeping with LK_SLEEPFAIL on
355                          * and using interruptible sleeps/timeout may have
356                          * left spurious lk_exslpfail counts on, so clean
357                          * it up anyway.
358                          */
359                         lk->lk_exslpfail = 0;
360                         queue = SQ_SHARED_QUEUE;
361                 }
362
363                 if (!atomic_cmpset_rel_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x,
364                     v)) {
365                         sleepq_release(&lk->lock_object);
366                         continue;
367                 }
368                 LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
369                     __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
370                     "exclusive");
371                 wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
372                     0, queue);
373                 sleepq_release(&lk->lock_object);
374                 break;
375         }
376
377         lockmgr_note_shared_release(lk, file, line);
378         return (wakeup_swapper);
379 }
380
381 static void
382 assert_lockmgr(const struct lock_object *lock, int what)
383 {
384
385         panic("lockmgr locks do not support assertions");
386 }
387
388 static void
389 lock_lockmgr(struct lock_object *lock, uintptr_t how)
390 {
391
392         panic("lockmgr locks do not support sleep interlocking");
393 }
394
395 static uintptr_t
396 unlock_lockmgr(struct lock_object *lock)
397 {
398
399         panic("lockmgr locks do not support sleep interlocking");
400 }
401
402 #ifdef KDTRACE_HOOKS
403 static int
404 owner_lockmgr(const struct lock_object *lock, struct thread **owner)
405 {
406
407         panic("lockmgr locks do not support owner inquiring");
408 }
409 #endif
410
411 void
412 lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
413 {
414         int iflags;
415
416         MPASS((flags & ~LK_INIT_MASK) == 0);
417         ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
418             ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
419             &lk->lk_lock));
420
421         iflags = LO_SLEEPABLE | LO_UPGRADABLE;
422         if (flags & LK_CANRECURSE)
423                 iflags |= LO_RECURSABLE;
424         if ((flags & LK_NODUP) == 0)
425                 iflags |= LO_DUPOK;
426         if (flags & LK_NOPROFILE)
427                 iflags |= LO_NOPROFILE;
428         if ((flags & LK_NOWITNESS) == 0)
429                 iflags |= LO_WITNESS;
430         if (flags & LK_QUIET)
431                 iflags |= LO_QUIET;
432         if (flags & LK_IS_VNODE)
433                 iflags |= LO_IS_VNODE;
434         iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
435
436         lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
437         lk->lk_lock = LK_UNLOCKED;
438         lk->lk_recurse = 0;
439         lk->lk_exslpfail = 0;
440         lk->lk_timo = timo;
441         lk->lk_pri = pri;
442         STACK_ZERO(lk);
443 }
444
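/*
 * Illustrative usage sketch (not part of this file): a lockmgr lock is
 * typically initialized once and then acquired and released through the
 * lockmgr() macro from <sys/lockmgr.h>; PVFS is just an example priority.
 *
 *	struct lock lk;
 *
 *	lockinit(&lk, PVFS, "examplelk", 0, 0);
 *	lockmgr(&lk, LK_EXCLUSIVE, NULL);
 *	...
 *	lockmgr(&lk, LK_RELEASE, NULL);
 *	lockdestroy(&lk);
 */
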
445 /*
446  * XXX: Gross hacks to manipulate external lock flags after
447  * initialization.  Used for certain vnode and buf locks.
448  */
449 void
450 lockallowshare(struct lock *lk)
451 {
452
453         lockmgr_assert(lk, KA_XLOCKED);
454         lk->lock_object.lo_flags &= ~LK_NOSHARE;
455 }
456
457 void
458 lockdisableshare(struct lock *lk)
459 {
460
461         lockmgr_assert(lk, KA_XLOCKED);
462         lk->lock_object.lo_flags |= LK_NOSHARE;
463 }
464
465 void
466 lockallowrecurse(struct lock *lk)
467 {
468
469         lockmgr_assert(lk, KA_XLOCKED);
470         lk->lock_object.lo_flags |= LO_RECURSABLE;
471 }
472
473 void
474 lockdisablerecurse(struct lock *lk)
475 {
476
477         lockmgr_assert(lk, KA_XLOCKED);
478         lk->lock_object.lo_flags &= ~LO_RECURSABLE;
479 }
480
481 void
482 lockdestroy(struct lock *lk)
483 {
484
485         KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
486         KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
487         KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
488         lock_destroy(&lk->lock_object);
489 }
490
491 static bool __always_inline
492 lockmgr_slock_try(struct lock *lk, uintptr_t *xp, int flags)
493 {
494
495         /*
496          * If no other thread has an exclusive lock, or
497          * no exclusive waiter is present, bump the count of
498          * sharers.  Since we have to preserve the state of
499          * waiters, if we fail to acquire the shared lock
500          * loop back and retry.
501          */
502         *xp = lk->lk_lock;
503         while (LK_CAN_SHARE(*xp, flags)) {
504                 if (atomic_fcmpset_acq_ptr(&lk->lk_lock, xp,
505                     *xp + LK_ONE_SHARER)) {
506                         return (true);
507                 }
508         }
509         return (false);
510 }
511
512 static bool __always_inline
513 lockmgr_sunlock_try(struct lock *lk, uintptr_t x)
514 {
515
516         for (;;) {
517                 /*
518                  * If there is more than one shared lock held, just drop one
519                  * and return.
520                  */
521                 if (LK_SHARERS(x) > 1) {
522                         if (atomic_fcmpset_rel_ptr(&lk->lk_lock, &x,
523                             x - LK_ONE_SHARER))
524                                 return (true);
525                         continue;
526                 }
527
528                 /*
529                  * If there are no waiters on the exclusive queue, drop the
530                  * lock quickly.
531                  */
532                 if ((x & LK_ALL_WAITERS) == 0) {
533                         MPASS((x & ~LK_EXCLUSIVE_SPINNERS) ==
534                             LK_SHARERS_LOCK(1));
535                         if (atomic_fcmpset_rel_ptr(&lk->lk_lock, &x,
536                             LK_UNLOCKED))
537                                 return (true);
538                         continue;
539                 }
540                 break;
541         }
542         return (false);
543 }
544
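/*
 * Fast path for lock acquisition: handle uncontended LK_SHARED, LK_EXCLUSIVE
 * and LK_UPGRADE/LK_TRYUPGRADE requests with a single atomic operation and
 * fall back to __lockmgr_args() for everything else.
 */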
545 int
546 lockmgr_lock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk,
547     const char *file, int line)
548 {
549         struct lock_class *class;
550         uintptr_t x, v, tid;
551         u_int op;
552         bool locked;
553
554         op = flags & LK_TYPE_MASK;
555         locked = false;
556         switch (op) {
557         case LK_SHARED:
558                 if (LK_CAN_WITNESS(flags))
559                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
560                             file, line, flags & LK_INTERLOCK ? ilk : NULL);
561                 if (__predict_false(lk->lock_object.lo_flags & LK_NOSHARE))
562                         break;
563                 if (lockmgr_slock_try(lk, &x, flags)) {
564                         lockmgr_note_shared_acquire(lk, 0, 0,
565                             file, line, flags);
566                         locked = true;
567                 }
568                 break;
569         case LK_EXCLUSIVE:
570                 if (LK_CAN_WITNESS(flags))
571                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
572                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
573                             ilk : NULL);
574                 tid = (uintptr_t)curthread;
575                 if (lk->lk_lock == LK_UNLOCKED &&
576                     atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
577                         lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
578                             flags);
579                         locked = true;
580                 }
581                 break;
582         case LK_UPGRADE:
583         case LK_TRYUPGRADE:
584                 _lockmgr_assert(lk, KA_SLOCKED, file, line);
585                 tid = (uintptr_t)curthread;
586                 v = lk->lk_lock;
587                 x = v & LK_ALL_WAITERS;
588                 v &= LK_EXCLUSIVE_SPINNERS;
589                 if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
590                     tid | x)) {
591                         lockmgr_note_exclusive_upgrade(lk, file, line, flags);
592                         locked = true;
593                 }
594                 break;
595         default:
596                 break;
597         }
598         if (__predict_true(locked)) {
599                 if (__predict_false(flags & LK_INTERLOCK)) {
600                         class = LOCK_CLASS(ilk);
601                         class->lc_unlock(ilk);
602                 }
603                 return (0);
604         } else {
605                 return (__lockmgr_args(lk, flags, ilk, LK_WMESG_DEFAULT,
606                     LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, file, line));
607         }
608 }
609
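/*
 * Fast path for lock release: drop an uncontended, non-recursed hold with a
 * single atomic operation and fall back to __lockmgr_args() with LK_RELEASE
 * when waiters must be woken up or the lock is recursed.
 */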
610 int
611 lockmgr_unlock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk)
612 {
613         struct lock_class *class;
614         uintptr_t x, tid;
615         bool unlocked;
616         const char *file;
617         int line;
618
619         file = __FILE__;
620         line = __LINE__;
621
622         _lockmgr_assert(lk, KA_LOCKED, file, line);
623         unlocked = false;
624         x = lk->lk_lock;
625         if (__predict_true(x & LK_SHARE) != 0) {
626                 if (lockmgr_sunlock_try(lk, x)) {
627                         lockmgr_note_shared_release(lk, file, line);
628                         unlocked = true;
629                 }
630         } else {
631                 tid = (uintptr_t)curthread;
632                 if (!lockmgr_recursed(lk) &&
633                     atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) {
634                         lockmgr_note_exclusive_release(lk, file, line);
635                         unlocked = true;
636                 }
637         }
638         if (__predict_true(unlocked)) {
639                 if (__predict_false(flags & LK_INTERLOCK)) {
640                         class = LOCK_CLASS(ilk);
641                         class->lc_unlock(ilk);
642                 }
643                 return (0);
644         } else {
645                 return (__lockmgr_args(lk, flags | LK_RELEASE, ilk, LK_WMESG_DEFAULT,
646                     LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, LOCK_FILE, LOCK_LINE));
647         }
648 }
649
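/*
 * Slow path implementing every lockmgr operation (LK_SHARED, LK_EXCLUSIVE,
 * LK_UPGRADE/LK_TRYUPGRADE, LK_DOWNGRADE, LK_RELEASE and LK_DRAIN),
 * including sleeping, optional adaptive spinning and waiter bookkeeping.
 */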
650 int
651 __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
652     const char *wmesg, int pri, int timo, const char *file, int line)
653 {
654         GIANT_DECLARE;
655         struct lock_class *class;
656         const char *iwmesg;
657         uintptr_t tid, v, x;
658         u_int op, realexslp;
659         int error, ipri, itimo, queue, wakeup_swapper;
660 #ifdef LOCK_PROFILING
661         uint64_t waittime = 0;
662         int contested = 0;
663 #endif
664 #ifdef ADAPTIVE_LOCKMGRS
665         volatile struct thread *owner;
666         u_int i, spintries = 0;
667 #endif
668
669         error = 0;
670         tid = (uintptr_t)curthread;
671         op = (flags & LK_TYPE_MASK);
672         iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
673         ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
674         itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
675
676         MPASS((flags & ~LK_TOTAL_MASK) == 0);
677         KASSERT((op & (op - 1)) == 0,
678             ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
679         KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
680             (op != LK_DOWNGRADE && op != LK_RELEASE),
681             ("%s: Invalid flags in regard of the operation desired @ %s:%d",
682             __func__, file, line));
683         KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
684             ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
685             __func__, file, line));
686         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
687             ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
688             lk->lock_object.lo_name, file, line));
689
690         class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
691         if (panicstr != NULL) {
692                 if (flags & LK_INTERLOCK)
693                         class->lc_unlock(ilk);
694                 return (0);
695         }
696
697         if (lk->lock_object.lo_flags & LK_NOSHARE) {
698                 switch (op) {
699                 case LK_SHARED:
700                         op = LK_EXCLUSIVE;
701                         break;
702                 case LK_UPGRADE:
703                 case LK_TRYUPGRADE:
704                 case LK_DOWNGRADE:
705                         _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
706                             file, line);
707                         if (flags & LK_INTERLOCK)
708                                 class->lc_unlock(ilk);
709                         return (0);
710                 }
711         }
712
713         wakeup_swapper = 0;
714         switch (op) {
715         case LK_SHARED:
716                 if (LK_CAN_WITNESS(flags))
717                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
718                             file, line, flags & LK_INTERLOCK ? ilk : NULL);
719                 for (;;) {
720                         if (lockmgr_slock_try(lk, &x, flags))
721                                 break;
722 #ifdef HWPMC_HOOKS
723                         PMC_SOFT_CALL( , , lock, failed);
724 #endif
725                         lock_profile_obtain_lock_failed(&lk->lock_object,
726                             &contested, &waittime);
727
728                         /*
729                          * If the lock is already held by curthread in
730                          * exclusive way avoid a deadlock.
731                          */
732                         if (LK_HOLDER(x) == tid) {
733                                 LOCK_LOG2(lk,
734                                     "%s: %p already held in exclusive mode",
735                                     __func__, lk);
736                                 error = EDEADLK;
737                                 break;
738                         }
739
740                         /*
741                          * If the caller does not want to sleep (LK_NOWAIT),
742                          * just give up and return.
743                          */
744                         if (LK_TRYOP(flags)) {
745                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
746                                     __func__, lk);
747                                 error = EBUSY;
748                                 break;
749                         }
750
751 #ifdef ADAPTIVE_LOCKMGRS
752                         /*
753                          * If the owner is running on another CPU, spin until
754                          * the owner stops running or the state of the lock
755                          * changes.  We need a double-state handle here
756                          * because for a failed acquisition the lock can be
757                          * either held in exclusive mode or shared mode
758                          * (for the writer starvation avoidance technique).
759                          */
760                         if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
761                             LK_HOLDER(x) != LK_KERNPROC) {
762                                 owner = (struct thread *)LK_HOLDER(x);
763                                 if (LOCK_LOG_TEST(&lk->lock_object, 0))
764                                         CTR3(KTR_LOCK,
765                                             "%s: spinning on %p held by %p",
766                                             __func__, lk, owner);
767                                 KTR_STATE1(KTR_SCHED, "thread",
768                                     sched_tdname(curthread), "spinning",
769                                     "lockname:\"%s\"", lk->lock_object.lo_name);
770
771                                 /*
772                                  * If we are holding also an interlock drop it
773                                  * in order to avoid a deadlock if the lockmgr
774                                  * owner is adaptively spinning on the
775                                  * interlock itself.
776                                  */
777                                 if (flags & LK_INTERLOCK) {
778                                         class->lc_unlock(ilk);
779                                         flags &= ~LK_INTERLOCK;
780                                 }
781                                 GIANT_SAVE();
782                                 while (LK_HOLDER(lk->lk_lock) ==
783                                     (uintptr_t)owner && TD_IS_RUNNING(owner))
784                                         cpu_spinwait();
785                                 KTR_STATE0(KTR_SCHED, "thread",
786                                     sched_tdname(curthread), "running");
787                                 GIANT_RESTORE();
788                                 continue;
789                         } else if (LK_CAN_ADAPT(lk, flags) &&
790                             (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
791                             spintries < alk_retries) {
792                                 KTR_STATE1(KTR_SCHED, "thread",
793                                     sched_tdname(curthread), "spinning",
794                                     "lockname:\"%s\"", lk->lock_object.lo_name);
795                                 if (flags & LK_INTERLOCK) {
796                                         class->lc_unlock(ilk);
797                                         flags &= ~LK_INTERLOCK;
798                                 }
799                                 GIANT_SAVE();
800                                 spintries++;
801                                 for (i = 0; i < alk_loops; i++) {
802                                         if (LOCK_LOG_TEST(&lk->lock_object, 0))
803                                                 CTR4(KTR_LOCK,
804                                     "%s: shared spinning on %p with %u and %u",
805                                                     __func__, lk, spintries, i);
806                                         x = lk->lk_lock;
807                                         if ((x & LK_SHARE) == 0 ||
808                                             LK_CAN_SHARE(x, flags) != 0)
809                                                 break;
810                                         cpu_spinwait();
811                                 }
812                                 KTR_STATE0(KTR_SCHED, "thread",
813                                     sched_tdname(curthread), "running");
814                                 GIANT_RESTORE();
815                                 if (i != alk_loops)
816                                         continue;
817                         }
818 #endif
819
820                         /*
821                          * Acquire the sleepqueue chain lock because we
822                          * probably will need to manipulate waiters flags.
823                          */
824                         sleepq_lock(&lk->lock_object);
825                         x = lk->lk_lock;
826
827                         /*
828                          * If the lock can be acquired in shared mode, try
829                          * again.
830                          */
831                         if (LK_CAN_SHARE(x, flags)) {
832                                 sleepq_release(&lk->lock_object);
833                                 continue;
834                         }
835
836 #ifdef ADAPTIVE_LOCKMGRS
837                         /*
838                          * The current lock owner might have started executing
839                          * on another CPU (or the lock could have changed
840                          * owner) while we were waiting on the turnstile
841                          * chain lock.  If so, drop the turnstile lock and try
842                          * again.
843                          */
844                         if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
845                             LK_HOLDER(x) != LK_KERNPROC) {
846                                 owner = (struct thread *)LK_HOLDER(x);
847                                 if (TD_IS_RUNNING(owner)) {
848                                         sleepq_release(&lk->lock_object);
849                                         continue;
850                                 }
851                         }
852 #endif
853
854                         /*
855                          * Try to set the LK_SHARED_WAITERS flag.  If we fail,
856                          * loop back and retry.
857                          */
858                         if ((x & LK_SHARED_WAITERS) == 0) {
859                                 if (!atomic_cmpset_acq_ptr(&lk->lk_lock, x,
860                                     x | LK_SHARED_WAITERS)) {
861                                         sleepq_release(&lk->lock_object);
862                                         continue;
863                                 }
864                                 LOCK_LOG2(lk, "%s: %p set shared waiters flag",
865                                     __func__, lk);
866                         }
867
868                         /*
869                          * Since we have been unable to acquire the
870                          * shared lock and the shared waiters flag is set,
871                          * we will sleep.
872                          */
873                         error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
874                             SQ_SHARED_QUEUE);
875                         flags &= ~LK_INTERLOCK;
876                         if (error) {
877                                 LOCK_LOG3(lk,
878                                     "%s: interrupted sleep for %p with %d",
879                                     __func__, lk, error);
880                                 break;
881                         }
882                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
883                             __func__, lk);
884                 }
885                 if (error == 0) {
886 #ifdef LOCK_PROFILING
887                         lockmgr_note_shared_acquire(lk, contested, waittime,
888                             file, line, flags);
889 #else
890                         lockmgr_note_shared_acquire(lk, 0, 0, file, line,
891                             flags);
892 #endif
893                 }
894                 break;
895         case LK_UPGRADE:
896         case LK_TRYUPGRADE:
897                 _lockmgr_assert(lk, KA_SLOCKED, file, line);
898                 v = lk->lk_lock;
899                 x = v & LK_ALL_WAITERS;
900                 v &= LK_EXCLUSIVE_SPINNERS;
901
902                 /*
903                  * Try to switch from one shared lock to an exclusive one.
904                  * We need to preserve waiters flags during the operation.
905                  */
906                 if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
907                     tid | x)) {
908                         LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
909                             line);
910                         WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
911                             LK_TRYWIT(flags), file, line);
912                         TD_SLOCKS_DEC(curthread);
913                         break;
914                 }
915
916                 /*
917                  * In LK_TRYUPGRADE mode, do not drop the lock,
918                  * returning EBUSY instead.
919                  */
920                 if (op == LK_TRYUPGRADE) {
921                         LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
922                             __func__, lk);
923                         error = EBUSY;
924                         break;
925                 }
926
927                 /*
928                  * We have been unable to succeed in upgrading, so just
929                  * give up the shared lock.
930                  */
931                 wakeup_swapper |= wakeupshlk(lk, file, line);
932
933                 /* FALLTHROUGH */
934         case LK_EXCLUSIVE:
935                 if (LK_CAN_WITNESS(flags))
936                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
937                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
938                             ilk : NULL);
939
940                 /*
941                  * If curthread already holds the lock and this one is
942                  * allowed to recurse, simply recurse on it.
943                  */
944                 if (lockmgr_xlocked(lk)) {
945                         if ((flags & LK_CANRECURSE) == 0 &&
946                             (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
947
948                                 /*
949                                  * If this is a try operation, give up and
950                                  * return EBUSY instead of panicking.
951                                  */
952                                 if (LK_TRYOP(flags)) {
953                                         LOCK_LOG2(lk,
954                                             "%s: %p fails the try operation",
955                                             __func__, lk);
956                                         error = EBUSY;
957                                         break;
958                                 }
959                                 if (flags & LK_INTERLOCK)
960                                         class->lc_unlock(ilk);
961                 panic("%s: recursing on non recursive lockmgr %s @ %s:%d\n",
962                                     __func__, iwmesg, file, line);
963                         }
964                         lk->lk_recurse++;
965                         LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
966                         LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
967                             lk->lk_recurse, file, line);
968                         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
969                             LK_TRYWIT(flags), file, line);
970                         TD_LOCKS_INC(curthread);
971                         break;
972                 }
973
974                 for (;;) {
975                         if (lk->lk_lock == LK_UNLOCKED &&
976                             atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
977                                 break;
978 #ifdef HWPMC_HOOKS
979                         PMC_SOFT_CALL( , , lock, failed);
980 #endif
981                         lock_profile_obtain_lock_failed(&lk->lock_object,
982                             &contested, &waittime);
983
984                         /*
985                          * If the caller does not want to sleep (LK_NOWAIT),
986                          * just give up and return.
987                          */
988                         if (LK_TRYOP(flags)) {
989                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
990                                     __func__, lk);
991                                 error = EBUSY;
992                                 break;
993                         }
994
995 #ifdef ADAPTIVE_LOCKMGRS
996                         /*
997                          * If the owner is running on another CPU, spin until
998                          * the owner stops running or the state of the lock
999                          * changes.
1000                          */
1001                         x = lk->lk_lock;
1002                         if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
1003                             LK_HOLDER(x) != LK_KERNPROC) {
1004                                 owner = (struct thread *)LK_HOLDER(x);
1005                                 if (LOCK_LOG_TEST(&lk->lock_object, 0))
1006                                         CTR3(KTR_LOCK,
1007                                             "%s: spinning on %p held by %p",
1008                                             __func__, lk, owner);
1009                                 KTR_STATE1(KTR_SCHED, "thread",
1010                                     sched_tdname(curthread), "spinning",
1011                                     "lockname:\"%s\"", lk->lock_object.lo_name);
1012
1013                                 /*
1014                                  * If we are holding also an interlock drop it
1015                                  * in order to avoid a deadlock if the lockmgr
1016                                  * owner is adaptively spinning on the
1017                                  * interlock itself.
1018                                  */
1019                                 if (flags & LK_INTERLOCK) {
1020                                         class->lc_unlock(ilk);
1021                                         flags &= ~LK_INTERLOCK;
1022                                 }
1023                                 GIANT_SAVE();
1024                                 while (LK_HOLDER(lk->lk_lock) ==
1025                                     (uintptr_t)owner && TD_IS_RUNNING(owner))
1026                                         cpu_spinwait();
1027                                 KTR_STATE0(KTR_SCHED, "thread",
1028                                     sched_tdname(curthread), "running");
1029                                 GIANT_RESTORE();
1030                                 continue;
1031                         } else if (LK_CAN_ADAPT(lk, flags) &&
1032                             (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
1033                             spintries < alk_retries) {
1034                                 if ((x & LK_EXCLUSIVE_SPINNERS) == 0 &&
1035                                     !atomic_cmpset_ptr(&lk->lk_lock, x,
1036                                     x | LK_EXCLUSIVE_SPINNERS))
1037                                         continue;
1038                                 KTR_STATE1(KTR_SCHED, "thread",
1039                                     sched_tdname(curthread), "spinning",
1040                                     "lockname:\"%s\"", lk->lock_object.lo_name);
1041                                 if (flags & LK_INTERLOCK) {
1042                                         class->lc_unlock(ilk);
1043                                         flags &= ~LK_INTERLOCK;
1044                                 }
1045                                 GIANT_SAVE();
1046                                 spintries++;
1047                                 for (i = 0; i < alk_loops; i++) {
1048                                         if (LOCK_LOG_TEST(&lk->lock_object, 0))
1049                                                 CTR4(KTR_LOCK,
1050                                     "%s: shared spinning on %p with %u and %u",
1051                                                     __func__, lk, spintries, i);
1052                                         if ((lk->lk_lock &
1053                                             LK_EXCLUSIVE_SPINNERS) == 0)
1054                                                 break;
1055                                         cpu_spinwait();
1056                                 }
1057                                 KTR_STATE0(KTR_SCHED, "thread",
1058                                     sched_tdname(curthread), "running");
1059                                 GIANT_RESTORE();
1060                                 if (i != alk_loops)
1061                                         continue;
1062                         }
1063 #endif
1064
1065                         /*
1066                          * Acquire the sleepqueue chain lock because we
1067                          * probably will need to manipulate waiters flags.
1068                          */
1069                         sleepq_lock(&lk->lock_object);
1070                         x = lk->lk_lock;
1071
1072                         /*
1073                          * If the lock has been released while we spun on
1074                          * the sleepqueue chain lock just try again.
1075                          */
1076                         if (x == LK_UNLOCKED) {
1077                                 sleepq_release(&lk->lock_object);
1078                                 continue;
1079                         }
1080
1081 #ifdef ADAPTIVE_LOCKMGRS
1082                         /*
1083                          * The current lock owner might have started executing
1084                          * on another CPU (or the lock could have changed
1085                          * owner) while we were waiting on the turnstile
1086                          * chain lock.  If so, drop the turnstile lock and try
1087                          * again.
1088                          */
1089                         if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
1090                             LK_HOLDER(x) != LK_KERNPROC) {
1091                                 owner = (struct thread *)LK_HOLDER(x);
1092                                 if (TD_IS_RUNNING(owner)) {
1093                                         sleepq_release(&lk->lock_object);
1094                                         continue;
1095                                 }
1096                         }
1097 #endif
1098
1099                         /*
1100                          * The lock can be in the state where there is a
1101                          * pending queue of waiters, but still no owner.
1102                          * This happens when the lock is contested and an
1103                          * owner is going to claim the lock.
1104                          * If curthread is the one successfully acquiring it
1105                          * claim lock ownership and return, preserving waiters
1106                          * flags.
1107                          */
1108                         v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1109                         if ((x & ~v) == LK_UNLOCKED) {
1110                                 v &= ~LK_EXCLUSIVE_SPINNERS;
1111                                 if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
1112                                     tid | v)) {
1113                                         sleepq_release(&lk->lock_object);
1114                                         LOCK_LOG2(lk,
1115                                             "%s: %p claimed by a new writer",
1116                                             __func__, lk);
1117                                         break;
1118                                 }
1119                                 sleepq_release(&lk->lock_object);
1120                                 continue;
1121                         }
1122
1123                         /*
1124                          * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1125                          * fail, loop back and retry.
1126                          */
1127                         if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1128                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1129                                     x | LK_EXCLUSIVE_WAITERS)) {
1130                                         sleepq_release(&lk->lock_object);
1131                                         continue;
1132                                 }
1133                                 LOCK_LOG2(lk, "%s: %p set excl waiters flag",
1134                                     __func__, lk);
1135                         }
1136
1137                         /*
1138                          * Since we have been unable to acquire the
1139                          * exclusive lock and the exclusive waiters flag
1140                          * is set, we will sleep.
1141                          */
1142                         error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
1143                             SQ_EXCLUSIVE_QUEUE);
1144                         flags &= ~LK_INTERLOCK;
1145                         if (error) {
1146                                 LOCK_LOG3(lk,
1147                                     "%s: interrupted sleep for %p with %d",
1148                                     __func__, lk, error);
1149                                 break;
1150                         }
1151                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1152                             __func__, lk);
1153                 }
1154                 if (error == 0) {
1155 #ifdef LOCK_PROFILING
1156                         lockmgr_note_exclusive_acquire(lk, contested, waittime,
1157                             file, line, flags);
1158 #else
1159                         lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
1160                             flags);
1161 #endif
1162                 }
1163                 break;
1164         case LK_DOWNGRADE:
1165                 _lockmgr_assert(lk, KA_XLOCKED, file, line);
1166                 LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
1167                 WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
1168
1169                 /*
1170                  * Panic if the lock is recursed.
1171                  */
1172                 if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1173                         if (flags & LK_INTERLOCK)
1174                                 class->lc_unlock(ilk);
1175                         panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
1176                             __func__, iwmesg, file, line);
1177                 }
1178                 TD_SLOCKS_INC(curthread);
1179
1180                 /*
1181                  * In order to preserve waiters flags, just spin.
1182                  */
1183                 for (;;) {
1184                         x = lk->lk_lock;
1185                         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1186                         x &= LK_ALL_WAITERS;
1187                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1188                             LK_SHARERS_LOCK(1) | x))
1189                                 break;
1190                         cpu_spinwait();
1191                 }
1192                 break;
1193         case LK_RELEASE:
1194                 _lockmgr_assert(lk, KA_LOCKED, file, line);
1195                 x = lk->lk_lock;
1196
1197                 if ((x & LK_SHARE) == 0) {
1198
1199                         /*
1200                          * As a first option, treat the lock as if it has no
1201                          * waiters.
1202                          * Fix-up the tid var if the lock has been disowned.
1203                          */
1204                         if (LK_HOLDER(x) == LK_KERNPROC)
1205                                 tid = LK_KERNPROC;
1206                         else {
1207                                 WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE,
1208                                     file, line);
1209                                 TD_LOCKS_DEC(curthread);
1210                         }
1211                         LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0,
1212                             lk->lk_recurse, file, line);
1213
1214                         /*
1215                          * The lock is held in exclusive mode.
1216                          * If the lock is recursed also, then unrecurse it.
1217                          */
1218                         if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1219                                 LOCK_LOG2(lk, "%s: %p unrecursing", __func__,
1220                                     lk);
1221                                 lk->lk_recurse--;
1222                                 break;
1223                         }
1224                         if (tid != LK_KERNPROC)
1225                                 lock_profile_release_lock(&lk->lock_object);
1226
1227                         if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid,
1228                             LK_UNLOCKED))
1229                                 break;
1230
1231                         sleepq_lock(&lk->lock_object);
1232                         x = lk->lk_lock;
1233                         v = LK_UNLOCKED;
1234
1235                         /*
1236                          * If the lock has exclusive waiters, give them
1237                          * preference in order to avoid a deadlock with
1238                          * shared runners-up.
1239                          * If interruptible sleeps left the exclusive queue
1240                          * empty, avoid starving the threads sleeping on
1241                          * the shared queue by giving them precedence and
1242                          * clearing the exclusive waiters bit anyway.
1243                          * Note that the lk_exslpfail count may overstate
1244                          * the real number of waiters with the LK_SLEEPFAIL
1245                          * flag set, because LK_SLEEPFAIL may be combined
1246                          * with interruptible sleeps, so lk_exslpfail should
1247                          * be treated as an upper bound, including in the
1248                          * edge cases.
1249                          */
1250                         MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1251                         realexslp = sleepq_sleepcnt(&lk->lock_object,
1252                             SQ_EXCLUSIVE_QUEUE);
1253                         if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1254                                 if (lk->lk_exslpfail < realexslp) {
1255                                         lk->lk_exslpfail = 0;
1256                                         queue = SQ_EXCLUSIVE_QUEUE;
1257                                         v |= (x & LK_SHARED_WAITERS);
1258                                 } else {
1259                                         lk->lk_exslpfail = 0;
1260                                         LOCK_LOG2(lk,
1261                                         "%s: %p has only LK_SLEEPFAIL sleepers",
1262                                             __func__, lk);
1263                                         LOCK_LOG2(lk,
1264                         "%s: %p waking up threads on the exclusive queue",
1265                                             __func__, lk);
1266                                         wakeup_swapper =
1267                                             sleepq_broadcast(&lk->lock_object,
1268                                             SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1269                                         queue = SQ_SHARED_QUEUE;
1270                                 }
1271                         } else {
1272
1273                                 /*
1274                                  * Exclusive waiters sleeping with LK_SLEEPFAIL
1275                                  * on and using interruptible sleeps/timeout
1276                                  * may have left spurious lk_exslpfail counts
1277                                  * on, so clean them up anyway.
1278                                  */
1279                                 lk->lk_exslpfail = 0;
1280                                 queue = SQ_SHARED_QUEUE;
1281                         }
1282
1283                         LOCK_LOG3(lk,
1284                             "%s: %p waking up threads on the %s queue",
1285                             __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1286                             "exclusive");
1287                         atomic_store_rel_ptr(&lk->lk_lock, v);
1288                         wakeup_swapper |= sleepq_broadcast(&lk->lock_object,
1289                             SLEEPQ_LK, 0, queue);
1290                         sleepq_release(&lk->lock_object);
1291                         break;
1292                 } else
1293                         wakeup_swapper = wakeupshlk(lk, file, line);
1294                 break;
1295         case LK_DRAIN:
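                /*
                 * LK_DRAIN: acquire the lock exclusively, sleeping until
                 * every other holder and queued waiter has gone away, and
                 * waking up any threads still sleeping on the lock.
                 */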
1296                 if (LK_CAN_WITNESS(flags))
1297                         WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1298                             LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1299                             ilk : NULL);
1300
1301                 /*
1302                  * Trying to drain a lock we already own will result in a
1303                  * deadlock.
1304                  */
1305                 if (lockmgr_xlocked(lk)) {
1306                         if (flags & LK_INTERLOCK)
1307                                 class->lc_unlock(ilk);
1308                         panic("%s: draining %s with the lock held @ %s:%d\n",
1309                             __func__, iwmesg, file, line);
1310                 }
1311
1312                 for (;;) {
1313                         if (lk->lk_lock == LK_UNLOCKED &&
1314                             atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
1315                                 break;
1316
1317 #ifdef HWPMC_HOOKS
1318                         PMC_SOFT_CALL( , , lock, failed);
1319 #endif
1320                         lock_profile_obtain_lock_failed(&lk->lock_object,
1321                             &contested, &waittime);
1322
1323                         /*
1324                          * If the operation is not expected to sleep, just
1325                          * give up and return.
1326                          */
1327                         if (LK_TRYOP(flags)) {
1328                                 LOCK_LOG2(lk, "%s: %p fails the try operation",
1329                                     __func__, lk);
1330                                 error = EBUSY;
1331                                 break;
1332                         }
1333
1334                         /*
1335                          * Acquire the sleepqueue chain lock because we
1336                          * will probably need to manipulate the waiters flags.
1337                          */
1338                         sleepq_lock(&lk->lock_object);
1339                         x = lk->lk_lock;
1340
1341                         /*
1342                          * If the lock has been released while we spun on
1343                          * the sleepqueue chain lock, just try again.
1344                          */
1345                         if (x == LK_UNLOCKED) {
1346                                 sleepq_release(&lk->lock_object);
1347                                 continue;
1348                         }
1349
1350                         v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1351                         if ((x & ~v) == LK_UNLOCKED) {
1352                                 v = (x & ~LK_EXCLUSIVE_SPINNERS);
1353
1354                                 /*
1355                                  * If interruptible sleeps left the exclusive
1356                                  * queue empty, avoid starving the threads
1357                                  * sleeping on the shared queue by giving
1358                                  * them precedence and cleaning up the
1359                                  * exclusive waiters bit anyway.
1360                                  * Please note that the lk_exslpfail count
1361                                  * may be lying about the real number of
1362                                  * waiters with the LK_SLEEPFAIL flag on
1363                                  * because they may be used in conjunction
1364                                  * with interruptible sleeps, so lk_exslpfail
1365                                  * should be considered an upper bound,
1366                                  * including the edge cases.
1367                                  */
1368                                 if (v & LK_EXCLUSIVE_WAITERS) {
1369                                         queue = SQ_EXCLUSIVE_QUEUE;
1370                                         v &= ~LK_EXCLUSIVE_WAITERS;
1371                                 } else {
1372
1373                                         /*
1374                                          * Exclusive waiters sleeping with
1375                                          * LK_SLEEPFAIL on and using
1376                                          * interruptible sleeps/timeout may
1377                                          * have left spurious lk_exslpfail
1378                                          * counts on, so clean them up anyway.
1379                                          */
1380                                         MPASS(v & LK_SHARED_WAITERS);
1381                                         lk->lk_exslpfail = 0;
1382                                         queue = SQ_SHARED_QUEUE;
1383                                         v &= ~LK_SHARED_WAITERS;
1384                                 }
1385                                 if (queue == SQ_EXCLUSIVE_QUEUE) {
1386                                         realexslp =
1387                                             sleepq_sleepcnt(&lk->lock_object,
1388                                             SQ_EXCLUSIVE_QUEUE);
1389                                         if (lk->lk_exslpfail >= realexslp) {
1390                                                 lk->lk_exslpfail = 0;
1391                                                 queue = SQ_SHARED_QUEUE;
1392                                                 v &= ~LK_SHARED_WAITERS;
1393                                                 if (realexslp != 0) {
1394                                                         LOCK_LOG2(lk,
1395                                         "%s: %p has only LK_SLEEPFAIL sleepers",
1396                                                             __func__, lk);
1397                                                         LOCK_LOG2(lk,
1398                         "%s: %p waking up threads on the exclusive queue",
1399                                                             __func__, lk);
1400                                                         wakeup_swapper =
1401                                                             sleepq_broadcast(
1402                                                             &lk->lock_object,
1403                                                             SLEEPQ_LK, 0,
1404                                                             SQ_EXCLUSIVE_QUEUE);
1405                                                 }
1406                                         } else
1407                                                 lk->lk_exslpfail = 0;
1408                                 }
1409                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1410                                         sleepq_release(&lk->lock_object);
1411                                         continue;
1412                                 }
1413                                 LOCK_LOG3(lk,
1414                                 "%s: %p waking up all threads on the %s queue",
1415                                     __func__, lk, queue == SQ_SHARED_QUEUE ?
1416                                     "shared" : "exclusive");
1417                                 wakeup_swapper |= sleepq_broadcast(
1418                                     &lk->lock_object, SLEEPQ_LK, 0, queue);
1419
1420                                 /*
1421                                  * If shared waiters have been woken up, we
1422                                  * need to wait for one of them to acquire
1423                                  * the lock before setting the exclusive
1424                                  * waiters flag in order to avoid a deadlock.
1425                                  */
1426                                 if (queue == SQ_SHARED_QUEUE) {
1427                                         for (v = lk->lk_lock;
1428                                             (v & LK_SHARE) && !LK_SHARERS(v);
1429                                             v = lk->lk_lock)
1430                                                 cpu_spinwait();
1431                                 }
1432                         }
1433
1434                         /*
1435                          * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1436                          * fail, loop back and retry.
1437                          */
1438                         if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1439                                 if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1440                                     x | LK_EXCLUSIVE_WAITERS)) {
1441                                         sleepq_release(&lk->lock_object);
1442                                         continue;
1443                                 }
1444                                 LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1445                                     __func__, lk);
1446                         }
1447
1448                         /*
1449                          * Since we have been unable to acquire the
1450                          * exclusive lock and the exclusive waiters flag
1451                          * is set, we will sleep.
1452                          */
1453                         if (flags & LK_INTERLOCK) {
1454                                 class->lc_unlock(ilk);
1455                                 flags &= ~LK_INTERLOCK;
1456                         }
1457                         GIANT_SAVE();
1458                         sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1459                             SQ_EXCLUSIVE_QUEUE);
1460                         sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1461                         GIANT_RESTORE();
1462                         LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1463                             __func__, lk);
1464                 }
1465
1466                 if (error == 0) {
1467                         lock_profile_obtain_lock_success(&lk->lock_object,
1468                             contested, waittime, file, line);
1469                         LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1470                             lk->lk_recurse, file, line);
1471                         WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1472                             LK_TRYWIT(flags), file, line);
1473                         TD_LOCKS_INC(curthread);
1474                         STACK_SAVE(lk);
1475                 }
1476                 break;
1477         default:
1478                 if (flags & LK_INTERLOCK)
1479                         class->lc_unlock(ilk);
1480                 panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1481         }
1482
1483         if (flags & LK_INTERLOCK)
1484                 class->lc_unlock(ilk);
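        /*
         * If any sleepq_broadcast() above reported that the swapper must be
         * woken, kick proc0 now that the sleepqueue locks have been dropped.
         */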
1485         if (wakeup_swapper)
1486                 kick_proc0();
1487
1488         return (error);
1489 }
1490
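/*
 * Transfer ownership of an exclusively held lock to LK_KERNPROC, so that no
 * thread is charged with it any longer; the lock can still be released later
 * with LK_RELEASE by any thread.  A minimal, purely illustrative sketch (the
 * 'lk' variable is hypothetical):
 *
 *      lockmgr(&lk, LK_EXCLUSIVE, NULL);
 *      ... hand the locked object off to another context ...
 *      lockmgr_disown(&lk);
 */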
1491 void
1492 _lockmgr_disown(struct lock *lk, const char *file, int line)
1493 {
1494         uintptr_t tid, x;
1495
1496         if (SCHEDULER_STOPPED())
1497                 return;
1498
1499         tid = (uintptr_t)curthread;
1500         _lockmgr_assert(lk, KA_XLOCKED, file, line);
1501
1502         /*
1503          * Panic if the lock is recursed.
1504          */
1505         if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1506                 panic("%s: disown a recursed lockmgr @ %s:%d\n",
1507                     __func__,  file, line);
1508
1509         /*
1510          * If the owner is already LK_KERNPROC, just skip the whole operation.
1511          */
1512         if (LK_HOLDER(lk->lk_lock) != tid)
1513                 return;
1514         lock_profile_release_lock(&lk->lock_object);
1515         LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1516         WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1517         TD_LOCKS_DEC(curthread);
1518         STACK_SAVE(lk);
1519
1520         /*
1521          * In order to preserve the waiters flags, just spin.
1522          */
1523         for (;;) {
1524                 x = lk->lk_lock;
1525                 MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1526                 x &= LK_ALL_WAITERS;
1527                 if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1528                     LK_KERNPROC | x))
1529                         return;
1530                 cpu_spinwait();
1531         }
1532 }
1533
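/*
 * Print a human readable description of the current state of a lockmgr lock
 * (owner or share count, plus any pending waiters and spinners) to the
 * console; intended as a debugging aid.
 */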
1534 void
1535 lockmgr_printinfo(const struct lock *lk)
1536 {
1537         struct thread *td;
1538         uintptr_t x;
1539
1540         if (lk->lk_lock == LK_UNLOCKED)
1541                 printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1542         else if (lk->lk_lock & LK_SHARE)
1543                 printf("lock type %s: SHARED (count %ju)\n",
1544                     lk->lock_object.lo_name,
1545                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1546         else {
1547                 td = lockmgr_xholder(lk);
1548                 if (td == (struct thread *)LK_KERNPROC)
1549                         printf("lock type %s: EXCL by KERNPROC\n",
1550                             lk->lock_object.lo_name);
1551                 else
1552                         printf("lock type %s: EXCL by thread %p "
1553                             "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
1554                             td, td->td_proc->p_pid, td->td_proc->p_comm,
1555                             td->td_tid);
1556         }
1557
1558         x = lk->lk_lock;
1559         if (x & LK_EXCLUSIVE_WAITERS)
1560                 printf(" with exclusive waiters pending\n");
1561         if (x & LK_SHARED_WAITERS)
1562                 printf(" with shared waiters pending\n");
1563         if (x & LK_EXCLUSIVE_SPINNERS)
1564                 printf(" with exclusive spinners pending\n");
1565
1566         STACK_PRINT(lk);
1567 }
1568
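/*
 * Report the state of a lockmgr lock without modifying it: returns
 * LK_EXCLUSIVE if the lock is held exclusively by curthread or by
 * LK_KERNPROC, LK_EXCLOTHER if it is held exclusively by another thread,
 * LK_SHARED if it is held in shared mode and 0 if it is unlocked.
 * A purely illustrative use (the 'foo_lock' variable is hypothetical):
 *
 *      KASSERT(lockstatus(&foo_lock) == LK_EXCLUSIVE,
 *          ("foo_lock not exclusively held"));
 */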
1569 int
1570 lockstatus(const struct lock *lk)
1571 {
1572         uintptr_t v, x;
1573         int ret;
1574
1575         ret = LK_SHARED;
1576         x = lk->lk_lock;
1577         v = LK_HOLDER(x);
1578
1579         if ((x & LK_SHARE) == 0) {
1580                 if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1581                         ret = LK_EXCLUSIVE;
1582                 else
1583                         ret = LK_EXCLOTHER;
1584         } else if (x == LK_UNLOCKED)
1585                 ret = 0;
1586
1587         return (ret);
1588 }
1589
1590 #ifdef INVARIANT_SUPPORT
1591
1592 FEATURE(invariant_support,
1593     "Support for modules compiled with INVARIANTS option");
1594
1595 #ifndef INVARIANTS
1596 #undef  _lockmgr_assert
1597 #endif
1598
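/*
 * Assert that a lockmgr lock is in the state described by 'what' (KA_LOCKED,
 * KA_SLOCKED, KA_XLOCKED or KA_UNLOCKED, optionally combined with
 * KA_RECURSED or KA_NOTRECURSED) and panic if it is not.
 */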
1599 void
1600 _lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
1601 {
1602         int slocked = 0;
1603
1604         if (panicstr != NULL)
1605                 return;
1606         switch (what) {
1607         case KA_SLOCKED:
1608         case KA_SLOCKED | KA_NOTRECURSED:
1609         case KA_SLOCKED | KA_RECURSED:
1610                 slocked = 1;
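                /* FALLTHROUGH */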
1611         case KA_LOCKED:
1612         case KA_LOCKED | KA_NOTRECURSED:
1613         case KA_LOCKED | KA_RECURSED:
1614 #ifdef WITNESS
1615
1616                 /*
1617                  * We cannot trust WITNESS if the lock is held in exclusive
1618                  * mode and a call to lockmgr_disown() happened.
1619                  * Work around this by skipping the check if the lock is
1620                  * held in exclusive mode, even for the KA_LOCKED case.
1621                  */
1622                 if (slocked || (lk->lk_lock & LK_SHARE)) {
1623                         witness_assert(&lk->lock_object, what, file, line);
1624                         break;
1625                 }
1626 #endif
1627                 if (lk->lk_lock == LK_UNLOCKED ||
1628                     ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1629                     (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1630                         panic("Lock %s not %slocked @ %s:%d\n",
1631                             lk->lock_object.lo_name, slocked ? "share" : "",
1632                             file, line);
1633
1634                 if ((lk->lk_lock & LK_SHARE) == 0) {
1635                         if (lockmgr_recursed(lk)) {
1636                                 if (what & KA_NOTRECURSED)
1637                                         panic("Lock %s recursed @ %s:%d\n",
1638                                             lk->lock_object.lo_name, file,
1639                                             line);
1640                         } else if (what & KA_RECURSED)
1641                                 panic("Lock %s not recursed @ %s:%d\n",
1642                                     lk->lock_object.lo_name, file, line);
1643                 }
1644                 break;
1645         case KA_XLOCKED:
1646         case KA_XLOCKED | KA_NOTRECURSED:
1647         case KA_XLOCKED | KA_RECURSED:
1648                 if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1649                         panic("Lock %s not exclusively locked @ %s:%d\n",
1650                             lk->lock_object.lo_name, file, line);
1651                 if (lockmgr_recursed(lk)) {
1652                         if (what & KA_NOTRECURSED)
1653                                 panic("Lock %s recursed @ %s:%d\n",
1654                                     lk->lock_object.lo_name, file, line);
1655                 } else if (what & KA_RECURSED)
1656                         panic("Lock %s not recursed @ %s:%d\n",
1657                             lk->lock_object.lo_name, file, line);
1658                 break;
1659         case KA_UNLOCKED:
1660                 if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1661                         panic("Lock %s exclusively locked @ %s:%d\n",
1662                             lk->lock_object.lo_name, file, line);
1663                 break;
1664         default:
1665                 panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1666                     line);
1667         }
1668 }
1669 #endif
1670
1671 #ifdef DDB
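/*
 * DDB helper: if the given thread is blocked on a lockmgr lock, print the
 * lock name and mode, return its exclusive owner (if any) through 'ownerp'
 * and return 1; return 0 if the thread is blocked on another lock type.
 */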
1672 int
1673 lockmgr_chain(struct thread *td, struct thread **ownerp)
1674 {
1675         struct lock *lk;
1676
1677         lk = td->td_wchan;
1678
1679         if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1680                 return (0);
1681         db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1682         if (lk->lk_lock & LK_SHARE)
1683                 db_printf("SHARED (count %ju)\n",
1684                     (uintmax_t)LK_SHARERS(lk->lk_lock));
1685         else
1686                 db_printf("EXCL\n");
1687         *ownerp = lockmgr_xholder(lk);
1688
1689         return (1);
1690 }
1691
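/*
 * DDB backend for showing a lockmgr lock: dump the lock state, the pending
 * waiter queues and whether exclusive spinners are present.
 */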
1692 static void
1693 db_show_lockmgr(const struct lock_object *lock)
1694 {
1695         struct thread *td;
1696         const struct lock *lk;
1697
1698         lk = (const struct lock *)lock;
1699
1700         db_printf(" state: ");
1701         if (lk->lk_lock == LK_UNLOCKED)
1702                 db_printf("UNLOCKED\n");
1703         else if (lk->lk_lock & LK_SHARE)
1704                 db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1705         else {
1706                 td = lockmgr_xholder(lk);
1707                 if (td == (struct thread *)LK_KERNPROC)
1708                         db_printf("XLOCK: LK_KERNPROC\n");
1709                 else
1710                         db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1711                             td->td_tid, td->td_proc->p_pid,
1712                             td->td_proc->p_comm);
1713                 if (lockmgr_recursed(lk))
1714                         db_printf(" recursed: %d\n", lk->lk_recurse);
1715         }
1716         db_printf(" waiters: ");
1717         switch (lk->lk_lock & LK_ALL_WAITERS) {
1718         case LK_SHARED_WAITERS:
1719                 db_printf("shared\n");
1720                 break;
1721         case LK_EXCLUSIVE_WAITERS:
1722                 db_printf("exclusive\n");
1723                 break;
1724         case LK_ALL_WAITERS:
1725                 db_printf("shared and exclusive\n");
1726                 break;
1727         default:
1728                 db_printf("none\n");
1729         }
1730         db_printf(" spinners: ");
1731         if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1732                 db_printf("exclusive\n");
1733         else
1734                 db_printf("none\n");
1735 }
1736 #endif