 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * Machine independent bits of reader/writer lock implementation.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_hwpmc_hooks.h"
#include "opt_kdtrace.h"
#include "opt_no_adaptive_rwlocks.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>
#include <machine/cpu.h>
#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
#define ADAPTIVE_RWLOCKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
 * Return the rwlock address when the lock cookie address is provided.
 * This functionality assumes that struct rwlock has a member named rw_lock.
#define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock))
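/*
 * Illustrative sketch (not compiled here): the cookie handed to the
 * _rw_*() internals is the address of the rw_lock member, and
 * rwlock2rw() recovers the enclosing lock from it, roughly:
 *
 *    struct rwlock *rw = ...;
 *    volatile uintptr_t *c = &rw->rw_lock;         // the cookie
 *    assert(rwlock2rw(c) == rw);                   // back to the lock
 */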
static void db_show_rwlock(const struct lock_object *lock);
static void assert_rw(const struct lock_object *lock, int what);
static void lock_rw(struct lock_object *lock, uintptr_t how);
static int owner_rw(const struct lock_object *lock, struct thread **owner);
static uintptr_t unlock_rw(struct lock_object *lock);
struct lock_class lock_class_rw = {
.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
.lc_assert = assert_rw,
.lc_ddb_show = db_show_rwlock,
.lc_unlock = unlock_rw,
#ifdef ADAPTIVE_RWLOCKS
static int rowner_retries = 10;
static int rowner_loops = 10000;
static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
static struct lock_delay_config rw_delay = {
SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_initial, CTLFLAG_RW, &rw_delay.initial,
SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_step, CTLFLAG_RW, &rw_delay.step,
SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_min, CTLFLAG_RW, &rw_delay.min,
SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
rw_delay_sysinit(void *dummy)
rw_delay.initial = mp_ncpus * 25;
rw_delay.step = (mp_ncpus * 25) / 2;
rw_delay.min = mp_ncpus * 5;
rw_delay.max = mp_ncpus * 25 * 10;
LOCK_DELAY_SYSINIT(rw_delay_sysinit);
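/*
 * Worked example (illustrative only, assuming mp_ncpus == 8): the
 * sysinit above yields initial = 8 * 25 = 200, step = 100, min = 40
 * and max = 2000.  These values bound the spin backoff that
 * lock_delay() applies between acquisition attempts in the contended
 * paths below; they can be tuned at runtime via the debug.rwlock.*
 * sysctls declared above.
 */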
 * Return a pointer to the owning thread if the lock is write-locked or
 * NULL if the lock is unlocked or read-locked.
#define rw_wowner(rw) \
((rw)->rw_lock & RW_LOCK_READ ? NULL : \
(struct thread *)RW_OWNER((rw)->rw_lock))
 * Return whether the write owner is recursed. Write ownership is not assured
 * here and should be checked beforehand.
#define rw_recursed(rw) ((rw)->rw_recurse != 0)
 * Return true if curthread holds the lock.
#define rw_wlocked(rw) (rw_wowner((rw)) == curthread)
 * Return a pointer to the owning thread for this lock, who should receive
 * any priority lent by threads that block on this lock. Currently this
 * is identical to rw_wowner().
#define rw_owner(rw) rw_wowner(rw)
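/*
 * Summary of the lock word layout assumed by the macros above (see
 * sys/rwlock.h for the authoritative encoding): rw_lock either holds
 * the owning thread pointer when write-locked, with the waiter flags
 * packed into the low bits and masked off by RW_OWNER(), or it has
 * RW_LOCK_READ set together with a reader count and the waiter/spinner
 * flag bits.  An unlocked rwlock (RW_UNLOCKED) is encoded as a read
 * lock with zero readers and no waiters.
 */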
#define __rw_assert(c, what, file, line)
assert_rw(const struct lock_object *lock, int what)
rw_assert((const struct rwlock *)lock, what);
lock_rw(struct lock_object *lock, uintptr_t how)
rw = (struct rwlock *)lock;
unlock_rw(struct lock_object *lock)
rw = (struct rwlock *)lock;
rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
if (rw->rw_lock & RW_LOCK_READ) {
owner_rw(const struct lock_object *lock, struct thread **owner)
const struct rwlock *rw = (const struct rwlock *)lock;
uintptr_t x = rw->rw_lock;
*owner = rw_wowner(rw);
return ((x & RW_LOCK_READ) != 0 ? (RW_READERS(x) != 0) :
_rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
RW_RECURSE | RW_NEW)) == 0);
ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
("%s: rw_lock not aligned for %s: %p", __func__, name,
flags = LO_UPGRADABLE;
if (opts & RW_NOPROFILE)
flags |= LO_NOPROFILE;
if (!(opts & RW_NOWITNESS))
if (opts & RW_RECURSE)
flags |= LO_RECURSABLE;
lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
rw->rw_lock = RW_UNLOCKED;
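/*
 * Illustrative consumer-side usage of the rwlock(9) API initialized
 * above (a sketch, not part of this file; "example_lock" and
 * "example_data" are made-up names):
 *
 *    struct rwlock example_lock;
 *    int example_data;
 *
 *    rw_init_flags(&example_lock, "example lock", RW_RECURSE);
 *    rw_wlock(&example_lock);
 *    example_data = 1;                 // exclusive access
 *    rw_wunlock(&example_lock);
 *    rw_rlock(&example_lock);
 *    (void)example_data;               // shared, read-only access
 *    rw_runlock(&example_lock);
 *    rw_destroy(&example_lock);
 */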
_rw_destroy(volatile uintptr_t *c)
KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
rw->rw_lock = RW_DESTROYED;
lock_destroy(&rw->lock_object);
rw_sysinit(void *arg)
struct rw_args *args = arg;
rw_init((struct rwlock *)args->ra_rw, args->ra_desc);
rw_sysinit_flags(void *arg)
struct rw_args_flags *args = arg;
rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
_rw_wowned(const volatile uintptr_t *c)
return (rw_wowner(rwlock2rw(c)) == curthread);
_rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
if (SCHEDULER_STOPPED())
KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
curthread, rw->lock_object.lo_name, file, line));
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
__rw_wlock(rw, curthread, file, line);
LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
curthread->td_locks++;
__rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
if (SCHEDULER_STOPPED())
KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
curthread, rw->lock_object.lo_name, file, line));
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));
if (rw_wlocked(rw) &&
(rw->lock_object.lo_flags & LO_RECURSABLE) != 0) {
rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED,
(uintptr_t)curthread);
LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
if (!rw_recursed(rw))
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE,
rw, 0, 0, file, line);
curthread->td_locks++;
_rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
if (SCHEDULER_STOPPED())
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
__rw_assert(c, RA_WLOCKED, file, line);
WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
if (!rw_recursed(rw))
LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_WUNLOCK_RELEASE, rw);
__rw_wunlock(rw, curthread, file, line);
curthread->td_locks--;
 * Determines whether a new reader can acquire a lock. Succeeds if the
 * reader already owns a read lock and the lock is locked for read to
 * prevent deadlock from reader recursion. Also succeeds if the lock
 * is unlocked and has no writer waiters or spinners. Failing otherwise
 * prioritizes writers over readers.
#define RW_CAN_READ(_rw) \
((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) & \
(RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) == \
__rw_rlock(volatile uintptr_t *c, const char *file, int line)
struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
volatile struct thread *owner;
#ifdef LOCK_PROFILING
uint64_t waittime = 0;
#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
struct lock_delay_arg lda;
int64_t sleep_time = 0;
int64_t all_time = 0;
if (SCHEDULER_STOPPED())
#if defined(ADAPTIVE_RWLOCKS)
lock_delay_arg_init(&lda, &rw_delay);
#elif defined(KDTRACE_HOOKS)
lock_delay_arg_init(&lda, NULL);
KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
curthread, rw->lock_object.lo_name, file, line));
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
KASSERT(rw_wowner(rw) != curthread,
("rw_rlock: wlock already held for %s @ %s:%d",
rw->lock_object.lo_name, file, line));
WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
all_time -= lockstat_nsecs(&rw->lock_object);
 * Handle the easy case. If no other thread has a write
 * lock, then try to bump up the count of read locks. Note
 * that we have to preserve the current state of the
 * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a
 * read lock, then rw_lock must have changed, so restart
 * the loop. Note that this handles the case of a
 * completely unlocked rwlock since such a lock is encoded
 * as a read lock with no waiters.
if (RW_CAN_READ(v)) {
 * The RW_LOCK_READ_WAITERS flag should only be set
 * if the lock has been unlocked and write waiters
if (atomic_cmpset_acq_ptr(&rw->rw_lock, v,
v + RW_ONE_READER)) {
if (LOCK_LOG_TEST(&rw->lock_object, 0))
"%s: %p succeed %p -> %p", __func__,
(void *)(v + RW_ONE_READER));
PMC_SOFT_CALL( , , lock, failed);
lock_profile_obtain_lock_failed(&rw->lock_object,
&contested, &waittime);
#ifdef ADAPTIVE_RWLOCKS
 * If the owner is running on another CPU, spin until
 * the owner stops running or the state of the lock
if ((v & RW_LOCK_READ) == 0) {
owner = (struct thread *)RW_OWNER(v);
if (TD_IS_RUNNING(owner)) {
if (LOCK_LOG_TEST(&rw->lock_object, 0))
"%s: spinning on %p held by %p",
__func__, rw, owner);
KTR_STATE1(KTR_SCHED, "thread",
sched_tdname(curthread), "spinning",
"lockname:\"%s\"", rw->lock_object.lo_name);
while ((struct thread*)RW_OWNER(rw->rw_lock) ==
owner && TD_IS_RUNNING(owner))
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname(curthread), "running");
} else if (spintries < rowner_retries) {
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
"spinning", "lockname:\"%s\"",
rw->lock_object.lo_name);
for (i = 0; i < rowner_loops; i++) {
if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
lda.spin_cnt += rowner_loops - i;
KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
if (i != rowner_loops)
 * Okay, now it's the hard case. Some other thread already
 * has a write lock or there are write waiters present,
 * acquire the turnstile lock so we can begin the process
ts = turnstile_trywait(&rw->lock_object);
 * The lock might have been released while we spun, so
 * recheck its state and restart the loop if needed.
if (RW_CAN_READ(v)) {
turnstile_cancel(ts);
#ifdef ADAPTIVE_RWLOCKS
 * The current lock owner might have started executing
 * on another CPU (or the lock could have changed
 * owners) while we were waiting on the turnstile
 * chain lock. If so, drop the turnstile lock and try
if ((v & RW_LOCK_READ) == 0) {
owner = (struct thread *)RW_OWNER(v);
if (TD_IS_RUNNING(owner)) {
turnstile_cancel(ts);
 * The lock is held in write mode or it already has waiters.
MPASS(!RW_CAN_READ(v));
 * If the RW_LOCK_READ_WAITERS flag is already set, then
 * we can go ahead and block. If it is not set then try
 * to set it. If we fail to set it drop the turnstile
 * lock and restart the loop.
if (!(v & RW_LOCK_READ_WAITERS)) {
if (!atomic_cmpset_ptr(&rw->rw_lock, v,
v | RW_LOCK_READ_WAITERS)) {
turnstile_cancel(ts);
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p set read waiters flag",
 * We were unable to acquire the lock and the read waiters
 * flag is set, so we must block on the turnstile.
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
sleep_time -= lockstat_nsecs(&rw->lock_object);
turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
sleep_time += lockstat_nsecs(&rw->lock_object);
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
all_time += lockstat_nsecs(&rw->lock_object);
LOCKSTAT_RECORD4(LS_RW_RLOCK_BLOCK, rw, sleep_time,
LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
/* Record only the loops spinning and not sleeping. */
if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(LS_RW_RLOCK_SPIN, rw, all_time - sleep_time,
LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 * TODO: acquire "owner of record" here. Here be turnstile dragons
 * however. turnstiles don't like owners changing between calls to
 * turnstile_wait() currently.
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE, rw, contested,
waittime, file, line);
LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
WITNESS_LOCK(&rw->lock_object, 0, file, line);
curthread->td_locks++;
curthread->td_rw_rlocks++;
__rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
if (SCHEDULER_STOPPED())
KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
curthread, rw->lock_object.lo_name, file, line));
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
if (!(x & RW_LOCK_READ))
if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) {
LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE,
rw, 0, 0, file, line);
curthread->td_locks++;
curthread->td_rw_rlocks++;
LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
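/*
 * Illustrative pattern for the try variant (a sketch using the
 * hypothetical example_lock from above):
 *
 *    if (rw_try_rlock(&example_lock)) {
 *        // read shared data without risking a sleep
 *        rw_runlock(&example_lock);
 *    } else {
 *        // fall back to a path that is allowed to block
 *    }
 */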
_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
struct turnstile *ts;
uintptr_t x, v, queue;
if (SCHEDULER_STOPPED())
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
__rw_assert(c, RA_RLOCKED, file, line);
WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
/* TODO: drop "owner of record" here. */
 * See if there is more than one read lock held. If so,
 * just drop one and return.
if (RW_READERS(x) > 1) {
if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
x - RW_ONE_READER)) {
if (LOCK_LOG_TEST(&rw->lock_object, 0))
"%s: %p succeeded %p -> %p",
__func__, rw, (void *)x,
(void *)(x - RW_ONE_READER));
 * If there aren't any waiters for a write lock, then try
 * to drop it quickly.
if (!(x & RW_LOCK_WAITERS)) {
MPASS((x & ~RW_LOCK_WRITE_SPINNER) ==
if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p last succeeded",
 * Ok, we know we have waiters and we think we are the
 * last reader, so grab the turnstile lock.
turnstile_chain_lock(&rw->lock_object);
v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
MPASS(v & RW_LOCK_WAITERS);
 * Try to drop our lock leaving the lock in an unlocked
 * If you wanted to do explicit lock handoff you'd have to
 * do it here. You'd also want to use turnstile_signal()
 * and you'd have to handle the race where a higher
 * priority thread blocks on the write lock before the
 * thread you wake up actually runs and have the new thread
 * "steal" the lock. For now it's a lot simpler to just
 * wake up all of the waiters.
 * As above, if we fail, then another thread might have
 * acquired a read lock, so drop the turnstile lock and
if (v & RW_LOCK_WRITE_WAITERS) {
queue = TS_EXCLUSIVE_QUEUE;
x |= (v & RW_LOCK_READ_WAITERS);
queue = TS_SHARED_QUEUE;
if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
turnstile_chain_unlock(&rw->lock_object);
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
 * Ok. The lock is released and all that's left is to
 * wake up the waiters. Note that the lock might not be
 * free anymore, but in that case the writers will just
 * block again if they run before the new lock holder(s)
ts = turnstile_lookup(&rw->lock_object);
turnstile_broadcast(ts, queue);
turnstile_unpend(ts, TS_SHARED_LOCK);
turnstile_chain_unlock(&rw->lock_object);
LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_RUNLOCK_RELEASE, rw);
curthread->td_locks--;
curthread->td_rw_rlocks--;
 * This function is called when we are unable to obtain a write lock on the
 * first try. This means that at least one other thread holds either a
 * read or write lock.
__rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
volatile struct thread *owner;
#ifdef LOCK_PROFILING
uint64_t waittime = 0;
#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
struct lock_delay_arg lda;
int64_t sleep_time = 0;
int64_t all_time = 0;
if (SCHEDULER_STOPPED())
#if defined(ADAPTIVE_RWLOCKS)
lock_delay_arg_init(&lda, &rw_delay);
#elif defined(KDTRACE_HOOKS)
lock_delay_arg_init(&lda, NULL);
if (rw_wlocked(rw)) {
KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
("%s: recursing but non-recursive rw %s @ %s:%d\n",
__func__, rw->lock_object.lo_name, file, line));
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
all_time -= lockstat_nsecs(&rw->lock_object);
if (rw->rw_lock == RW_UNLOCKED && _rw_write_lock(rw, tid))
PMC_SOFT_CALL( , , lock, failed);
lock_profile_obtain_lock_failed(&rw->lock_object,
&contested, &waittime);
#ifdef ADAPTIVE_RWLOCKS
 * If the lock is write locked and the owner is
 * running on another CPU, spin until the owner stops
 * running or the state of the lock changes.
owner = (struct thread *)RW_OWNER(v);
if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
__func__, rw, owner);
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
"spinning", "lockname:\"%s\"",
rw->lock_object.lo_name);
while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
TD_IS_RUNNING(owner))
KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
if ((v & RW_LOCK_READ) && RW_READERS(v) &&
spintries < rowner_retries) {
if (!(v & RW_LOCK_WRITE_SPINNER)) {
if (!atomic_cmpset_ptr(&rw->rw_lock, v,
v | RW_LOCK_WRITE_SPINNER)) {
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
"spinning", "lockname:\"%s\"",
rw->lock_object.lo_name);
for (i = 0; i < rowner_loops; i++) {
if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0)
KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
lda.spin_cnt += rowner_loops - i;
if (i != rowner_loops)
ts = turnstile_trywait(&rw->lock_object);
#ifdef ADAPTIVE_RWLOCKS
 * The current lock owner might have started executing
 * on another CPU (or the lock could have changed
 * owners) while we were waiting on the turnstile
 * chain lock. If so, drop the turnstile lock and try
if (!(v & RW_LOCK_READ)) {
owner = (struct thread *)RW_OWNER(v);
if (TD_IS_RUNNING(owner)) {
turnstile_cancel(ts);
 * Check the waiter flags for this rwlock.
 * If the lock was released without leaving any pending
 * waiters queued, simply try to acquire it.
 * If a pending waiters queue is present, claim lock
 * ownership and preserve the pending queue.
x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
if ((v & ~x) == RW_UNLOCKED) {
x &= ~RW_LOCK_WRITE_SPINNER;
if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) {
turnstile_cancel(ts);
turnstile_cancel(ts);
 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
 * set it. If we fail to set it, then loop back and try
if (!(v & RW_LOCK_WRITE_WAITERS)) {
if (!atomic_cmpset_ptr(&rw->rw_lock, v,
v | RW_LOCK_WRITE_WAITERS)) {
turnstile_cancel(ts);
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p set write waiters flag",
 * We were unable to acquire the lock and the write waiters
 * flag is set, so we must block on the turnstile.
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
sleep_time -= lockstat_nsecs(&rw->lock_object);
turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
sleep_time += lockstat_nsecs(&rw->lock_object);
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
#ifdef ADAPTIVE_RWLOCKS
all_time += lockstat_nsecs(&rw->lock_object);
LOCKSTAT_RECORD4(LS_RW_WLOCK_BLOCK, rw, sleep_time,
LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
/* Record only the loops spinning and not sleeping. */
if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(LS_RW_WLOCK_SPIN, rw, all_time - sleep_time,
LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
waittime, file, line);
 * This function is called if the first try at releasing a write lock failed.
 * This means that one of the two waiter bits must be set, indicating that at
 * least one thread is waiting on this lock.
__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
struct turnstile *ts;
if (SCHEDULER_STOPPED())
if (rw_wlocked(rw) && rw_recursed(rw)) {
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
("%s: neither of the waiter flags are set", __func__));
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);
turnstile_chain_lock(&rw->lock_object);
ts = turnstile_lookup(&rw->lock_object);
 * Use the same algorithm as sx locks for now. Prefer waking up shared
 * waiters, if we have any, over writers. This is probably not ideal.
 * 'v' is the value we are going to write back to rw_lock. If we
 * have waiters on both queues, we need to preserve the state of
 * the waiter flag for the queue we don't wake up. For now this is
 * hardcoded for the algorithm mentioned above.
 * In the case of both readers and writers waiting we wake up the
 * readers but leave the RW_LOCK_WRITE_WAITERS flag set. If a
 * new writer comes in before a reader it will claim the lock up
 * above. There is probably a potential priority inversion in
 * there that could be worked around either by waking both queues
 * of waiters or doing some complicated lock handoff gymnastics.
if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) {
queue = TS_EXCLUSIVE_QUEUE;
v |= (rw->rw_lock & RW_LOCK_READ_WAITERS);
queue = TS_SHARED_QUEUE;
/* Wake up all waiters for the specific queue. */
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
queue == TS_SHARED_QUEUE ? "read" : "write");
turnstile_broadcast(ts, queue);
atomic_store_rel_ptr(&rw->rw_lock, v);
turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
turnstile_chain_unlock(&rw->lock_object);
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock. This will only succeed if this thread holds a single read
 * lock. Returns true if the upgrade succeeded and false otherwise.
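 *
 * A common calling pattern (illustrative, using the hypothetical
 * example_lock): take the lock shared, attempt the upgrade, and fall
 * back to a full write lock with revalidation when the upgrade fails:
 *
 *    rw_rlock(&example_lock);
 *    if (!rw_try_upgrade(&example_lock)) {
 *        rw_runlock(&example_lock);
 *        rw_wlock(&example_lock);
 *        // state may have changed while unlocked; revalidate here
 *    }
 *    // ... modify data ...
 *    rw_wunlock(&example_lock);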
__rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
uintptr_t v, x, tid;
struct turnstile *ts;
if (SCHEDULER_STOPPED())
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
__rw_assert(c, RA_RLOCKED, file, line);
 * Attempt to switch from one reader to a writer. If there
 * are any write waiters, then we will have to lock the
 * turnstile first to prevent races with another writer
 * calling turnstile_wait() before we have claimed this
 * turnstile. So, do the simple case of no waiters first.
tid = (uintptr_t)curthread;
if (RW_READERS(v) > 1)
if (!(v & RW_LOCK_WAITERS)) {
success = atomic_cmpset_ptr(&rw->rw_lock, v, tid);
 * Ok, we think we have waiters, so lock the turnstile.
ts = turnstile_trywait(&rw->lock_object);
if (RW_READERS(v) > 1) {
turnstile_cancel(ts);
 * Try to switch from one reader to a writer again. This time
 * we honor the current state of the waiters flags.
 * If we obtain the lock with the flags set, then claim
 * ownership of the turnstile.
x = rw->rw_lock & RW_LOCK_WAITERS;
success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x);
turnstile_claim(ts);
turnstile_cancel(ts);
turnstile_cancel(ts);
LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
curthread->td_rw_rlocks--;
WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, rw);
 * Downgrade a write lock into a single read lock.
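 *
 * Illustrative use (hypothetical names): a writer that has finished its
 * updates but still needs consistent read access can downgrade instead
 * of dropping and re-acquiring the lock:
 *
 *    rw_wlock(&example_lock);
 *    // ... modify data ...
 *    rw_downgrade(&example_lock);
 *    // ... continue reading under the shared lock ...
 *    rw_runlock(&example_lock);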
__rw_downgrade(volatile uintptr_t *c, const char *file, int line)
struct turnstile *ts;
if (SCHEDULER_STOPPED())
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
__rw_assert(c, RA_WLOCKED | RA_NOTRECURSED, file, line);
if (rw_recursed(rw))
panic("downgrade of a recursed lock");
WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);
 * Convert from a writer to a single reader. First we handle
 * the easy case with no waiters. If there are any waiters, we
 * lock the turnstile and "disown" the lock.
tid = (uintptr_t)curthread;
if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
 * Ok, we think we have waiters, so lock the turnstile so we can
 * read the waiter flags without any races.
turnstile_chain_lock(&rw->lock_object);
v = rw->rw_lock & RW_LOCK_WAITERS;
rwait = v & RW_LOCK_READ_WAITERS;
wwait = v & RW_LOCK_WRITE_WAITERS;
MPASS(rwait | wwait);
 * Downgrade from a write lock while preserving waiters flag
 * and give up ownership of the turnstile.
ts = turnstile_lookup(&rw->lock_object);
v &= ~RW_LOCK_READ_WAITERS;
atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
 * Wake other readers if there are no writers pending. Otherwise they
 * won't be able to acquire the lock anyway.
if (rwait && !wwait) {
turnstile_broadcast(ts, TS_SHARED_QUEUE);
turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
turnstile_disown(ts);
turnstile_chain_unlock(&rw->lock_object);
curthread->td_rw_rlocks++;
LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, rw);
#ifdef INVARIANT_SUPPORT
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
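 *
 * Typical assertions (illustrative, using a hypothetical lock name):
 *
 *    rw_assert(&example_lock, RA_LOCKED);    // held shared or exclusive
 *    rw_assert(&example_lock, RA_RLOCKED);   // held shared
 *    rw_assert(&example_lock, RA_WLOCKED);   // held exclusive by curthread
 *    rw_assert(&example_lock, RA_UNLOCKED);  // not held by curthread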
__rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
const struct rwlock *rw;
if (panicstr != NULL)
case RA_LOCKED | RA_RECURSED:
case RA_LOCKED | RA_NOTRECURSED:
case RA_RLOCKED | RA_RECURSED:
case RA_RLOCKED | RA_NOTRECURSED:
witness_assert(&rw->lock_object, what, file, line);
 * If some other thread has a write lock or we have one
 * and are asserting a read lock, fail. Also, if no one
 * has a lock at all, fail.
if (rw->rw_lock == RW_UNLOCKED ||
(!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
rw_wowner(rw) != curthread)))
panic("Lock %s not %slocked @ %s:%d\n",
rw->lock_object.lo_name, (what & RA_RLOCKED) ?
"read " : "", file, line);
if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
if (rw_recursed(rw)) {
if (what & RA_NOTRECURSED)
panic("Lock %s recursed @ %s:%d\n",
rw->lock_object.lo_name, file,
} else if (what & RA_RECURSED)
panic("Lock %s not recursed @ %s:%d\n",
rw->lock_object.lo_name, file, line);
case RA_WLOCKED | RA_RECURSED:
case RA_WLOCKED | RA_NOTRECURSED:
if (rw_wowner(rw) != curthread)
panic("Lock %s not exclusively locked @ %s:%d\n",
rw->lock_object.lo_name, file, line);
if (rw_recursed(rw)) {
if (what & RA_NOTRECURSED)
panic("Lock %s recursed @ %s:%d\n",
rw->lock_object.lo_name, file, line);
} else if (what & RA_RECURSED)
panic("Lock %s not recursed @ %s:%d\n",
rw->lock_object.lo_name, file, line);
witness_assert(&rw->lock_object, what, file, line);
 * If we hold a write lock, fail. We can't reliably check
 * whether we hold a read lock or not.
if (rw_wowner(rw) == curthread)
panic("Lock %s exclusively locked @ %s:%d\n",
rw->lock_object.lo_name, file, line);
panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
#endif /* INVARIANT_SUPPORT */
db_show_rwlock(const struct lock_object *lock)
const struct rwlock *rw;
rw = (const struct rwlock *)lock;
db_printf(" state: ");
if (rw->rw_lock == RW_UNLOCKED)
db_printf("UNLOCKED\n");
else if (rw->rw_lock == RW_DESTROYED) {
db_printf("DESTROYED\n");
} else if (rw->rw_lock & RW_LOCK_READ)
db_printf("RLOCK: %ju locks\n",
(uintmax_t)(RW_READERS(rw->rw_lock)));
db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
td->td_tid, td->td_proc->p_pid, td->td_name);
if (rw_recursed(rw))
db_printf(" recursed: %u\n", rw->rw_recurse);
db_printf(" waiters: ");
switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
case RW_LOCK_READ_WAITERS:
db_printf("readers\n");
case RW_LOCK_WRITE_WAITERS:
db_printf("writers\n");
case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
db_printf("readers and writers\n");
db_printf("none\n");