]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/kern_rwlock.c
add -n option to suppress clearing the build tree and add -DNO_CLEAN
[FreeBSD/FreeBSD.git] / sys / kern / kern_rwlock.c
1 /*-
2  * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the author nor the names of any co-contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 /*
31  * Machine independent bits of reader/writer lock implementation.
32  */
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include "opt_ddb.h"
38 #include "opt_no_adaptive_rwlocks.h"
39
40 #include <sys/param.h>
41 #include <sys/ktr.h>
42 #include <sys/kernel.h>
43 #include <sys/lock.h>
44 #include <sys/mutex.h>
45 #include <sys/proc.h>
46 #include <sys/rwlock.h>
47 #include <sys/sysctl.h>
48 #include <sys/systm.h>
49 #include <sys/turnstile.h>
50
51 #include <machine/cpu.h>
52
53 CTASSERT((RW_RECURSE & LO_CLASSFLAGS) == RW_RECURSE);
54
55 #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
56 #define ADAPTIVE_RWLOCKS
57 #endif
58
59 #ifdef ADAPTIVE_RWLOCKS
60 static int rowner_retries = 10;
61 static int rowner_loops = 10000;
62 SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, "rwlock debugging");
63 SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
64 SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
65 #endif
66
67 #ifdef DDB
68 #include <ddb/ddb.h>
69
70 static void     db_show_rwlock(struct lock_object *lock);
71 #endif
72 static void     assert_rw(struct lock_object *lock, int what);
73 static void     lock_rw(struct lock_object *lock, int how);
74 static int      unlock_rw(struct lock_object *lock);
75
76 struct lock_class lock_class_rw = {
77         .lc_name = "rw",
78         .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
79         .lc_assert = assert_rw,
80 #ifdef DDB
81         .lc_ddb_show = db_show_rwlock,
82 #endif
83         .lc_lock = lock_rw,
84         .lc_unlock = unlock_rw,
85 };
86
87 /*
88  * Return a pointer to the owning thread if the lock is write-locked or
89  * NULL if the lock is unlocked or read-locked.
90  */
91 #define rw_wowner(rw)                                                   \
92         ((rw)->rw_lock & RW_LOCK_READ ? NULL :                          \
93             (struct thread *)RW_OWNER((rw)->rw_lock))
94
95 /*
96  * Returns if a write owner is recursed.  Write ownership is not assured
97  * here and should be previously checked.
98  */
99 #define rw_recursed(rw)         ((rw)->rw_recurse != 0)
100
101 /*
102  * Return true if curthread helds the lock.
103  */
104 #define rw_wlocked(rw)          (rw_wowner((rw)) == curthread)
105
106 /*
107  * Return a pointer to the owning thread for this lock who should receive
108  * any priority lent by threads that block on this lock.  Currently this
109  * is identical to rw_wowner().
110  */
111 #define rw_owner(rw)            rw_wowner(rw)
112
113 #ifndef INVARIANTS
114 #define _rw_assert(rw, what, file, line)
115 #endif
116
117 void
118 assert_rw(struct lock_object *lock, int what)
119 {
120
121         rw_assert((struct rwlock *)lock, what);
122 }
123
124 void
125 lock_rw(struct lock_object *lock, int how)
126 {
127         struct rwlock *rw;
128
129         rw = (struct rwlock *)lock;
130         if (how)
131                 rw_wlock(rw);
132         else
133                 rw_rlock(rw);
134 }
135
136 int
137 unlock_rw(struct lock_object *lock)
138 {
139         struct rwlock *rw;
140
141         rw = (struct rwlock *)lock;
142         rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
143         if (rw->rw_lock & RW_LOCK_READ) {
144                 rw_runlock(rw);
145                 return (0);
146         } else {
147                 rw_wunlock(rw);
148                 return (1);
149         }
150 }
151
152 void
153 rw_init_flags(struct rwlock *rw, const char *name, int opts)
154 {
155         int flags;
156
157         MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
158             RW_RECURSE)) == 0);
159
160         flags = LO_UPGRADABLE | LO_RECURSABLE;
161         if (opts & RW_DUPOK)
162                 flags |= LO_DUPOK;
163         if (opts & RW_NOPROFILE)
164                 flags |= LO_NOPROFILE;
165         if (!(opts & RW_NOWITNESS))
166                 flags |= LO_WITNESS;
167         if (opts & RW_QUIET)
168                 flags |= LO_QUIET;
169         flags |= opts & RW_RECURSE;
170
171         rw->rw_lock = RW_UNLOCKED;
172         rw->rw_recurse = 0;
173         lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
174 }
175
176 void
177 rw_destroy(struct rwlock *rw)
178 {
179
180         KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked"));
181         KASSERT(rw->rw_recurse == 0, ("rw lock still recursed"));
182         rw->rw_lock = RW_DESTROYED;
183         lock_destroy(&rw->lock_object);
184 }
185
186 void
187 rw_sysinit(void *arg)
188 {
189         struct rw_args *args = arg;
190
191         rw_init(args->ra_rw, args->ra_desc);
192 }
193
194 int
195 rw_wowned(struct rwlock *rw)
196 {
197
198         return (rw_wowner(rw) == curthread);
199 }
200
201 void
202 _rw_wlock(struct rwlock *rw, const char *file, int line)
203 {
204
205         MPASS(curthread != NULL);
206         KASSERT(rw->rw_lock != RW_DESTROYED,
207             ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
208         WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
209             line, NULL);
210         __rw_wlock(rw, curthread, file, line);
211         LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
212         WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
213         curthread->td_locks++;
214 }
215
216 int
217 _rw_try_wlock(struct rwlock *rw, const char *file, int line)
218 {
219         int rval;
220
221         KASSERT(rw->rw_lock != RW_DESTROYED,
222             ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));
223
224         if (rw_wlocked(rw) && (rw->lock_object.lo_flags & RW_RECURSE) != 0) {
225                 rw->rw_recurse++;
226                 rval = 1;
227         } else
228                 rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED,
229                     (uintptr_t)curthread);
230
231         LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
232         if (rval) {
233                 WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
234                     file, line);
235                 curthread->td_locks++;
236         }
237         return (rval);
238 }
239
240 void
241 _rw_wunlock(struct rwlock *rw, const char *file, int line)
242 {
243
244         MPASS(curthread != NULL);
245         KASSERT(rw->rw_lock != RW_DESTROYED,
246             ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
247         _rw_assert(rw, RA_WLOCKED, file, line);
248         curthread->td_locks--;
249         WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
250         LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
251             line);
252         if (!rw_recursed(rw))
253                 lock_profile_release_lock(&rw->lock_object);
254         __rw_wunlock(rw, curthread, file, line);
255 }
256 /*
257  * Determines whether a new reader can acquire a lock.  Succeeds if the
258  * reader already owns a read lock and the lock is locked for read to
259  * prevent deadlock from reader recursion.  Also succeeds if the lock
260  * is unlocked and has no writer waiters or spinners.  Failing otherwise
261  * prioritizes writers before readers.
262  */
263 #define RW_CAN_READ(_rw)                                                \
264     ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) &      \
265     (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) ==  \
266     RW_LOCK_READ)
267
268 void
269 _rw_rlock(struct rwlock *rw, const char *file, int line)
270 {
271         struct turnstile *ts;
272 #ifdef ADAPTIVE_RWLOCKS
273         volatile struct thread *owner;
274         int spintries = 0;
275         int i;
276 #endif
277         uint64_t waittime = 0;
278         int contested = 0;
279         uintptr_t v;
280
281         KASSERT(rw->rw_lock != RW_DESTROYED,
282             ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
283         KASSERT(rw_wowner(rw) != curthread,
284             ("%s (%s): wlock already held @ %s:%d", __func__,
285             rw->lock_object.lo_name, file, line));
286         WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
287
288         for (;;) {
289                 /*
290                  * Handle the easy case.  If no other thread has a write
291                  * lock, then try to bump up the count of read locks.  Note
292                  * that we have to preserve the current state of the
293                  * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
294                  * read lock, then rw_lock must have changed, so restart
295                  * the loop.  Note that this handles the case of a
296                  * completely unlocked rwlock since such a lock is encoded
297                  * as a read lock with no waiters.
298                  */
299                 v = rw->rw_lock;
300                 if (RW_CAN_READ(v)) {
301                         /*
302                          * The RW_LOCK_READ_WAITERS flag should only be set
303                          * if the lock has been unlocked and write waiters
304                          * were present.
305                          */
306                         if (atomic_cmpset_acq_ptr(&rw->rw_lock, v,
307                             v + RW_ONE_READER)) {
308                                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
309                                         CTR4(KTR_LOCK,
310                                             "%s: %p succeed %p -> %p", __func__,
311                                             rw, (void *)v,
312                                             (void *)(v + RW_ONE_READER));
313                                 break;
314                         }
315                         cpu_spinwait();
316                         continue;
317                 }
318                 lock_profile_obtain_lock_failed(&rw->lock_object,
319                     &contested, &waittime);
320
321 #ifdef ADAPTIVE_RWLOCKS
322                 /*
323                  * If the owner is running on another CPU, spin until
324                  * the owner stops running or the state of the lock
325                  * changes.
326                  */
327                 if ((v & RW_LOCK_READ) == 0) {
328                         owner = (struct thread *)RW_OWNER(v);
329                         if (TD_IS_RUNNING(owner)) {
330                                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
331                                         CTR3(KTR_LOCK,
332                                             "%s: spinning on %p held by %p",
333                                             __func__, rw, owner);
334                                 while ((struct thread*)RW_OWNER(rw->rw_lock) ==
335                                     owner && TD_IS_RUNNING(owner))
336                                         cpu_spinwait();
337                                 continue;
338                         }
339                 } else if (spintries < rowner_retries) {
340                         spintries++;
341                         for (i = 0; i < rowner_loops; i++) {
342                                 v = rw->rw_lock;
343                                 if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
344                                         break;
345                                 cpu_spinwait();
346                         }
347                         if (i != rowner_loops)
348                                 continue;
349                 }
350 #endif
351
352                 /*
353                  * Okay, now it's the hard case.  Some other thread already
354                  * has a write lock or there are write waiters present,
355                  * acquire the turnstile lock so we can begin the process
356                  * of blocking.
357                  */
358                 ts = turnstile_trywait(&rw->lock_object);
359
360                 /*
361                  * The lock might have been released while we spun, so
362                  * recheck its state and restart the loop if needed.
363                  */
364                 v = rw->rw_lock;
365                 if (RW_CAN_READ(v)) {
366                         turnstile_cancel(ts);
367                         cpu_spinwait();
368                         continue;
369                 }
370
371 #ifdef ADAPTIVE_RWLOCKS
372                 /*
373                  * If the current owner of the lock is executing on another
374                  * CPU quit the hard path and try to spin.
375                  */
376                 if ((v & RW_LOCK_READ) == 0) {
377                         owner = (struct thread *)RW_OWNER(v);
378                         if (TD_IS_RUNNING(owner)) {
379                                 turnstile_cancel(ts);
380                                 cpu_spinwait();
381                                 continue;
382                         }
383                 }
384 #endif
385
386                 /*
387                  * The lock is held in write mode or it already has waiters.
388                  */
389                 MPASS(!RW_CAN_READ(v));
390
391                 /*
392                  * If the RW_LOCK_READ_WAITERS flag is already set, then
393                  * we can go ahead and block.  If it is not set then try
394                  * to set it.  If we fail to set it drop the turnstile
395                  * lock and restart the loop.
396                  */
397                 if (!(v & RW_LOCK_READ_WAITERS)) {
398                         if (!atomic_cmpset_ptr(&rw->rw_lock, v,
399                             v | RW_LOCK_READ_WAITERS)) {
400                                 turnstile_cancel(ts);
401                                 cpu_spinwait();
402                                 continue;
403                         }
404                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
405                                 CTR2(KTR_LOCK, "%s: %p set read waiters flag",
406                                     __func__, rw);
407                 }
408
409                 /*
410                  * We were unable to acquire the lock and the read waiters
411                  * flag is set, so we must block on the turnstile.
412                  */
413                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
414                         CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
415                             rw);
416                 turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
417                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
418                         CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
419                             __func__, rw);
420         }
421
422         /*
423          * TODO: acquire "owner of record" here.  Here be turnstile dragons
424          * however.  turnstiles don't like owners changing between calls to
425          * turnstile_wait() currently.
426          */
427         lock_profile_obtain_lock_success( &rw->lock_object, contested,
428             waittime, file, line);
429         LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
430         WITNESS_LOCK(&rw->lock_object, 0, file, line);
431         curthread->td_locks++;
432         curthread->td_rw_rlocks++;
433 }
434
435 int
436 _rw_try_rlock(struct rwlock *rw, const char *file, int line)
437 {
438         uintptr_t x;
439
440         for (;;) {
441                 x = rw->rw_lock;
442                 KASSERT(rw->rw_lock != RW_DESTROYED,
443                     ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
444                 if (!(x & RW_LOCK_READ))
445                         break;
446                 if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) {
447                         LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
448                             line);
449                         WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
450                         curthread->td_locks++;
451                         curthread->td_rw_rlocks++;
452                         return (1);
453                 }
454         }
455
456         LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
457         return (0);
458 }
459
460 void
461 _rw_runlock(struct rwlock *rw, const char *file, int line)
462 {
463         struct turnstile *ts;
464         uintptr_t x, v, queue;
465
466         KASSERT(rw->rw_lock != RW_DESTROYED,
467             ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
468         _rw_assert(rw, RA_RLOCKED, file, line);
469         curthread->td_locks--;
470         curthread->td_rw_rlocks--;
471         WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
472         LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
473
474         /* TODO: drop "owner of record" here. */
475
476         for (;;) {
477                 /*
478                  * See if there is more than one read lock held.  If so,
479                  * just drop one and return.
480                  */
481                 x = rw->rw_lock;
482                 if (RW_READERS(x) > 1) {
483                         if (atomic_cmpset_ptr(&rw->rw_lock, x,
484                             x - RW_ONE_READER)) {
485                                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
486                                         CTR4(KTR_LOCK,
487                                             "%s: %p succeeded %p -> %p",
488                                             __func__, rw, (void *)x,
489                                             (void *)(x - RW_ONE_READER));
490                                 break;
491                         }
492                         continue;
493                 }
494                 /*
495                  * If there aren't any waiters for a write lock, then try
496                  * to drop it quickly.
497                  */
498                 if (!(x & RW_LOCK_WAITERS)) {
499                         MPASS((x & ~RW_LOCK_WRITE_SPINNER) ==
500                             RW_READERS_LOCK(1));
501                         if (atomic_cmpset_ptr(&rw->rw_lock, x, RW_UNLOCKED)) {
502                                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
503                                         CTR2(KTR_LOCK, "%s: %p last succeeded",
504                                             __func__, rw);
505                                 break;
506                         }
507                         continue;
508                 }
509                 /*
510                  * Ok, we know we have waiters and we think we are the
511                  * last reader, so grab the turnstile lock.
512                  */
513                 turnstile_chain_lock(&rw->lock_object);
514                 v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
515                 MPASS(v & RW_LOCK_WAITERS);
516
517                 /*
518                  * Try to drop our lock leaving the lock in a unlocked
519                  * state.
520                  *
521                  * If you wanted to do explicit lock handoff you'd have to
522                  * do it here.  You'd also want to use turnstile_signal()
523                  * and you'd have to handle the race where a higher
524                  * priority thread blocks on the write lock before the
525                  * thread you wakeup actually runs and have the new thread
526                  * "steal" the lock.  For now it's a lot simpler to just
527                  * wakeup all of the waiters.
528                  *
529                  * As above, if we fail, then another thread might have
530                  * acquired a read lock, so drop the turnstile lock and
531                  * restart.
532                  */
533                 x = RW_UNLOCKED;
534                 if (v & RW_LOCK_WRITE_WAITERS) {
535                         queue = TS_EXCLUSIVE_QUEUE;
536                         x |= (v & RW_LOCK_READ_WAITERS);
537                 } else
538                         queue = TS_SHARED_QUEUE;
539                 if (!atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
540                     x)) {
541                         turnstile_chain_unlock(&rw->lock_object);
542                         continue;
543                 }
544                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
545                         CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
546                             __func__, rw);
547
548                 /*
549                  * Ok.  The lock is released and all that's left is to
550                  * wake up the waiters.  Note that the lock might not be
551                  * free anymore, but in that case the writers will just
552                  * block again if they run before the new lock holder(s)
553                  * release the lock.
554                  */
555                 ts = turnstile_lookup(&rw->lock_object);
556                 MPASS(ts != NULL);
557                 turnstile_broadcast(ts, queue);
558                 turnstile_unpend(ts, TS_SHARED_LOCK);
559                 turnstile_chain_unlock(&rw->lock_object);
560                 break;
561         }
562         lock_profile_release_lock(&rw->lock_object);
563 }
564
565 /*
566  * This function is called when we are unable to obtain a write lock on the
567  * first try.  This means that at least one other thread holds either a
568  * read or write lock.
569  */
570 void
571 _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
572 {
573         struct turnstile *ts;
574 #ifdef ADAPTIVE_RWLOCKS
575         volatile struct thread *owner;
576         int spintries = 0;
577         int i;
578 #endif
579         uint64_t waittime = 0;
580         uintptr_t v, x;
581         int contested = 0;
582
583         if (rw_wlocked(rw)) {
584                 KASSERT(rw->lock_object.lo_flags & RW_RECURSE,
585                     ("%s: recursing but non-recursive rw %s @ %s:%d\n",
586                     __func__, rw->lock_object.lo_name, file, line));
587                 rw->rw_recurse++;
588                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
589                         CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
590                 return;
591         }
592
593         if (LOCK_LOG_TEST(&rw->lock_object, 0))
594                 CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
595                     rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
596
597         while (!_rw_write_lock(rw, tid)) {
598                 lock_profile_obtain_lock_failed(&rw->lock_object,
599                     &contested, &waittime);
600 #ifdef ADAPTIVE_RWLOCKS
601                 /*
602                  * If the lock is write locked and the owner is
603                  * running on another CPU, spin until the owner stops
604                  * running or the state of the lock changes.
605                  */
606                 v = rw->rw_lock;
607                 owner = (struct thread *)RW_OWNER(v);
608                 if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
609                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
610                                 CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
611                                     __func__, rw, owner);
612                         while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
613                             TD_IS_RUNNING(owner))
614                                 cpu_spinwait();
615                         continue;
616                 }
617                 if ((v & RW_LOCK_READ) && RW_READERS(v) &&
618                     spintries < rowner_retries) {
619                         if (!(v & RW_LOCK_WRITE_SPINNER)) {
620                                 if (!atomic_cmpset_ptr(&rw->rw_lock, v,
621                                     v | RW_LOCK_WRITE_SPINNER)) {
622                                         cpu_spinwait();
623                                         continue;
624                                 }
625                         }
626                         spintries++;
627                         for (i = 0; i < rowner_loops; i++) {
628                                 if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0)
629                                         break;
630                                 cpu_spinwait();
631                         }
632                         if (i != rowner_loops)
633                                 continue;
634                 }
635 #endif
636                 ts = turnstile_trywait(&rw->lock_object);
637                 v = rw->rw_lock;
638
639 #ifdef ADAPTIVE_RWLOCKS
640                 /*
641                  * If the current owner of the lock is executing on another
642                  * CPU quit the hard path and try to spin.
643                  */
644                 if (!(v & RW_LOCK_READ)) {
645                         owner = (struct thread *)RW_OWNER(v);
646                         if (TD_IS_RUNNING(owner)) {
647                                 turnstile_cancel(ts);
648                                 cpu_spinwait();
649                                 continue;
650                         }
651                 }
652 #endif
653                 /*
654                  * Check for the waiters flags about this rwlock.
655                  * If the lock was released, without maintain any pending
656                  * waiters queue, simply try to acquire it.
657                  * If a pending waiters queue is present, claim the lock
658                  * ownership and maintain the pending queue.
659                  */
660                 x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
661                 if ((v & ~x) == RW_UNLOCKED) {
662                         x &= ~RW_LOCK_WRITE_SPINNER;
663                         if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) {
664                                 if (x)
665                                         turnstile_claim(ts);
666                                 else
667                                         turnstile_cancel(ts);
668                                 break;
669                         }
670                         turnstile_cancel(ts);
671                         cpu_spinwait();
672                         continue;
673                 }
674                 /*
675                  * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
676                  * set it.  If we fail to set it, then loop back and try
677                  * again.
678                  */
679                 if (!(v & RW_LOCK_WRITE_WAITERS)) {
680                         if (!atomic_cmpset_ptr(&rw->rw_lock, v,
681                             v | RW_LOCK_WRITE_WAITERS)) {
682                                 turnstile_cancel(ts);
683                                 cpu_spinwait();
684                                 continue;
685                         }
686                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
687                                 CTR2(KTR_LOCK, "%s: %p set write waiters flag",
688                                     __func__, rw);
689                 }
690                 /*
691                  * We were unable to acquire the lock and the write waiters
692                  * flag is set, so we must block on the turnstile.
693                  */
694                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
695                         CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
696                             rw);
697                 turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
698                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
699                         CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
700                             __func__, rw);
701 #ifdef ADAPTIVE_RWLOCKS
702                 spintries = 0;
703 #endif
704         }
705         lock_profile_obtain_lock_success(&rw->lock_object, contested, waittime,
706             file, line);
707 }
708
709 /*
710  * This function is called if the first try at releasing a write lock failed.
711  * This means that one of the 2 waiter bits must be set indicating that at
712  * least one thread is waiting on this lock.
713  */
714 void
715 _rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
716 {
717         struct turnstile *ts;
718         uintptr_t v;
719         int queue;
720
721         if (rw_wlocked(rw) && rw_recursed(rw)) {
722                 rw->rw_recurse--;
723                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
724                         CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
725                 return;
726         }
727         v = rw->rw_lock;
728
729         KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
730             ("%s: neither of the waiter flags are set", __func__));
731
732         if (LOCK_LOG_TEST(&rw->lock_object, 0))
733                 CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);
734
735         turnstile_chain_lock(&rw->lock_object);
736         ts = turnstile_lookup(&rw->lock_object);
737         MPASS(ts != NULL);
738
739         /*
740          * Use the same algo as sx locks for now.  Prefer waking up shared
741          * waiters if we have any over writers.  This is probably not ideal.
742          *
743          * 'v' is the value we are going to write back to rw_lock.  If we
744          * have waiters on both queues, we need to preserve the state of
745          * the waiter flag for the queue we don't wake up.  For now this is
746          * hardcoded for the algorithm mentioned above.
747          *
748          * In the case of both readers and writers waiting we wakeup the
749          * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
750          * new writer comes in before a reader it will claim the lock up
751          * above.  There is probably a potential priority inversion in
752          * there that could be worked around either by waking both queues
753          * of waiters or doing some complicated lock handoff gymnastics.
754          */
755         v = RW_UNLOCKED;
756         if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) {
757                 queue = TS_EXCLUSIVE_QUEUE;
758                 v |= (rw->rw_lock & RW_LOCK_READ_WAITERS);
759         } else
760                 queue = TS_SHARED_QUEUE;
761
762         /* Wake up all waiters for the specific queue. */
763         if (LOCK_LOG_TEST(&rw->lock_object, 0))
764                 CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
765                     queue == TS_SHARED_QUEUE ? "read" : "write");
766         turnstile_broadcast(ts, queue);
767         atomic_store_rel_ptr(&rw->rw_lock, v);
768         turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
769         turnstile_chain_unlock(&rw->lock_object);
770 }
771
772 /*
773  * Attempt to do a non-blocking upgrade from a read lock to a write
774  * lock.  This will only succeed if this thread holds a single read
775  * lock.  Returns true if the upgrade succeeded and false otherwise.
776  */
777 int
778 _rw_try_upgrade(struct rwlock *rw, const char *file, int line)
779 {
780         uintptr_t v, x, tid;
781         struct turnstile *ts;
782         int success;
783
784         KASSERT(rw->rw_lock != RW_DESTROYED,
785             ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
786         _rw_assert(rw, RA_RLOCKED, file, line);
787
788         /*
789          * Attempt to switch from one reader to a writer.  If there
790          * are any write waiters, then we will have to lock the
791          * turnstile first to prevent races with another writer
792          * calling turnstile_wait() before we have claimed this
793          * turnstile.  So, do the simple case of no waiters first.
794          */
795         tid = (uintptr_t)curthread;
796         success = 0;
797         for (;;) {
798                 v = rw->rw_lock;
799                 if (RW_READERS(v) > 1)
800                         break;
801                 if (!(v & RW_LOCK_WAITERS)) {
802                         success = atomic_cmpset_ptr(&rw->rw_lock, v, tid);
803                         if (!success)
804                                 continue;
805                         break;
806                 }
807
808                 /*
809                  * Ok, we think we have waiters, so lock the turnstile.
810                  */
811                 ts = turnstile_trywait(&rw->lock_object);
812                 v = rw->rw_lock;
813                 if (RW_READERS(v) > 1) {
814                         turnstile_cancel(ts);
815                         break;
816                 }
817                 /*
818                  * Try to switch from one reader to a writer again.  This time
819                  * we honor the current state of the waiters flags.
820                  * If we obtain the lock with the flags set, then claim
821                  * ownership of the turnstile.
822                  */
823                 x = rw->rw_lock & RW_LOCK_WAITERS;
824                 success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x);
825                 if (success) {
826                         if (x)
827                                 turnstile_claim(ts);
828                         else
829                                 turnstile_cancel(ts);
830                         break;
831                 }
832                 turnstile_cancel(ts);
833         }
834         LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
835         if (success) {
836                 curthread->td_rw_rlocks--;
837                 WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
838                     file, line);
839         }
840         return (success);
841 }
842
843 /*
844  * Downgrade a write lock into a single read lock.
845  */
846 void
847 _rw_downgrade(struct rwlock *rw, const char *file, int line)
848 {
849         struct turnstile *ts;
850         uintptr_t tid, v;
851         int rwait, wwait;
852
853         KASSERT(rw->rw_lock != RW_DESTROYED,
854             ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
855         _rw_assert(rw, RA_WLOCKED | RA_NOTRECURSED, file, line);
856 #ifndef INVARIANTS
857         if (rw_recursed(rw))
858                 panic("downgrade of a recursed lock");
859 #endif
860
861         WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);
862
863         /*
864          * Convert from a writer to a single reader.  First we handle
865          * the easy case with no waiters.  If there are any waiters, we
866          * lock the turnstile and "disown" the lock.
867          */
868         tid = (uintptr_t)curthread;
869         if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
870                 goto out;
871
872         /*
873          * Ok, we think we have waiters, so lock the turnstile so we can
874          * read the waiter flags without any races.
875          */
876         turnstile_chain_lock(&rw->lock_object);
877         v = rw->rw_lock & RW_LOCK_WAITERS;
878         rwait = v & RW_LOCK_READ_WAITERS;
879         wwait = v & RW_LOCK_WRITE_WAITERS;
880         MPASS(rwait | wwait);
881
882         /*
883          * Downgrade from a write lock while preserving waiters flag
884          * and give up ownership of the turnstile.
885          */
886         ts = turnstile_lookup(&rw->lock_object);
887         MPASS(ts != NULL);
888         if (!wwait)
889                 v &= ~RW_LOCK_READ_WAITERS;
890         atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
891         /*
892          * Wake other readers if there are no writers pending.  Otherwise they
893          * won't be able to acquire the lock anyway.
894          */
895         if (rwait && !wwait) {
896                 turnstile_broadcast(ts, TS_SHARED_QUEUE);
897                 turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
898         } else
899                 turnstile_disown(ts);
900         turnstile_chain_unlock(&rw->lock_object);
901 out:
902         curthread->td_rw_rlocks++;
903         LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
904 }
905
906 #ifdef INVARIANT_SUPPORT
907 #ifndef INVARIANTS
908 #undef _rw_assert
909 #endif
910
911 /*
912  * In the non-WITNESS case, rw_assert() can only detect that at least
913  * *some* thread owns an rlock, but it cannot guarantee that *this*
914  * thread owns an rlock.
915  */
916 void
917 _rw_assert(struct rwlock *rw, int what, const char *file, int line)
918 {
919
920         if (panicstr != NULL)
921                 return;
922         switch (what) {
923         case RA_LOCKED:
924         case RA_LOCKED | RA_RECURSED:
925         case RA_LOCKED | RA_NOTRECURSED:
926         case RA_RLOCKED:
927 #ifdef WITNESS
928                 witness_assert(&rw->lock_object, what, file, line);
929 #else
930                 /*
931                  * If some other thread has a write lock or we have one
932                  * and are asserting a read lock, fail.  Also, if no one
933                  * has a lock at all, fail.
934                  */
935                 if (rw->rw_lock == RW_UNLOCKED ||
936                     (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
937                     rw_wowner(rw) != curthread)))
938                         panic("Lock %s not %slocked @ %s:%d\n",
939                             rw->lock_object.lo_name, (what == RA_RLOCKED) ?
940                             "read " : "", file, line);
941
942                 if (!(rw->rw_lock & RW_LOCK_READ)) {
943                         if (rw_recursed(rw)) {
944                                 if (what & RA_NOTRECURSED)
945                                         panic("Lock %s recursed @ %s:%d\n",
946                                             rw->lock_object.lo_name, file,
947                                             line);
948                         } else if (what & RA_RECURSED)
949                                 panic("Lock %s not recursed @ %s:%d\n",
950                                     rw->lock_object.lo_name, file, line);
951                 }
952 #endif
953                 break;
954         case RA_WLOCKED:
955         case RA_WLOCKED | RA_RECURSED:
956         case RA_WLOCKED | RA_NOTRECURSED:
957                 if (rw_wowner(rw) != curthread)
958                         panic("Lock %s not exclusively locked @ %s:%d\n",
959                             rw->lock_object.lo_name, file, line);
960                 if (rw_recursed(rw)) {
961                         if (what & RA_NOTRECURSED)
962                                 panic("Lock %s recursed @ %s:%d\n",
963                                     rw->lock_object.lo_name, file, line);
964                 } else if (what & RA_RECURSED)
965                         panic("Lock %s not recursed @ %s:%d\n",
966                             rw->lock_object.lo_name, file, line);
967                 break;
968         case RA_UNLOCKED:
969 #ifdef WITNESS
970                 witness_assert(&rw->lock_object, what, file, line);
971 #else
972                 /*
973                  * If we hold a write lock fail.  We can't reliably check
974                  * to see if we hold a read lock or not.
975                  */
976                 if (rw_wowner(rw) == curthread)
977                         panic("Lock %s exclusively locked @ %s:%d\n",
978                             rw->lock_object.lo_name, file, line);
979 #endif
980                 break;
981         default:
982                 panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
983                     line);
984         }
985 }
986 #endif /* INVARIANT_SUPPORT */
987
988 #ifdef DDB
989 void
990 db_show_rwlock(struct lock_object *lock)
991 {
992         struct rwlock *rw;
993         struct thread *td;
994
995         rw = (struct rwlock *)lock;
996
997         db_printf(" state: ");
998         if (rw->rw_lock == RW_UNLOCKED)
999                 db_printf("UNLOCKED\n");
1000         else if (rw->rw_lock == RW_DESTROYED) {
1001                 db_printf("DESTROYED\n");
1002                 return;
1003         } else if (rw->rw_lock & RW_LOCK_READ)
1004                 db_printf("RLOCK: %ju locks\n",
1005                     (uintmax_t)(RW_READERS(rw->rw_lock)));
1006         else {
1007                 td = rw_wowner(rw);
1008                 db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1009                     td->td_tid, td->td_proc->p_pid, td->td_name);
1010                 if (rw_recursed(rw))
1011                         db_printf(" recursed: %u\n", rw->rw_recurse);
1012         }
1013         db_printf(" waiters: ");
1014         switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
1015         case RW_LOCK_READ_WAITERS:
1016                 db_printf("readers\n");
1017                 break;
1018         case RW_LOCK_WRITE_WAITERS:
1019                 db_printf("writers\n");
1020                 break;
1021         case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
1022                 db_printf("readers and writers\n");
1023                 break;
1024         default:
1025                 db_printf("none\n");
1026                 break;
1027         }
1028 }
1029
1030 #endif