/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

#ifdef DDB
#include <ddb/ddb.h>

static void     db_show_rwlock(struct lock_object *lock);
#endif

struct lock_class lock_class_rw = {
        "rw",
        LC_SLEEPLOCK | LC_RECURSABLE /* | LC_UPGRADABLE */,
#ifdef DDB
        db_show_rwlock
#endif
};

#define rw_owner(rw)                                                    \
        ((rw)->rw_lock & RW_LOCK_READ ? NULL :                          \
            (struct thread *)RW_OWNER((rw)->rw_lock))

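/*
 * Illustrative note (not part of the original file): all of the lock state
 * is packed into the single word rw_lock.  Judging from the accessors used
 * in this file (RW_LOCK_READ, RW_READERS(), RW_ONE_READER, RW_OWNER() and
 * the waiter flags), the encoding is roughly:
 *
 *   read-locked:  reader count in the upper bits, plus RW_LOCK_READ and
 *                 any waiter flags in the low bits
 *   write-locked: the owning thread pointer, with any waiter flags in the
 *                 low (alignment) bits and RW_LOCK_READ clear
 *
 * A completely unlocked lock is encoded as a read lock with no readers and
 * no waiters (RW_UNLOCKED), which is why the read fast path below treats
 * "unlocked" and "read-locked" the same way.  The exact bit layout lives
 * in sys/rwlock.h.
 */
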
#ifndef INVARIANTS
#define _rw_assert(rw, what, file, line)
#endif

void
rw_init(struct rwlock *rw, const char *name)
{

        rw->rw_lock = RW_UNLOCKED;

        lock_init(&rw->rw_object, &lock_class_rw, name, NULL, LO_WITNESS |
            LO_RECURSABLE /* | LO_UPGRADABLE */);
}

void
rw_destroy(struct rwlock *rw)
{

        KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked"));
        lock_destroy(&rw->rw_object);
}

void
rw_sysinit(void *arg)
{
        struct rw_args *args = arg;

        rw_init(args->ra_rw, args->ra_desc);
}

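/*
 * Usage sketch (illustrative, not part of the original file): consumers
 * normally go through the rw_*() wrapper macros in sys/rwlock.h rather
 * than calling the _rw_*() functions here directly.  A typical pattern,
 * with "foo_lock" and "foo_count" as made-up names, looks roughly like:
 *
 *      static struct rwlock foo_lock;
 *
 *      rw_init(&foo_lock, "foo");
 *
 *      rw_rlock(&foo_lock);            // shared (read) access
 *      value = foo_count;
 *      rw_runlock(&foo_lock);
 *
 *      rw_wlock(&foo_lock);            // exclusive (write) access
 *      foo_count++;
 *      rw_wunlock(&foo_lock);
 *
 *      rw_destroy(&foo_lock);
 */
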
void
_rw_wlock(struct rwlock *rw, const char *file, int line)
{

        MPASS(curthread != NULL);
        KASSERT(rw_owner(rw) != curthread,
            ("%s (%s): wlock already held @ %s:%d", __func__,
            rw->rw_object.lo_name, file, line));
        WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
            line);
        __rw_wlock(rw, curthread, file, line);
        LOCK_LOG_LOCK("WLOCK", &rw->rw_object, 0, 0, file, line);
        WITNESS_LOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line);
}

void
_rw_wunlock(struct rwlock *rw, const char *file, int line)
{

        MPASS(curthread != NULL);
        _rw_assert(rw, RA_WLOCKED, file, line);
        WITNESS_UNLOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line);
        LOCK_LOG_LOCK("WUNLOCK", &rw->rw_object, 0, 0, file, line);
        __rw_wunlock(rw, curthread, file, line);
}

void
_rw_rlock(struct rwlock *rw, const char *file, int line)
{
        uintptr_t x;

        KASSERT(rw_owner(rw) != curthread,
            ("%s (%s): wlock already held @ %s:%d", __func__,
            rw->rw_object.lo_name, file, line));
        WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER, file, line);

        /*
         * Note that we make no attempt to block new read locks once a
         * writer has blocked on the lock.  The reason is that we
         * currently allow read locks to recurse and we don't keep track
         * of all the holders of read locks.  Thus, if we blocked readers
         * once a writer blocked, a reader recursing on its read lock
         * would deadlock: the reader would be blocked on the writer,
         * and the writer would be blocked waiting for the reader to
         * release its original read lock.
         */
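        /*
         * Concrete scenario (illustrative, using made-up thread names):
         *
         *      thread A:  rw_rlock(rw)          - granted, 1 reader
         *      thread B:  rw_wlock(rw)          - blocks behind A
         *      thread A:  rw_rlock(rw) again    - must be granted
         *
         * If A's second read acquisition were queued behind B, A would
         * wait for B while B waits for A's first read lock, and neither
         * could ever make progress.
         */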
        for (;;) {
                /*
                 * Handle the easy case.  If no other thread has a write
                 * lock, then try to bump up the count of read locks.  Note
                 * that we have to preserve the current state of the
                 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
                 * read lock, then rw_lock must have changed, so restart
                 * the loop.  Note that this handles the case of a
                 * completely unlocked rwlock since such a lock is encoded
                 * as a read lock with no waiters.
                 */
                x = rw->rw_lock;
                if (x & RW_LOCK_READ) {

                        /*
                         * The RW_LOCK_READ_WAITERS flag should only be set
                         * if another thread currently holds a write lock,
                         * and in that case RW_LOCK_READ should be clear.
                         */
                        MPASS((x & RW_LOCK_READ_WAITERS) == 0);
                        if (atomic_cmpset_acq_ptr(&rw->rw_lock, x,
                            x + RW_ONE_READER)) {
                                if (LOCK_LOG_TEST(&rw->rw_object, 0))
                                        CTR4(KTR_LOCK,
                                            "%s: %p succeed %p -> %p", __func__,
                                            rw, (void *)x,
                                            (void *)(x + RW_ONE_READER));
                                break;
                        }
                        continue;
                }

                /*
                 * Okay, now it's the hard case.  Some other thread already
                 * has a write lock, so acquire the turnstile lock so we can
                 * begin the process of blocking.
                 */
                turnstile_lock(&rw->rw_object);

                /*
                 * The lock might have been released while we spun, so
                 * recheck its state and restart the loop if there is no
                 * longer a write lock.
                 */
                x = rw->rw_lock;
                if (x & RW_LOCK_READ) {
                        turnstile_release(&rw->rw_object);
                        continue;
                }

                /*
                 * Ok, it's still a write lock.  If the RW_LOCK_READ_WAITERS
                 * flag is already set, then we can go ahead and block.  If
                 * it is not set then try to set it.  If we fail to set it
                 * drop the turnstile lock and restart the loop.
                 */
                if (!(x & RW_LOCK_READ_WAITERS) &&
                    !atomic_cmpset_ptr(&rw->rw_lock, x,
                    x | RW_LOCK_READ_WAITERS)) {
                        turnstile_release(&rw->rw_object);
                        continue;
                }
                if (!(x & RW_LOCK_READ_WAITERS) &&
                    LOCK_LOG_TEST(&rw->rw_object, 0))
                        CTR2(KTR_LOCK, "%s: %p set read waiters flag", __func__,
                            rw);

                /*
                 * We were unable to acquire the lock and the read waiters
                 * flag is set, so we must block on the turnstile.
                 */
                if (LOCK_LOG_TEST(&rw->rw_object, 0))
                        CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
                            rw);
                turnstile_wait(&rw->rw_object, rw_owner(rw), TS_SHARED_QUEUE);
                if (LOCK_LOG_TEST(&rw->rw_object, 0))
                        CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
                            __func__, rw);
        }

        /*
         * TODO: acquire "owner of record" here.  Here be turnstile dragons
         * however.  turnstiles don't like owners changing between calls to
         * turnstile_wait() currently.
         */

        LOCK_LOG_LOCK("RLOCK", &rw->rw_object, 0, 0, file, line);
        WITNESS_LOCK(&rw->rw_object, 0, file, line);
}

void
_rw_runlock(struct rwlock *rw, const char *file, int line)
{
        struct turnstile *ts;
        uintptr_t x;

        _rw_assert(rw, RA_RLOCKED, file, line);
        WITNESS_UNLOCK(&rw->rw_object, 0, file, line);
        LOCK_LOG_LOCK("RUNLOCK", &rw->rw_object, 0, 0, file, line);

        /* TODO: drop "owner of record" here. */

        for (;;) {
                /*
                 * See if there is more than one read lock held.  If so,
                 * just drop one and return.
                 */
                x = rw->rw_lock;
                if (RW_READERS(x) > 1) {
                        if (atomic_cmpset_ptr(&rw->rw_lock, x,
                            x - RW_ONE_READER)) {
                                if (LOCK_LOG_TEST(&rw->rw_object, 0))
                                        CTR4(KTR_LOCK,
                                            "%s: %p succeeded %p -> %p",
                                            __func__, rw, (void *)x,
                                            (void *)(x - RW_ONE_READER));
                                break;
                        }
                        continue;
                }

                /*
                 * We should never have read waiters while at least one
                 * thread holds a read lock.  (See note above)
                 */
                KASSERT(!(x & RW_LOCK_READ_WAITERS),
                    ("%s: waiting readers", __func__));

                /*
                 * If there aren't any waiters for a write lock, then try
                 * to drop it quickly.
                 */
                if (!(x & RW_LOCK_WRITE_WAITERS)) {

                        /*
                         * There shouldn't be any flags set and we should
                         * be the only read lock.  If we fail to release
                         * the single read lock, then another thread might
                         * have just acquired a read lock, so go back up
                         * to the multiple read locks case.
                         */
                        MPASS(x == RW_READERS_LOCK(1));
                        if (atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1),
                            RW_UNLOCKED)) {
                                if (LOCK_LOG_TEST(&rw->rw_object, 0))
                                        CTR2(KTR_LOCK, "%s: %p last succeeded",
                                            __func__, rw);
                                break;
                        }
                        continue;
                }

                /*
                 * There should just be one reader with one or more
                 * writers waiting.
                 */
                MPASS(x == (RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS));

                /*
                 * Ok, we know we have a waiting writer and we think we
                 * are the last reader, so grab the turnstile lock.
                 */
                turnstile_lock(&rw->rw_object);

                /*
                 * Try to drop our lock, leaving the lock in an unlocked
                 * state.
                 *
                 * If you wanted to do explicit lock handoff you'd have to
                 * do it here.  You'd also want to use turnstile_signal()
                 * and you'd have to handle the race where a higher
                 * priority thread blocks on the write lock before the
                 * thread you wake up actually runs, letting the new thread
                 * "steal" the lock.  For now it's a lot simpler to just
                 * wake up all of the waiters.
                 *
                 * As above, if we fail, then another thread might have
                 * acquired a read lock, so drop the turnstile lock and
                 * restart.
                 */
                if (!atomic_cmpset_ptr(&rw->rw_lock,
                    RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS, RW_UNLOCKED)) {
                        turnstile_release(&rw->rw_object);
                        continue;
                }
                if (LOCK_LOG_TEST(&rw->rw_object, 0))
                        CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
                            __func__, rw);

                /*
                 * Ok.  The lock is released and all that's left is to
                 * wake up the waiters.  Note that the lock might not be
                 * free anymore, but in that case the writers will just
                 * block again if they run before the new lock holder(s)
                 * release the lock.
                 */
                ts = turnstile_lookup(&rw->rw_object);
                turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
                turnstile_unpend(ts, TS_SHARED_LOCK);
                break;
        }
}

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
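/*
 * Illustrative note (not from the original file): the write-lock fast path
 * lives in sys/rwlock.h.  Roughly, __rw_wlock() attempts a single atomic
 * compare-and-set of rw_lock from RW_UNLOCKED to the owning thread pointer
 * (via _rw_write_lock()) and only falls back to this function when that
 * fails, i.e. when the lock word was not RW_UNLOCKED:
 *
 *      if (!_rw_write_lock(rw, tid))
 *              _rw_wlock_hard(rw, tid, file, line);
 *
 * The exact macro definition may differ; see sys/rwlock.h.
 */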
void
_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
        uintptr_t v;

        if (LOCK_LOG_TEST(&rw->rw_object, 0))
                CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
                    rw->rw_object.lo_name, (void *)rw->rw_lock, file, line);

        while (!_rw_write_lock(rw, tid)) {
                turnstile_lock(&rw->rw_object);
                v = rw->rw_lock;

                /*
                 * If the lock was released while spinning on the
                 * turnstile chain lock, try again.
                 */
                if (v == RW_UNLOCKED) {
                        turnstile_release(&rw->rw_object);
                        cpu_spinwait();
                        continue;
                }

                /*
                 * If the lock was released by a writer with both readers
                 * and writers waiting and a reader hasn't woken up and
                 * acquired the lock yet, rw_lock will be set to the
                 * value RW_UNLOCKED | RW_LOCK_WRITE_WAITERS.  If we see
                 * that value, try to acquire it once.  Note that we have
                 * to preserve the RW_LOCK_WRITE_WAITERS flag as there are
                 * other writers waiting still.  If we fail, restart the
                 * loop.
                 */
                if (v == (RW_UNLOCKED | RW_LOCK_WRITE_WAITERS)) {
                        if (atomic_cmpset_acq_ptr(&rw->rw_lock,
                            RW_UNLOCKED | RW_LOCK_WRITE_WAITERS,
                            tid | RW_LOCK_WRITE_WAITERS)) {
                                turnstile_claim(&rw->rw_object);
                                CTR2(KTR_LOCK, "%s: %p claimed by new writer",
                                    __func__, rw);
                                break;
                        }
                        turnstile_release(&rw->rw_object);
                        cpu_spinwait();
                        continue;
                }

                /*
                 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
                 * set it.  If we fail to set it, then loop back and try
                 * again.
                 */
                if (!(v & RW_LOCK_WRITE_WAITERS) &&
                    !atomic_cmpset_ptr(&rw->rw_lock, v,
                    v | RW_LOCK_WRITE_WAITERS)) {
                        turnstile_release(&rw->rw_object);
                        cpu_spinwait();
                        continue;
                }
                if (!(v & RW_LOCK_WRITE_WAITERS) &&
                    LOCK_LOG_TEST(&rw->rw_object, 0))
                        CTR2(KTR_LOCK, "%s: %p set write waiters flag",
                            __func__, rw);

                /* XXX: Adaptively spin if current wlock owner on another CPU? */

                /*
                 * We were unable to acquire the lock and the write waiters
                 * flag is set, so we must block on the turnstile.
                 */
                if (LOCK_LOG_TEST(&rw->rw_object, 0))
                        CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
                            rw);
                turnstile_wait(&rw->rw_object, rw_owner(rw),
                    TS_EXCLUSIVE_QUEUE);
                if (LOCK_LOG_TEST(&rw->rw_object, 0))
                        CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
                            __func__, rw);
        }
}

/*
 * This function is called if the first try at releasing a write lock failed.
 * This means that one of the 2 waiter bits must be set indicating that at
 * least one thread is waiting on this lock.
 */
void
_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
        struct turnstile *ts;
        uintptr_t v;
        int queue;

        KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
            ("%s: neither of the waiter flags is set", __func__));

        if (LOCK_LOG_TEST(&rw->rw_object, 0))
                CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

        turnstile_lock(&rw->rw_object);
        ts = turnstile_lookup(&rw->rw_object);

        /* XXX: Adaptive fixup would be required here. */
        MPASS(ts != NULL);

        /*
         * Use the same algo as sx locks for now.  Prefer waking up shared
         * waiters, if we have any, over writers.  This is probably not
         * ideal.
         *
         * 'v' is the value we are going to write back to rw_lock.  If we
         * have waiters on both queues, we need to preserve the state of
         * the waiter flag for the queue we don't wake up.  For now this is
         * hardcoded for the algorithm mentioned above.
         *
         * In the case of both readers and writers waiting we wake up the
         * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
         * new writer comes in before a reader it will claim the lock up
         * above.  There is probably a potential priority inversion in
         * there that could be worked around either by waking both queues
         * of waiters or doing some complicated lock handoff gymnastics.
         */
        if (rw->rw_lock & RW_LOCK_READ_WAITERS) {
                queue = TS_SHARED_QUEUE;
                v = RW_UNLOCKED | (rw->rw_lock & RW_LOCK_WRITE_WAITERS);
        } else {
                queue = TS_EXCLUSIVE_QUEUE;
                v = RW_UNLOCKED;
        }
        if (LOCK_LOG_TEST(&rw->rw_object, 0))
                CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
                    queue == TS_SHARED_QUEUE ? "read" : "write");

        /* Wake up all waiters for the specific queue. */
        turnstile_broadcast(ts, queue);
        atomic_store_rel_ptr(&rw->rw_lock, v);
        turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
}

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef _rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
_rw_assert(struct rwlock *rw, int what, const char *file, int line)
{

        if (panicstr != NULL)
                return;
        switch (what) {
        case RA_LOCKED:
        case RA_RLOCKED:
#ifdef WITNESS
                witness_assert(&rw->rw_object, what, file, line);
#else
                /*
                 * If some other thread has a write lock or we have one
                 * and are asserting a read lock, fail.  Also, if no one
                 * has a lock at all, fail.
                 */
                if (rw->rw_lock == RW_UNLOCKED ||
                    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
                    rw_owner(rw) != curthread)))
                        panic("Lock %s not %slocked @ %s:%d\n",
                            rw->rw_object.lo_name, (what == RA_RLOCKED) ?
                            "read " : "", file, line);
#endif
                break;
        case RA_WLOCKED:
                if (rw_owner(rw) != curthread)
                        panic("Lock %s not exclusively locked @ %s:%d\n",
                            rw->rw_object.lo_name, file, line);
                break;
        case RA_UNLOCKED:
#ifdef WITNESS
                witness_assert(&rw->rw_object, what, file, line);
#else
                /*
                 * If we hold a write lock fail.  We can't reliably check
                 * to see if we hold a read lock or not.
                 */
                if (rw_owner(rw) == curthread)
                        panic("Lock %s exclusively locked @ %s:%d\n",
                            rw->rw_object.lo_name, file, line);
#endif
                break;
        default:
                panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
                    line);
        }
}
#endif /* INVARIANT_SUPPORT */

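/*
 * Usage sketch (illustrative, not part of the original file): code that
 * requires its caller to hold the lock typically documents that requirement
 * with rw_assert(), e.g. in a hypothetical helper:
 *
 *      static void
 *      foo_modify(struct foo *fp)
 *      {
 *
 *              rw_assert(&fp->foo_lock, RA_WLOCKED);
 *              fp->foo_count++;
 *      }
 *
 * Under WITNESS this also checks that it is the calling thread that holds
 * the lock; without WITNESS, RA_LOCKED/RA_RLOCKED can only check that the
 * lock is held by someone, as the comment above _rw_assert() notes.
 */
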
#ifdef DDB
void
db_show_rwlock(struct lock_object *lock)
{
        struct rwlock *rw;
        struct thread *td;

        rw = (struct rwlock *)lock;

        db_printf(" state: ");
        if (rw->rw_lock == RW_UNLOCKED)
                db_printf("UNLOCKED\n");
        else if (rw->rw_lock & RW_LOCK_READ)
                db_printf("RLOCK: %jd locks\n",
                    (intmax_t)(RW_READERS(rw->rw_lock)));
        else {
                td = rw_owner(rw);
                db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
                    td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
        }
        db_printf(" waiters: ");
        switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
        case RW_LOCK_READ_WAITERS:
                db_printf("readers\n");
                break;
        case RW_LOCK_WRITE_WAITERS:
                db_printf("writers\n");
                break;
        case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
                db_printf("readers and writers\n");
                break;
        default:
                db_printf("none\n");
                break;
        }
}

#endif