contrib/bind9/lib/isc/rwlock.c (FreeBSD releng/9.2)
/*
 * Copyright (C) 2004, 2005, 2007, 2009, 2011, 2012  Internet Systems Consortium, Inc. ("ISC")
 * Copyright (C) 1998-2001, 2003  Internet Software Consortium.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

/* $Id$ */

/*! \file */

#include <config.h>

#include <stddef.h>

#include <isc/atomic.h>
#include <isc/magic.h>
#include <isc/msgs.h>
#include <isc/platform.h>
#include <isc/rwlock.h>
#include <isc/util.h>

#define RWLOCK_MAGIC            ISC_MAGIC('R', 'W', 'L', 'k')
#define VALID_RWLOCK(rwl)       ISC_MAGIC_VALID(rwl, RWLOCK_MAGIC)

#ifdef ISC_PLATFORM_USETHREADS

#ifndef RWLOCK_DEFAULT_READ_QUOTA
#define RWLOCK_DEFAULT_READ_QUOTA 4
#endif

#ifndef RWLOCK_DEFAULT_WRITE_QUOTA
#define RWLOCK_DEFAULT_WRITE_QUOTA 4
#endif
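
/*
 * For illustration: a typical caller passes zero quotas to
 * isc_rwlock_init() so that the defaults above are applied.  The variable
 * "lock" below is hypothetical, not part of this file:
 *
 *      isc_rwlock_t lock;
 *
 *      if (isc_rwlock_init(&lock, 0, 0) != ISC_R_SUCCESS)
 *              return (ISC_R_UNEXPECTED);
 *      (void)isc_rwlock_lock(&lock, isc_rwlocktype_read);
 *      ... inspect the shared state ...
 *      (void)isc_rwlock_unlock(&lock, isc_rwlocktype_read);
 *      isc_rwlock_destroy(&lock);
 */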

#ifdef ISC_RWLOCK_TRACE
#include <stdio.h>              /* Required for fprintf/stderr. */
#include <isc/thread.h>         /* Required for isc_thread_self(). */

static void
print_lock(const char *operation, isc_rwlock_t *rwl, isc_rwlocktype_t type) {
        fprintf(stderr,
                isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                               ISC_MSG_PRINTLOCK,
                               "rwlock %p thread %lu %s(%s): %s, %u active, "
                               "%u granted, %u rwaiting, %u wwaiting\n"),
                rwl, isc_thread_self(), operation,
                (type == isc_rwlocktype_read ?
                 isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                ISC_MSG_READ, "read") :
                 isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                ISC_MSG_WRITE, "write")),
                (rwl->type == isc_rwlocktype_read ?
                 isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                ISC_MSG_READING, "reading") :
                 isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                ISC_MSG_WRITING, "writing")),
                rwl->active, rwl->granted, rwl->readers_waiting,
                rwl->writers_waiting);
}
#endif

isc_result_t
isc_rwlock_init(isc_rwlock_t *rwl, unsigned int read_quota,
                unsigned int write_quota)
{
        isc_result_t result;

        REQUIRE(rwl != NULL);

        /*
         * In case there's trouble initializing, we zero magic now.  If all
         * goes well, we'll set it to RWLOCK_MAGIC.
         */
        rwl->magic = 0;

#if defined(ISC_PLATFORM_HAVEXADD) && defined(ISC_PLATFORM_HAVECMPXCHG)
        rwl->write_requests = 0;
        rwl->write_completions = 0;
        rwl->cnt_and_flag = 0;
        rwl->readers_waiting = 0;
        rwl->write_granted = 0;
        if (read_quota != 0) {
                UNEXPECTED_ERROR(__FILE__, __LINE__,
                                 "read quota is not supported");
        }
        if (write_quota == 0)
                write_quota = RWLOCK_DEFAULT_WRITE_QUOTA;
        rwl->write_quota = write_quota;
#else
        rwl->type = isc_rwlocktype_read;
        rwl->original = isc_rwlocktype_none;
        rwl->active = 0;
        rwl->granted = 0;
        rwl->readers_waiting = 0;
        rwl->writers_waiting = 0;
        if (read_quota == 0)
                read_quota = RWLOCK_DEFAULT_READ_QUOTA;
        rwl->read_quota = read_quota;
        if (write_quota == 0)
                write_quota = RWLOCK_DEFAULT_WRITE_QUOTA;
        rwl->write_quota = write_quota;
#endif

        result = isc_mutex_init(&rwl->lock);
        if (result != ISC_R_SUCCESS)
                return (result);

        result = isc_condition_init(&rwl->readable);
        if (result != ISC_R_SUCCESS) {
                UNEXPECTED_ERROR(__FILE__, __LINE__,
                                 "isc_condition_init(readable) %s: %s",
                                 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
                                                ISC_MSG_FAILED, "failed"),
                                 isc_result_totext(result));
                result = ISC_R_UNEXPECTED;
                goto destroy_lock;
        }
        result = isc_condition_init(&rwl->writeable);
        if (result != ISC_R_SUCCESS) {
                UNEXPECTED_ERROR(__FILE__, __LINE__,
                                 "isc_condition_init(writeable) %s: %s",
                                 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
                                                ISC_MSG_FAILED, "failed"),
                                 isc_result_totext(result));
                result = ISC_R_UNEXPECTED;
                goto destroy_rcond;
        }

        rwl->magic = RWLOCK_MAGIC;

        return (ISC_R_SUCCESS);

  destroy_rcond:
        (void)isc_condition_destroy(&rwl->readable);
  destroy_lock:
        DESTROYLOCK(&rwl->lock);

        return (result);
}

void
isc_rwlock_destroy(isc_rwlock_t *rwl) {
        REQUIRE(VALID_RWLOCK(rwl));

#if defined(ISC_PLATFORM_HAVEXADD) && defined(ISC_PLATFORM_HAVECMPXCHG)
        REQUIRE(rwl->write_requests == rwl->write_completions &&
                rwl->cnt_and_flag == 0 && rwl->readers_waiting == 0);
#else
        LOCK(&rwl->lock);
        REQUIRE(rwl->active == 0 &&
                rwl->readers_waiting == 0 &&
                rwl->writers_waiting == 0);
        UNLOCK(&rwl->lock);
#endif

        rwl->magic = 0;
        (void)isc_condition_destroy(&rwl->readable);
        (void)isc_condition_destroy(&rwl->writeable);
        DESTROYLOCK(&rwl->lock);
}

#if defined(ISC_PLATFORM_HAVEXADD) && defined(ISC_PLATFORM_HAVECMPXCHG)

/*
 * When some architecture-dependent atomic operations are available,
 * rwlock can be more efficient than the generic algorithm defined below.
 * The basic algorithm is described in the following URL:
 *   http://www.cs.rochester.edu/u/scott/synchronization/pseudocode/rw.html
 *
 * The key is to use the following integer variables modified atomically:
 *   write_requests, write_completions, and cnt_and_flag.
 *
 * write_requests and write_completions act as a waiting queue for writers
 * in order to ensure the FIFO order.  Both variables begin with the initial
 * value of 0.  When a new writer tries to get a write lock, it increments
 * write_requests and gets the previous value of the variable as a "ticket".
 * When write_completions reaches the ticket number, the new writer can start
 * writing.  When the writer completes its work, it increments
 * write_completions so that another new writer can start working.  If
 * write_requests is not equal to write_completions, a writer is currently
 * working or waiting.  In this case, new readers cannot start reading; in
 * other words, this algorithm basically prefers writers.
 *
 * cnt_and_flag is a "lock" shared by all readers and writers.  This integer
 * variable is a kind of structure with two members: writer_flag (1 bit) and
 * reader_count (31 bits).  The writer_flag shows whether a writer is working,
 * and the reader_count shows the number of readers currently working or
 * almost ready to work.  A writer who has the current "ticket" tries to get
 * the lock by exclusively setting the writer_flag to 1, provided that the
 * whole 32-bit value is 0 (meaning no readers or writers are working).  On
 * the other hand, a new reader tries to increment the "reader_count" field
 * provided that the writer_flag is 0 (meaning no writer is working).
 *
 * If one of the above operations fails, the reader or the writer sleeps
 * until the related condition changes.  When a working reader or writer
 * completes its work and the condition that suspended other readers or
 * writers has changed, it wakes up the sleeping readers or writers.
 *
 * As already noted, this algorithm basically prefers writers.  In order to
 * prevent readers from starving, however, the algorithm also introduces the
 * "writer quota" (Q).  When Q consecutive writers have completed their work
 * while readers are waiting, the last writer will wake up the readers, even
 * if a new writer is waiting.  (A condensed sketch of these acquire/release
 * sequences follows this comment block.)
 *
 * Implementation specific note: due to the combination of atomic operations
 * and a mutex lock, ordering between the atomic operation and locks can be
 * very sensitive in some cases.  In particular, it is generally very important
 * to check the atomic variable that requires a reader or writer to sleep after
 * locking the mutex and before actually sleeping; otherwise, it is very likely
 * to cause a deadlock.  For example, assume "var" is a variable
 * atomically modified, then the corresponding code would be:
 *      if (var == need_sleep) {
 *              LOCK(lock);
 *              if (var == need_sleep)
 *                      WAIT(cond, lock);
 *              UNLOCK(lock);
 *      }
 * The second check is important, since "var" is protected by the atomic
 * operation, not by the mutex, and can be changed just before sleeping.
 * (The first "if" could be omitted, but this is also important in order to
 * make the code efficient by avoiding the use of the mutex unless it is
 * really necessary.)
 */
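
/*
 * A condensed sketch of the acquire/release sequences described above
 * (xadd/cmpxchg stand for isc_atomic_xadd()/isc_atomic_cmpxchg(); error
 * handling and the mutex/condition-variable waiting are elided):
 *
 *      write lock:
 *              ticket = xadd(&write_requests, 1);
 *              wait until write_completions == ticket;
 *              wait until cmpxchg(&cnt_and_flag, 0, WRITER_ACTIVE) succeeds;
 *
 *      write unlock:
 *              xadd(&cnt_and_flag, -WRITER_ACTIVE);
 *              xadd(&write_completions, 1);
 *
 *      read lock:
 *              wait until write_requests == write_completions;
 *              xadd(&cnt_and_flag, READER_INCR);
 *              wait until (cnt_and_flag & WRITER_ACTIVE) == 0;
 *
 *      read unlock:
 *              xadd(&cnt_and_flag, -READER_INCR);
 *
 * With a write quota of Q, after Q consecutive write grants while readers
 * are waiting, the unlocking writer wakes the waiting readers first even
 * if more writers are queued.
 */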

#define WRITER_ACTIVE   0x1
#define READER_INCR     0x2
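
/*
 * In cnt_and_flag, bit 0 is the writer flag and the remaining bits hold
 * the reader count, so each reader adds READER_INCR to the value.  For
 * example, three active readers and no writer give
 * cnt_and_flag == 3 * READER_INCR == 0x6.
 */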

isc_result_t
isc_rwlock_lock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
        isc_int32_t cntflag;

        REQUIRE(VALID_RWLOCK(rwl));

#ifdef ISC_RWLOCK_TRACE
        print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                  ISC_MSG_PRELOCK, "prelock"), rwl, type);
#endif

        if (type == isc_rwlocktype_read) {
                if (rwl->write_requests != rwl->write_completions) {
                        /* there is a waiting or active writer */
                        LOCK(&rwl->lock);
                        if (rwl->write_requests != rwl->write_completions) {
                                rwl->readers_waiting++;
                                WAIT(&rwl->readable, &rwl->lock);
                                rwl->readers_waiting--;
                        }
                        UNLOCK(&rwl->lock);
                }

                cntflag = isc_atomic_xadd(&rwl->cnt_and_flag, READER_INCR);
                POST(cntflag);
                while (1) {
                        if ((rwl->cnt_and_flag & WRITER_ACTIVE) == 0)
                                break;

                        /* A writer is still working */
                        LOCK(&rwl->lock);
                        rwl->readers_waiting++;
                        if ((rwl->cnt_and_flag & WRITER_ACTIVE) != 0)
                                WAIT(&rwl->readable, &rwl->lock);
                        rwl->readers_waiting--;
                        UNLOCK(&rwl->lock);

                        /*
                         * Typically, the reader should be able to get a lock
                         * at this stage:
                         *   (1) there should have been no pending writer when
                         *       the reader was trying to increment the
                         *       counter; otherwise, the writer should be in
                         *       the waiting queue, preventing the reader from
                         *       proceeding to this point.
                         *   (2) once the reader increments the counter, no
                         *       other writer can get a lock.
                         * Still, it is possible another writer can work at
                         * this point, e.g. in the following scenario:
                         *   A previous writer unlocks the writer lock.
                         *   This reader proceeds to point (1).
                         *   A new writer appears, and gets a new lock before
                         *   the reader increments the counter.
                         *   The reader then increments the counter.
                         *   The previous writer notices there is a waiting
                         *   reader who is almost ready, and wakes it up.
                         * So, the reader needs to confirm explicitly whether
                         * it can now read (thus we loop).  Note that this is
                         * not an infinite process, since the reader has
                         * incremented the counter at this point.
                         */
                }

                /*
                 * If we have been temporarily preferred over writers due to
                 * the writer quota, reset the condition (a race among readers
                 * doesn't matter).
                 */
                rwl->write_granted = 0;
        } else {
                isc_int32_t prev_writer;

                /* enter the waiting queue, and wait for our turn */
                prev_writer = isc_atomic_xadd(&rwl->write_requests, 1);
                while (rwl->write_completions != prev_writer) {
                        LOCK(&rwl->lock);
                        if (rwl->write_completions != prev_writer) {
                                WAIT(&rwl->writeable, &rwl->lock);
                                UNLOCK(&rwl->lock);
                                continue;
                        }
                        UNLOCK(&rwl->lock);
                        break;
                }

                while (1) {
                        cntflag = isc_atomic_cmpxchg(&rwl->cnt_and_flag, 0,
                                                     WRITER_ACTIVE);
                        if (cntflag == 0)
                                break;

                        /* Another active reader or writer is working. */
                        LOCK(&rwl->lock);
                        if (rwl->cnt_and_flag != 0)
                                WAIT(&rwl->writeable, &rwl->lock);
                        UNLOCK(&rwl->lock);
                }

                INSIST((rwl->cnt_and_flag & WRITER_ACTIVE) != 0);
                rwl->write_granted++;
        }

#ifdef ISC_RWLOCK_TRACE
        print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                  ISC_MSG_POSTLOCK, "postlock"), rwl, type);
#endif

        return (ISC_R_SUCCESS);
}

isc_result_t
isc_rwlock_trylock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
        isc_int32_t cntflag;

        REQUIRE(VALID_RWLOCK(rwl));

#ifdef ISC_RWLOCK_TRACE
        print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                  ISC_MSG_PRELOCK, "prelock"), rwl, type);
#endif

        if (type == isc_rwlocktype_read) {
                /* If a writer is waiting or working, we fail. */
                if (rwl->write_requests != rwl->write_completions)
                        return (ISC_R_LOCKBUSY);

                /* Otherwise, be ready for reading. */
                cntflag = isc_atomic_xadd(&rwl->cnt_and_flag, READER_INCR);
                if ((cntflag & WRITER_ACTIVE) != 0) {
                        /*
                         * A writer is working.  We lose, and cancel the read
                         * request.
                         */
                        cntflag = isc_atomic_xadd(&rwl->cnt_and_flag,
                                                  -READER_INCR);
                        /*
                         * If no other readers are waiting and we've suspended
                         * new writers in this short period, wake them up.
                         */
                        if (cntflag == READER_INCR &&
                            rwl->write_completions != rwl->write_requests) {
                                LOCK(&rwl->lock);
                                BROADCAST(&rwl->writeable);
                                UNLOCK(&rwl->lock);
                        }

                        return (ISC_R_LOCKBUSY);
                }
        } else {
                /* Try locking without entering the waiting queue. */
                cntflag = isc_atomic_cmpxchg(&rwl->cnt_and_flag, 0,
                                             WRITER_ACTIVE);
                if (cntflag != 0)
                        return (ISC_R_LOCKBUSY);

                /*
                 * XXXJT: jump into the queue, possibly breaking the writer
                 * order.
                 */
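                /*
                 * Decrementing write_completions keeps the ticket counters
                 * balanced: this writer never took a ticket, but it will
                 * increment write_completions when it unlocks, exactly as a
                 * queued writer would.
                 */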
                (void)isc_atomic_xadd(&rwl->write_completions, -1);

                rwl->write_granted++;
        }

#ifdef ISC_RWLOCK_TRACE
        print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                  ISC_MSG_POSTLOCK, "postlock"), rwl, type);
#endif

        return (ISC_R_SUCCESS);
}

isc_result_t
isc_rwlock_tryupgrade(isc_rwlock_t *rwl) {
        isc_int32_t prevcnt;

        REQUIRE(VALID_RWLOCK(rwl));

        /* Try to acquire write access. */
        prevcnt = isc_atomic_cmpxchg(&rwl->cnt_and_flag,
                                     READER_INCR, WRITER_ACTIVE);
        /*
         * There must have been no writer, and there must have been at least
         * one reader.
         */
        INSIST((prevcnt & WRITER_ACTIVE) == 0 &&
               (prevcnt & ~WRITER_ACTIVE) != 0);

        if (prevcnt == READER_INCR) {
                /*
                 * We are the only reader and have been upgraded.
                 * Now jump into the head of the writer waiting queue.
                 */
                (void)isc_atomic_xadd(&rwl->write_completions, -1);
        } else
                return (ISC_R_LOCKBUSY);

        return (ISC_R_SUCCESS);

}
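
/*
 * For illustration: a caller holding a read lock would typically try an
 * upgrade and fall back to dropping the read lock and taking a write lock
 * when the upgrade is contended; "lock" is the hypothetical variable from
 * the sketch near the top of this file.  After the fallback the protected
 * state may have changed and must be revalidated:
 *
 *      if (isc_rwlock_tryupgrade(&lock) != ISC_R_SUCCESS) {
 *              (void)isc_rwlock_unlock(&lock, isc_rwlocktype_read);
 *              (void)isc_rwlock_lock(&lock, isc_rwlocktype_write);
 *              ... revalidate before modifying ...
 *      }
 */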

void
isc_rwlock_downgrade(isc_rwlock_t *rwl) {
        isc_int32_t prev_readers;

        REQUIRE(VALID_RWLOCK(rwl));

        /* Become an active reader. */
        prev_readers = isc_atomic_xadd(&rwl->cnt_and_flag, READER_INCR);
        /* We must have been a writer. */
        INSIST((prev_readers & WRITER_ACTIVE) != 0);

        /* Complete write */
        (void)isc_atomic_xadd(&rwl->cnt_and_flag, -WRITER_ACTIVE);
        (void)isc_atomic_xadd(&rwl->write_completions, 1);

        /* Resume other readers */
        LOCK(&rwl->lock);
        if (rwl->readers_waiting > 0)
                BROADCAST(&rwl->readable);
        UNLOCK(&rwl->lock);
}

isc_result_t
isc_rwlock_unlock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
        isc_int32_t prev_cnt;

        REQUIRE(VALID_RWLOCK(rwl));

#ifdef ISC_RWLOCK_TRACE
        print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                  ISC_MSG_PREUNLOCK, "preunlock"), rwl, type);
#endif

        if (type == isc_rwlocktype_read) {
                prev_cnt = isc_atomic_xadd(&rwl->cnt_and_flag, -READER_INCR);

                /*
                 * If we're the last reader and any writers are waiting, wake
                 * them up.  We need to wake up all of them to ensure the
                 * FIFO order.
                 */
                if (prev_cnt == READER_INCR &&
                    rwl->write_completions != rwl->write_requests) {
                        LOCK(&rwl->lock);
                        BROADCAST(&rwl->writeable);
                        UNLOCK(&rwl->lock);
                }
        } else {
                isc_boolean_t wakeup_writers = ISC_TRUE;

                /*
                 * Reset the flag, and (implicitly) tell other writers
                 * we are done.
                 */
                (void)isc_atomic_xadd(&rwl->cnt_and_flag, -WRITER_ACTIVE);
                (void)isc_atomic_xadd(&rwl->write_completions, 1);

                if (rwl->write_granted >= rwl->write_quota ||
                    rwl->write_requests == rwl->write_completions ||
                    (rwl->cnt_and_flag & ~WRITER_ACTIVE) != 0) {
                        /*
                         * We have passed the write quota, no writer is
                         * waiting, or some readers are almost ready to
                         * proceed despite possibly pending writers.  Note
                         * that the last case can happen even if
                         * write_requests != write_completions (which means a
                         * new writer is in the queue), so we need to catch
                         * it explicitly.
                         */
                        LOCK(&rwl->lock);
                        if (rwl->readers_waiting > 0) {
                                wakeup_writers = ISC_FALSE;
                                BROADCAST(&rwl->readable);
                        }
                        UNLOCK(&rwl->lock);
                }

                if (rwl->write_requests != rwl->write_completions &&
                    wakeup_writers) {
                        LOCK(&rwl->lock);
                        BROADCAST(&rwl->writeable);
                        UNLOCK(&rwl->lock);
                }
        }

#ifdef ISC_RWLOCK_TRACE
        print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                  ISC_MSG_POSTUNLOCK, "postunlock"),
                   rwl, type);
#endif

        return (ISC_R_SUCCESS);
}

#else /* ISC_PLATFORM_HAVEXADD && ISC_PLATFORM_HAVECMPXCHG */

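/*
 * Generic implementation, used when the required atomic operations are not
 * available: all lock state is protected by rwl->lock, and blocked callers
 * wait on the "readable" or "writeable" condition variable.  The read and
 * write quotas bound how many consecutive grants one side may receive while
 * the other side has waiters, which prevents starvation.
 */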
static isc_result_t
doit(isc_rwlock_t *rwl, isc_rwlocktype_t type, isc_boolean_t nonblock) {
        isc_boolean_t skip = ISC_FALSE;
        isc_boolean_t done = ISC_FALSE;
        isc_result_t result = ISC_R_SUCCESS;

        REQUIRE(VALID_RWLOCK(rwl));

        LOCK(&rwl->lock);

#ifdef ISC_RWLOCK_TRACE
        print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                  ISC_MSG_PRELOCK, "prelock"), rwl, type);
#endif

        if (type == isc_rwlocktype_read) {
                if (rwl->readers_waiting != 0)
                        skip = ISC_TRUE;
                while (!done) {
                        if (!skip &&
                            ((rwl->active == 0 ||
                              (rwl->type == isc_rwlocktype_read &&
                               (rwl->writers_waiting == 0 ||
                                rwl->granted < rwl->read_quota)))))
                        {
                                rwl->type = isc_rwlocktype_read;
                                rwl->active++;
                                rwl->granted++;
                                done = ISC_TRUE;
                        } else if (nonblock) {
                                result = ISC_R_LOCKBUSY;
                                done = ISC_TRUE;
                        } else {
                                skip = ISC_FALSE;
                                rwl->readers_waiting++;
                                WAIT(&rwl->readable, &rwl->lock);
                                rwl->readers_waiting--;
                        }
                }
        } else {
                if (rwl->writers_waiting != 0)
                        skip = ISC_TRUE;
                while (!done) {
                        if (!skip && rwl->active == 0) {
                                rwl->type = isc_rwlocktype_write;
                                rwl->active = 1;
                                rwl->granted++;
                                done = ISC_TRUE;
                        } else if (nonblock) {
                                result = ISC_R_LOCKBUSY;
                                done = ISC_TRUE;
                        } else {
                                skip = ISC_FALSE;
                                rwl->writers_waiting++;
                                WAIT(&rwl->writeable, &rwl->lock);
                                rwl->writers_waiting--;
                        }
                }
        }

#ifdef ISC_RWLOCK_TRACE
        print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                  ISC_MSG_POSTLOCK, "postlock"), rwl, type);
#endif

        UNLOCK(&rwl->lock);

        return (result);
}

isc_result_t
isc_rwlock_lock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
        return (doit(rwl, type, ISC_FALSE));
}

isc_result_t
isc_rwlock_trylock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
        return (doit(rwl, type, ISC_TRUE));
}

isc_result_t
isc_rwlock_tryupgrade(isc_rwlock_t *rwl) {
        isc_result_t result = ISC_R_SUCCESS;

        REQUIRE(VALID_RWLOCK(rwl));
        LOCK(&rwl->lock);
        REQUIRE(rwl->type == isc_rwlocktype_read);
        REQUIRE(rwl->active != 0);

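        /*
         * rwl->original remembers the type the lock had before an upgrade
         * or downgrade so that isc_rwlock_unlock() can restore it, and the
         * matching wake-up policy, when the lock is released.
         */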
        /* If we are the only reader then succeed. */
        if (rwl->active == 1) {
                rwl->original = (rwl->original == isc_rwlocktype_none) ?
                                isc_rwlocktype_read : isc_rwlocktype_none;
                rwl->type = isc_rwlocktype_write;
        } else
                result = ISC_R_LOCKBUSY;

        UNLOCK(&rwl->lock);
        return (result);
}

void
isc_rwlock_downgrade(isc_rwlock_t *rwl) {

        REQUIRE(VALID_RWLOCK(rwl));
        LOCK(&rwl->lock);
        REQUIRE(rwl->type == isc_rwlocktype_write);
        REQUIRE(rwl->active == 1);

        rwl->type = isc_rwlocktype_read;
        rwl->original = (rwl->original == isc_rwlocktype_none) ?
                        isc_rwlocktype_write : isc_rwlocktype_none;
        /*
         * Resume processing any read requests that were blocked when
         * we upgraded.
         */
        if (rwl->original == isc_rwlocktype_none &&
            (rwl->writers_waiting == 0 || rwl->granted < rwl->read_quota) &&
            rwl->readers_waiting > 0)
                BROADCAST(&rwl->readable);

        UNLOCK(&rwl->lock);
}

isc_result_t
isc_rwlock_unlock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {

        REQUIRE(VALID_RWLOCK(rwl));
        LOCK(&rwl->lock);
        REQUIRE(rwl->type == type);

        UNUSED(type);

#ifdef ISC_RWLOCK_TRACE
        print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                  ISC_MSG_PREUNLOCK, "preunlock"), rwl, type);
#endif

        INSIST(rwl->active > 0);
        rwl->active--;
        if (rwl->active == 0) {
                if (rwl->original != isc_rwlocktype_none) {
                        rwl->type = rwl->original;
                        rwl->original = isc_rwlocktype_none;
                }
                if (rwl->type == isc_rwlocktype_read) {
                        rwl->granted = 0;
                        if (rwl->writers_waiting > 0) {
                                rwl->type = isc_rwlocktype_write;
                                SIGNAL(&rwl->writeable);
                        } else if (rwl->readers_waiting > 0) {
                                /* Does this case ever happen? */
                                BROADCAST(&rwl->readable);
                        }
                } else {
                        if (rwl->readers_waiting > 0) {
                                if (rwl->writers_waiting > 0 &&
                                    rwl->granted < rwl->write_quota) {
                                        SIGNAL(&rwl->writeable);
                                } else {
                                        rwl->granted = 0;
                                        rwl->type = isc_rwlocktype_read;
                                        BROADCAST(&rwl->readable);
                                }
                        } else if (rwl->writers_waiting > 0) {
                                rwl->granted = 0;
                                SIGNAL(&rwl->writeable);
                        } else {
                                rwl->granted = 0;
                        }
                }
        }
        INSIST(rwl->original == isc_rwlocktype_none);

#ifdef ISC_RWLOCK_TRACE
        print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
                                  ISC_MSG_POSTUNLOCK, "postunlock"),
                   rwl, type);
#endif

        UNLOCK(&rwl->lock);

        return (ISC_R_SUCCESS);
}

#endif /* ISC_PLATFORM_HAVEXADD && ISC_PLATFORM_HAVECMPXCHG */
#else /* ISC_PLATFORM_USETHREADS */

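/*
 * Single-threaded (no-threads) implementation: blocking is impossible here,
 * so the lock state is tracked only to detect conflicting use, and a
 * conflicting request fails immediately with ISC_R_LOCKBUSY.
 */
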
isc_result_t
isc_rwlock_init(isc_rwlock_t *rwl, unsigned int read_quota,
                unsigned int write_quota)
{
        REQUIRE(rwl != NULL);

        UNUSED(read_quota);
        UNUSED(write_quota);

        rwl->type = isc_rwlocktype_read;
        rwl->active = 0;
        rwl->magic = RWLOCK_MAGIC;

        return (ISC_R_SUCCESS);
}

isc_result_t
isc_rwlock_lock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
        REQUIRE(VALID_RWLOCK(rwl));

        if (type == isc_rwlocktype_read) {
                if (rwl->type != isc_rwlocktype_read && rwl->active != 0)
                        return (ISC_R_LOCKBUSY);
                rwl->type = isc_rwlocktype_read;
                rwl->active++;
        } else {
                if (rwl->active != 0)
                        return (ISC_R_LOCKBUSY);
                rwl->type = isc_rwlocktype_write;
                rwl->active = 1;
        }
        return (ISC_R_SUCCESS);
}

isc_result_t
isc_rwlock_trylock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
        return (isc_rwlock_lock(rwl, type));
}

isc_result_t
isc_rwlock_tryupgrade(isc_rwlock_t *rwl) {
        isc_result_t result = ISC_R_SUCCESS;

        REQUIRE(VALID_RWLOCK(rwl));
        REQUIRE(rwl->type == isc_rwlocktype_read);
        REQUIRE(rwl->active != 0);

        /* If we are the only reader then succeed. */
        if (rwl->active == 1)
                rwl->type = isc_rwlocktype_write;
        else
                result = ISC_R_LOCKBUSY;
        return (result);
}

void
isc_rwlock_downgrade(isc_rwlock_t *rwl) {

        REQUIRE(VALID_RWLOCK(rwl));
        REQUIRE(rwl->type == isc_rwlocktype_write);
        REQUIRE(rwl->active == 1);

        rwl->type = isc_rwlocktype_read;
}

isc_result_t
isc_rwlock_unlock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
        REQUIRE(VALID_RWLOCK(rwl));
        REQUIRE(rwl->type == type);

        UNUSED(type);

        INSIST(rwl->active > 0);
        rwl->active--;

        return (ISC_R_SUCCESS);
}

void
isc_rwlock_destroy(isc_rwlock_t *rwl) {
        REQUIRE(rwl != NULL);
        REQUIRE(rwl->active == 0);
        rwl->magic = 0;
}

#endif /* ISC_PLATFORM_USETHREADS */