sys/kern/kern_sx.c
1 /*-
2  * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
3  * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice(s), this list of conditions and the following disclaimer as
11  *    the first lines of this file unmodified other than the possible
12  *    addition of one or more copyright notices.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice(s), this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
27  * DAMAGE.
28  */
29
30 /*
31  * Shared/exclusive locks.  This implementation attempts to ensure
32  * deterministic lock granting behavior, so that slocks and xlocks are
33  * interleaved.
34  *
35  * Priority propagation will not generally raise the priority of lock holders,
36  * so it should not be relied upon in combination with sx locks.
37  */
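
/*
 * Illustrative usage of the sx(9) interface implemented below (a sketch
 * only; sx_init(), sx_xlock() and the other wrappers used here are the
 * macros provided by sys/sx.h):
 *
 *	struct sx data_lock;
 *
 *	sx_init(&data_lock, "data lock");
 *
 *	sx_xlock(&data_lock);		(exclusive, writer access)
 *	... modify the protected data ...
 *	sx_xunlock(&data_lock);
 *
 *	sx_slock(&data_lock);		(shared, reader access)
 *	... read the protected data ...
 *	sx_sunlock(&data_lock);
 *
 *	sx_destroy(&data_lock);
 */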
38
39 #include "opt_ddb.h"
40 #include "opt_kdtrace.h"
41 #include "opt_no_adaptive_sx.h"
42
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
45
46 #include <sys/param.h>
47 #include <sys/ktr.h>
48 #include <sys/lock.h>
49 #include <sys/mutex.h>
50 #include <sys/proc.h>
51 #include <sys/sleepqueue.h>
52 #include <sys/sx.h>
53 #include <sys/sysctl.h>
54 #include <sys/systm.h>
55
56 #if defined(SMP) && !defined(NO_ADAPTIVE_SX)
57 #include <machine/cpu.h>
58 #endif
59
60 #ifdef DDB
61 #include <ddb/ddb.h>
62 #endif
63
64 #if defined(SMP) && !defined(NO_ADAPTIVE_SX)
65 #define ADAPTIVE_SX
66 #endif
67
68 CTASSERT((SX_NOADAPTIVE & LO_CLASSFLAGS) == SX_NOADAPTIVE);
69
70 /* Handy macros for sleep queues. */
71 #define SQ_EXCLUSIVE_QUEUE      0
72 #define SQ_SHARED_QUEUE         1
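/*
 * Shared and exclusive waiters sleep on separate queues of the same wait
 * channel (&sx->lock_object), so the unlock paths can use sleepq_broadcast()
 * to wake one class of waiters without disturbing the other.
 */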
73
74 /*
75  * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file.  We
76  * drop Giant anytime we have to sleep or if we adaptively spin.
77  */
78 #define GIANT_DECLARE                                                   \
79         int _giantcnt = 0;                                              \
80         WITNESS_SAVE_DECL(Giant)                                        \
81
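/*
 * GIANT_SAVE() releases Giant completely, recording the recursion depth in
 * _giantcnt so that GIANT_RESTORE() can reacquire it the same number of
 * times once we are done sleeping or spinning.
 */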
82 #define GIANT_SAVE() do {                                               \
83         if (mtx_owned(&Giant)) {                                        \
84                 WITNESS_SAVE(&Giant.lock_object, Giant);                \
85                 while (mtx_owned(&Giant)) {                             \
86                         _giantcnt++;                                    \
87                         mtx_unlock(&Giant);                             \
88                 }                                                       \
89         }                                                               \
90 } while (0)
91
92 #define GIANT_RESTORE() do {                                            \
93         if (_giantcnt > 0) {                                            \
94                 mtx_assert(&Giant, MA_NOTOWNED);                        \
95                 while (_giantcnt--)                                     \
96                         mtx_lock(&Giant);                               \
97                 WITNESS_RESTORE(&Giant.lock_object, Giant);             \
98         }                                                               \
99 } while (0)
100
101 /*
102  * Returns true if an exclusive lock is recursed.  It assumes
103  * curthread currently has an exclusive lock.
104  */
105 #define sx_recurse              lock_object.lo_data
106 #define sx_recursed(sx)         ((sx)->sx_recurse != 0)
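
/*
 * All of the lock state is packed into the single word sx_lock: when the
 * lock is held exclusively it carries the owning thread pointer, when held
 * shared it carries the sharer count, and the low bits hold the
 * SX_LOCK_SHARED, waiters and recursed flags.  See sys/sx.h for the exact
 * encoding and the SX_OWNER()/SX_SHARERS() accessors.
 */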
107
108 static void     assert_sx(struct lock_object *lock, int what);
109 #ifdef DDB
110 static void     db_show_sx(struct lock_object *lock);
111 #endif
112 static void     lock_sx(struct lock_object *lock, int how);
113 #ifdef KDTRACE_HOOKS
114 static int      owner_sx(struct lock_object *lock, struct thread **owner);
115 #endif
116 static int      unlock_sx(struct lock_object *lock);
117
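/*
 * Generic lock class glue: these hooks let lock-type-agnostic code (for
 * example the sleep(9) and condvar code, DDB's "show lock" command and the
 * DTrace lockstat provider) assert, acquire, release and query an sx lock
 * without knowing its internals.
 */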
118 struct lock_class lock_class_sx = {
119         .lc_name = "sx",
120         .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
121         .lc_assert = assert_sx,
122 #ifdef DDB
123         .lc_ddb_show = db_show_sx,
124 #endif
125         .lc_lock = lock_sx,
126         .lc_unlock = unlock_sx,
127 #ifdef KDTRACE_HOOKS
128         .lc_owner = owner_sx,
129 #endif
130 };
131
132 #ifndef INVARIANTS
133 #define _sx_assert(sx, what, file, line)
134 #endif
135
136 #ifdef ADAPTIVE_SX
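/*
 * Adaptive spinning tunables: a writer that finds the lock held by readers
 * spins for up to asx_retries rounds of asx_loops iterations (see
 * _sx_xlock_hard() below) before it goes to sleep.
 */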
137 static u_int asx_retries = 10;
138 static u_int asx_loops = 10000;
139 SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging");
140 SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, "");
141 SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, "");
142 #endif
143
144 void
145 assert_sx(struct lock_object *lock, int what)
146 {
147
148         sx_assert((struct sx *)lock, what);
149 }
150
151 void
152 lock_sx(struct lock_object *lock, int how)
153 {
154         struct sx *sx;
155
156         sx = (struct sx *)lock;
157         if (how)
158                 sx_xlock(sx);
159         else
160                 sx_slock(sx);
161 }
162
163 int
164 unlock_sx(struct lock_object *lock)
165 {
166         struct sx *sx;
167
168         sx = (struct sx *)lock;
169         sx_assert(sx, SA_LOCKED | SA_NOTRECURSED);
170         if (sx_xlocked(sx)) {
171                 sx_xunlock(sx);
172                 return (1);
173         } else {
174                 sx_sunlock(sx);
175                 return (0);
176         }
177 }
178
179 #ifdef KDTRACE_HOOKS
180 int
181 owner_sx(struct lock_object *lock, struct thread **owner)
182 {
183         struct sx *sx = (struct sx *)lock;
184         uintptr_t x = sx->sx_lock;
185
186         *owner = (struct thread *)SX_OWNER(x);
187         return ((x & SX_LOCK_SHARED) != 0 ? (SX_SHARERS(x) != 0) :
188             (*owner != NULL));
189 }
190 #endif
191
192 void
193 sx_sysinit(void *arg)
194 {
195         struct sx_args *sargs = arg;
196
197         sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags);
198 }
199
200 void
201 sx_init_flags(struct sx *sx, const char *description, int opts)
202 {
203         int flags;
204
205         MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK |
206             SX_NOPROFILE | SX_NOADAPTIVE)) == 0);
207         ASSERT_ATOMIC_LOAD_PTR(sx->sx_lock,
208             ("%s: sx_lock not aligned for %s: %p", __func__, description,
209             &sx->sx_lock));
210
211         flags = LO_SLEEPABLE | LO_UPGRADABLE;
212         if (opts & SX_DUPOK)
213                 flags |= LO_DUPOK;
214         if (opts & SX_NOPROFILE)
215                 flags |= LO_NOPROFILE;
216         if (!(opts & SX_NOWITNESS))
217                 flags |= LO_WITNESS;
218         if (opts & SX_RECURSE)
219                 flags |= LO_RECURSABLE;
220         if (opts & SX_QUIET)
221                 flags |= LO_QUIET;
222
223         flags |= opts & SX_NOADAPTIVE;
224         sx->sx_lock = SX_LOCK_UNLOCKED;
225         sx->sx_recurse = 0;
226         lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags);
227 }
228
229 void
230 sx_destroy(struct sx *sx)
231 {
232
233         KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held"));
234         KASSERT(sx->sx_recurse == 0, ("sx lock still recursed"));
235         sx->sx_lock = SX_LOCK_DESTROYED;
236         lock_destroy(&sx->lock_object);
237 }
238
239 int
240 _sx_slock(struct sx *sx, int opts, const char *file, int line)
241 {
242         int error = 0;
243
244         MPASS(curthread != NULL);
245         KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
246             ("sx_slock() of destroyed sx @ %s:%d", file, line));
247         WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL);
248         error = __sx_slock(sx, opts, file, line);
249         if (!error) {
250                 LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
251                 WITNESS_LOCK(&sx->lock_object, 0, file, line);
252                 curthread->td_locks++;
253         }
254
255         return (error);
256 }
257
258 int
259 _sx_try_slock(struct sx *sx, const char *file, int line)
260 {
261         uintptr_t x;
262
263         for (;;) {
264                 x = sx->sx_lock;
265                 KASSERT(x != SX_LOCK_DESTROYED,
266                     ("sx_try_slock() of destroyed sx @ %s:%d", file, line));
267                 if (!(x & SX_LOCK_SHARED))
268                         break;
269                 if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) {
270                         LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line);
271                         WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line);
272                         curthread->td_locks++;
273                         return (1);
274                 }
275         }
276
277         LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
278         return (0);
279 }
280
281 int
282 _sx_xlock(struct sx *sx, int opts, const char *file, int line)
283 {
284         int error = 0;
285
286         MPASS(curthread != NULL);
287         KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
288             ("sx_xlock() of destroyed sx @ %s:%d", file, line));
289         WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
290             line, NULL);
291         error = __sx_xlock(sx, curthread, opts, file, line);
292         if (!error) {
293                 LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse,
294                     file, line);
295                 WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
296                 curthread->td_locks++;
297         }
298
299         return (error);
300 }
301
302 int
303 _sx_try_xlock(struct sx *sx, const char *file, int line)
304 {
305         int rval;
306
307         MPASS(curthread != NULL);
308         KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
309             ("sx_try_xlock() of destroyed sx @ %s:%d", file, line));
310
311         if (sx_xlocked(sx) &&
312             (sx->lock_object.lo_flags & LO_RECURSABLE) != 0) {
313                 sx->sx_recurse++;
314                 atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
315                 rval = 1;
316         } else
317                 rval = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED,
318                     (uintptr_t)curthread);
319         LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line);
320         if (rval) {
321                 WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
322                     file, line);
323                 curthread->td_locks++;
324         }
325
326         return (rval);
327 }
328
329 void
330 _sx_sunlock(struct sx *sx, const char *file, int line)
331 {
332
333         MPASS(curthread != NULL);
334         KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
335             ("sx_sunlock() of destroyed sx @ %s:%d", file, line));
336         _sx_assert(sx, SA_SLOCKED, file, line);
337         curthread->td_locks--;
338         WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
339         LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
340         __sx_sunlock(sx, file, line);
341         LOCKSTAT_PROFILE_RELEASE_LOCK(LS_SX_SUNLOCK_RELEASE, sx);
342 }
343
344 void
345 _sx_xunlock(struct sx *sx, const char *file, int line)
346 {
347
348         MPASS(curthread != NULL);
349         KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
350             ("sx_xunlock() of destroyed sx @ %s:%d", file, line));
351         _sx_assert(sx, SA_XLOCKED, file, line);
352         curthread->td_locks--;
353         WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
354         LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file,
355             line);
356         if (!sx_recursed(sx))
357                 LOCKSTAT_PROFILE_RELEASE_LOCK(LS_SX_XUNLOCK_RELEASE, sx);
358         __sx_xunlock(sx, curthread, file, line);
359 }
360
361 /*
362  * Try to do a non-blocking upgrade from a shared lock to an exclusive lock.
363  * This will only succeed if this thread holds a single shared lock.
364  * Return 1 if the upgrade succeeds, 0 otherwise.
365  */
366 int
367 _sx_try_upgrade(struct sx *sx, const char *file, int line)
368 {
369         uintptr_t x;
370         int success;
371
372         KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
373             ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line));
374         _sx_assert(sx, SA_SLOCKED, file, line);
375
376         /*
377          * Try to switch from one shared lock to an exclusive lock.  We need
378          * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that
379          * we will wake up the exclusive waiters when we drop the lock.
380          */
381         x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS;
382         success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x,
383             (uintptr_t)curthread | x);
384         LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
385         if (success) {
386                 WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
387                     file, line);
388                 LOCKSTAT_RECORD0(LS_SX_TRYUPGRADE_UPGRADE, sx);
389         }
390         return (success);
391 }
392
393 /*
394  * Downgrade an unrecursed exclusive lock into a single shared lock.
395  */
396 void
397 _sx_downgrade(struct sx *sx, const char *file, int line)
398 {
399         uintptr_t x;
400         int wakeup_swapper;
401
402         KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
403             ("sx_downgrade() of destroyed sx @ %s:%d", file, line));
404         _sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line);
405 #ifndef INVARIANTS
406         if (sx_recursed(sx))
407                 panic("downgrade of a recursed lock");
408 #endif
409
410         WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line);
411
412         /*
413          * Try to switch from an exclusive lock with no shared waiters
414          * to one sharer with no shared waiters.  If there are
415          * exclusive waiters, we don't need to lock the sleep queue so
416          * long as we preserve the flag.  We do one quick try and if
417          * that fails we grab the sleepq lock to keep the flags from
418          * changing and do it the slow way.
419          *
420          * We have to lock the sleep queue if there are shared waiters
421          * so we can wake them up.
422          */
423         x = sx->sx_lock;
424         if (!(x & SX_LOCK_SHARED_WAITERS) &&
425             atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) |
426             (x & SX_LOCK_EXCLUSIVE_WAITERS))) {
427                 LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
428                 return;
429         }
430
431         /*
432          * Lock the sleep queue so we can read the waiters bits
433          * without any races and wakeup any shared waiters.
434          */
435         sleepq_lock(&sx->lock_object);
436
437         /*
438          * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single
439          * shared lock.  If there are any shared waiters, wake them up.
440          */
441         wakeup_swapper = 0;
442         x = sx->sx_lock;
443         atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) |
444             (x & SX_LOCK_EXCLUSIVE_WAITERS));
445         if (x & SX_LOCK_SHARED_WAITERS)
446                 wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
447                     0, SQ_SHARED_QUEUE);
448         sleepq_release(&sx->lock_object);
449
450         LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
451         LOCKSTAT_RECORD0(LS_SX_DOWNGRADE_DOWNGRADE, sx);
452
453         if (wakeup_swapper)
454                 kick_proc0();
455 }
456
457 /*
458  * This function represents the so-called 'hard case' for sx_xlock
459  * operation.  All 'easy case' failures are redirected to this.  Note
460  * that ideally this would be a static function, but it needs to be
461  * accessible from at least sx.h.
462  */
463 int
464 _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
465     int line)
466 {
467         GIANT_DECLARE;
468 #ifdef ADAPTIVE_SX
469         volatile struct thread *owner;
470         u_int i, spintries = 0;
471 #endif
472         uintptr_t x;
473 #ifdef LOCK_PROFILING
474         uint64_t waittime = 0;
475         int contested = 0;
476 #endif
477         int error = 0;
478 #ifdef  KDTRACE_HOOKS
479         uint64_t spin_cnt = 0;
480         uint64_t sleep_cnt = 0;
481         int64_t sleep_time = 0;
482 #endif
483
484         /* If we already hold an exclusive lock, then recurse. */
485         if (sx_xlocked(sx)) {
486                 KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
487             ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n",
488                     sx->lock_object.lo_name, file, line));
489                 sx->sx_recurse++;
490                 atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
491                 if (LOCK_LOG_TEST(&sx->lock_object, 0))
492                         CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx);
493                 return (0);
494         }
495
496         if (LOCK_LOG_TEST(&sx->lock_object, 0))
497                 CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
498                     sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
499
500         while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) {
501 #ifdef KDTRACE_HOOKS
502                 spin_cnt++;
503 #endif
504                 lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
505                     &waittime);
506 #ifdef ADAPTIVE_SX
507                 /*
508                  * If the lock is write locked and the owner is
509                  * running on another CPU, spin until the owner stops
510                  * running or the state of the lock changes.
511                  */
512                 x = sx->sx_lock;
513                 if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
514                         if ((x & SX_LOCK_SHARED) == 0) {
515                                 x = SX_OWNER(x);
516                                 owner = (struct thread *)x;
517                                 if (TD_IS_RUNNING(owner)) {
518                                         if (LOCK_LOG_TEST(&sx->lock_object, 0))
519                                                 CTR3(KTR_LOCK,
520                                             "%s: spinning on %p held by %p",
521                                                     __func__, sx, owner);
522                                         GIANT_SAVE();
523                                         while (SX_OWNER(sx->sx_lock) == x &&
524                                             TD_IS_RUNNING(owner)) {
525                                                 cpu_spinwait();
526 #ifdef KDTRACE_HOOKS
527                                                 spin_cnt++;
528 #endif
529                                         }
530                                         continue;
531                                 }
532                         } else if (SX_SHARERS(x) && spintries < asx_retries) {
533                                 GIANT_SAVE();
534                                 spintries++;
535                                 for (i = 0; i < asx_loops; i++) {
536                                         if (LOCK_LOG_TEST(&sx->lock_object, 0))
537                                                 CTR4(KTR_LOCK,
538                                     "%s: shared spinning on %p with %u and %u",
539                                                     __func__, sx, spintries, i);
540                                         x = sx->sx_lock;
541                                         if ((x & SX_LOCK_SHARED) == 0 ||
542                                             SX_SHARERS(x) == 0)
543                                                 break;
544                                         cpu_spinwait();
545 #ifdef KDTRACE_HOOKS
546                                         spin_cnt++;
547 #endif
548                                 }
549                                 if (i != asx_loops)
550                                         continue;
551                         }
552                 }
553 #endif
554
555                 sleepq_lock(&sx->lock_object);
556                 x = sx->sx_lock;
557
558                 /*
559                  * If the lock was released while spinning on the
560                  * sleep queue chain lock, try again.
561                  */
562                 if (x == SX_LOCK_UNLOCKED) {
563                         sleepq_release(&sx->lock_object);
564                         continue;
565                 }
566
567 #ifdef ADAPTIVE_SX
568                 /*
569                  * The current lock owner might have started executing
570                  * on another CPU (or the lock could have changed
571                  * owners) while we were waiting on the sleep queue
572                  * chain lock.  If so, drop the sleep queue lock and try
573                  * again.
574                  */
575                 if (!(x & SX_LOCK_SHARED) &&
576                     (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
577                         owner = (struct thread *)SX_OWNER(x);
578                         if (TD_IS_RUNNING(owner)) {
579                                 sleepq_release(&sx->lock_object);
580                                 continue;
581                         }
582                 }
583 #endif
584
585                 /*
586                  * If an exclusive lock was released with both shared
587                  * and exclusive waiters and a shared waiter hasn't
588                  * woken up and acquired the lock yet, sx_lock will be
589                  * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS.
590                  * If we see that value, try to acquire it once.  Note
591                  * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS
592                  * as there may still be other exclusive waiters.  If we
593                  * fail, restart the loop.
594                  */
595                 if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) {
596                         if (atomic_cmpset_acq_ptr(&sx->sx_lock,
597                             SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS,
598                             tid | SX_LOCK_EXCLUSIVE_WAITERS)) {
599                                 sleepq_release(&sx->lock_object);
600                                 CTR2(KTR_LOCK, "%s: %p claimed by new writer",
601                                     __func__, sx);
602                                 break;
603                         }
604                         sleepq_release(&sx->lock_object);
605                         continue;
606                 }
607
608                 /*
609                  * Try to set the SX_LOCK_EXCLUSIVE_WAITERS flag.  If we
610                  * fail, then loop back and retry.
611                  */
612                 if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
613                         if (!atomic_cmpset_ptr(&sx->sx_lock, x,
614                             x | SX_LOCK_EXCLUSIVE_WAITERS)) {
615                                 sleepq_release(&sx->lock_object);
616                                 continue;
617                         }
618                         if (LOCK_LOG_TEST(&sx->lock_object, 0))
619                                 CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
620                                     __func__, sx);
621                 }
622
623                 /*
624                  * Since we have been unable to acquire the exclusive
625                  * lock and the exclusive waiters flag is set, we have
626                  * to sleep.
627                  */
628                 if (LOCK_LOG_TEST(&sx->lock_object, 0))
629                         CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
630                             __func__, sx);
631
632 #ifdef KDTRACE_HOOKS
633                 sleep_time -= lockstat_nsecs();
634 #endif
635                 GIANT_SAVE();
636                 sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
637                     SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
638                     SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE);
639                 if (!(opts & SX_INTERRUPTIBLE))
640                         sleepq_wait(&sx->lock_object, 0);
641                 else
642                         error = sleepq_wait_sig(&sx->lock_object, 0);
643 #ifdef KDTRACE_HOOKS
644                 sleep_time += lockstat_nsecs();
645                 sleep_cnt++;
646 #endif
647                 if (error) {
648                         if (LOCK_LOG_TEST(&sx->lock_object, 0))
649                                 CTR2(KTR_LOCK,
650                         "%s: interruptible sleep by %p suspended by signal",
651                                     __func__, sx);
652                         break;
653                 }
654                 if (LOCK_LOG_TEST(&sx->lock_object, 0))
655                         CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
656                             __func__, sx);
657         }
658
659         GIANT_RESTORE();
660         if (!error)
661                 LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_XLOCK_ACQUIRE, sx,
662                     contested, waittime, file, line);
663 #ifdef KDTRACE_HOOKS
664         if (sleep_time)
665                 LOCKSTAT_RECORD1(LS_SX_XLOCK_BLOCK, sx, sleep_time);
666         if (spin_cnt > sleep_cnt)
667                 LOCKSTAT_RECORD1(LS_SX_XLOCK_SPIN, sx, (spin_cnt - sleep_cnt));
668 #endif
669         return (error);
670 }
671
672 /*
673  * This function represents the so-called 'hard case' for sx_xunlock
674  * operation.  All 'easy case' failures are redirected to this.  Note
675  * that ideally this would be a static function, but it needs to be
676  * accessible from at least sx.h.
677  */
678 void
679 _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
680 {
681         uintptr_t x;
682         int queue, wakeup_swapper;
683
684         MPASS(!(sx->sx_lock & SX_LOCK_SHARED));
685
686         /* If the lock is recursed, then unrecurse one level. */
687         if (sx_xlocked(sx) && sx_recursed(sx)) {
688                 if ((--sx->sx_recurse) == 0)
689                         atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
690                 if (LOCK_LOG_TEST(&sx->lock_object, 0))
691                         CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
692                 return;
693         }
694         MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS |
695             SX_LOCK_EXCLUSIVE_WAITERS));
696         if (LOCK_LOG_TEST(&sx->lock_object, 0))
697                 CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);
698
699         sleepq_lock(&sx->lock_object);
700         x = SX_LOCK_UNLOCKED;
701
702         /*
703          * The wake up algorithm here is quite simple and probably not
704          * ideal.  It gives precedence to shared waiters if they are
705          * present.  For this condition, we have to preserve the
706          * state of the exclusive waiters flag.
707          * If interruptible sleeps left the shared queue empty, avoid
708          * starvation of the threads sleeping on the exclusive queue by
709          * giving them precedence and clearing the shared waiters bit anyway.
710          */
711         if ((sx->sx_lock & SX_LOCK_SHARED_WAITERS) != 0 &&
712             sleepq_sleepcnt(&sx->lock_object, SQ_SHARED_QUEUE) != 0) {
713                 queue = SQ_SHARED_QUEUE;
714                 x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS);
715         } else
716                 queue = SQ_EXCLUSIVE_QUEUE;
717
718         /* Wake up all the waiters for the specific queue. */
719         if (LOCK_LOG_TEST(&sx->lock_object, 0))
720                 CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue",
721                     __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" :
722                     "exclusive");
723         atomic_store_rel_ptr(&sx->sx_lock, x);
724         wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0,
725             queue);
726         sleepq_release(&sx->lock_object);
727         if (wakeup_swapper)
728                 kick_proc0();
729 }
730
731 /*
732  * This function represents the so-called 'hard case' for sx_slock
733  * operation.  All 'easy case' failures are redirected to this.  Note
734  * that ideally this would be a static function, but it needs to be
735  * accessible from at least sx.h.
736  */
737 int
738 _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
739 {
740         GIANT_DECLARE;
741 #ifdef ADAPTIVE_SX
742         volatile struct thread *owner;
743 #endif
744 #ifdef LOCK_PROFILING
745         uint64_t waittime = 0;
746         int contested = 0;
747 #endif
748         uintptr_t x;
749         int error = 0;
750 #ifdef KDTRACE_HOOKS
751         uint64_t spin_cnt = 0;
752         uint64_t sleep_cnt = 0;
753         int64_t sleep_time = 0;
754 #endif
755
756         /*
757          * As with rwlocks, we make no attempt to block new shared
758          * acquisitions once there is an exclusive waiter.
759          */
760         for (;;) {
761 #ifdef KDTRACE_HOOKS
762                 spin_cnt++;
763 #endif
764                 x = sx->sx_lock;
765
766                 /*
767                  * If no other thread has an exclusive lock then try to bump up
768                  * the count of sharers.  Since we have to preserve the state
769                  * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the
770                  * shared lock, loop back and retry.
771                  */
772                 if (x & SX_LOCK_SHARED) {
773                         MPASS(!(x & SX_LOCK_SHARED_WAITERS));
774                         if (atomic_cmpset_acq_ptr(&sx->sx_lock, x,
775                             x + SX_ONE_SHARER)) {
776                                 if (LOCK_LOG_TEST(&sx->lock_object, 0))
777                                         CTR4(KTR_LOCK,
778                                             "%s: %p succeed %p -> %p", __func__,
779                                             sx, (void *)x,
780                                             (void *)(x + SX_ONE_SHARER));
781                                 break;
782                         }
783                         continue;
784                 }
785                 lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
786                     &waittime);
787
788 #ifdef ADAPTIVE_SX
789                 /*
790                  * If the owner is running on another CPU, spin until
791                  * the owner stops running or the state of the lock
792                  * changes.
793                  */
794                 if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
795                         x = SX_OWNER(x);
796                         owner = (struct thread *)x;
797                         if (TD_IS_RUNNING(owner)) {
798                                 if (LOCK_LOG_TEST(&sx->lock_object, 0))
799                                         CTR3(KTR_LOCK,
800                                             "%s: spinning on %p held by %p",
801                                             __func__, sx, owner);
802                                 GIANT_SAVE();
803                                 while (SX_OWNER(sx->sx_lock) == x &&
804                                     TD_IS_RUNNING(owner)) {
805 #ifdef KDTRACE_HOOKS
806                                         spin_cnt++;
807 #endif
808                                         cpu_spinwait();
809                                 }
810                                 continue;
811                         }
812                 }
813 #endif
814
815                 /*
816                  * Some other thread already has an exclusive lock, so
817                  * start the process of blocking.
818                  */
819                 sleepq_lock(&sx->lock_object);
820                 x = sx->sx_lock;
821
822                 /*
823                  * The lock could have been released while we spun.
824                  * In this case loop back and retry.
825                  */
826                 if (x & SX_LOCK_SHARED) {
827                         sleepq_release(&sx->lock_object);
828                         continue;
829                 }
830
831 #ifdef ADAPTIVE_SX
832                 /*
833                  * If the owner is running on another CPU, spin until
834                  * the owner stops running or the state of the lock
835                  * changes.
836                  */
837                 if (!(x & SX_LOCK_SHARED) &&
838                     (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
839                         owner = (struct thread *)SX_OWNER(x);
840                         if (TD_IS_RUNNING(owner)) {
841                                 sleepq_release(&sx->lock_object);
842                                 continue;
843                         }
844                 }
845 #endif
846
847                 /*
848                  * Try to set the SX_LOCK_SHARED_WAITERS flag.  If we
849                  * fail to set it, drop the sleep queue lock and loop
850                  * back.
851                  */
852                 if (!(x & SX_LOCK_SHARED_WAITERS)) {
853                         if (!atomic_cmpset_ptr(&sx->sx_lock, x,
854                             x | SX_LOCK_SHARED_WAITERS)) {
855                                 sleepq_release(&sx->lock_object);
856                                 continue;
857                         }
858                         if (LOCK_LOG_TEST(&sx->lock_object, 0))
859                                 CTR2(KTR_LOCK, "%s: %p set shared waiters flag",
860                                     __func__, sx);
861                 }
862
863                 /*
864                  * Since we have been unable to acquire the shared lock,
865                  * we have to sleep.
866                  */
867                 if (LOCK_LOG_TEST(&sx->lock_object, 0))
868                         CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
869                             __func__, sx);
870
871 #ifdef KDTRACE_HOOKS
872                 sleep_time -= lockstat_nsecs();
873 #endif
874                 GIANT_SAVE();
875                 sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
876                     SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
877                     SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE);
878                 if (!(opts & SX_INTERRUPTIBLE))
879                         sleepq_wait(&sx->lock_object, 0);
880                 else
881                         error = sleepq_wait_sig(&sx->lock_object, 0);
882 #ifdef KDTRACE_HOOKS
883                 sleep_time += lockstat_nsecs();
884                 sleep_cnt++;
885 #endif
886                 if (error) {
887                         if (LOCK_LOG_TEST(&sx->lock_object, 0))
888                                 CTR2(KTR_LOCK,
889                         "%s: interruptible sleep by %p suspended by signal",
890                                     __func__, sx);
891                         break;
892                 }
893                 if (LOCK_LOG_TEST(&sx->lock_object, 0))
894                         CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
895                             __func__, sx);
896         }
897         if (error == 0)
898                 LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_SLOCK_ACQUIRE, sx,
899                     contested, waittime, file, line);
900 #ifdef KDTRACE_HOOKS
901         if (sleep_time)
902                 LOCKSTAT_RECORD1(LS_SX_SLOCK_BLOCK, sx, sleep_time);
903         if (spin_cnt > sleep_cnt)
904                 LOCKSTAT_RECORD1(LS_SX_SLOCK_SPIN, sx, (spin_cnt - sleep_cnt));
905 #endif
906         GIANT_RESTORE();
907         return (error);
908 }
909
910 /*
911  * This function represents the so-called 'hard case' for sx_sunlock
912  * operation.  All 'easy case' failures are redirected to this.  Note
913  * that ideally this would be a static function, but it needs to be
914  * accessible from at least sx.h.
915  */
916 void
917 _sx_sunlock_hard(struct sx *sx, const char *file, int line)
918 {
919         uintptr_t x;
920         int wakeup_swapper;
921
922         for (;;) {
923                 x = sx->sx_lock;
924
925                 /*
926                  * We should never have shared waiters while at least one
927                  * thread holds a shared lock.
928                  */
929                 KASSERT(!(x & SX_LOCK_SHARED_WAITERS),
930                     ("%s: waiting sharers", __func__));
931
932                 /*
933                  * See if there is more than one shared lock held.  If
934                  * so, just drop one and return.
935                  */
936                 if (SX_SHARERS(x) > 1) {
937                         if (atomic_cmpset_rel_ptr(&sx->sx_lock, x,
938                             x - SX_ONE_SHARER)) {
939                                 if (LOCK_LOG_TEST(&sx->lock_object, 0))
940                                         CTR4(KTR_LOCK,
941                                             "%s: %p succeeded %p -> %p",
942                                             __func__, sx, (void *)x,
943                                             (void *)(x - SX_ONE_SHARER));
944                                 break;
945                         }
946                         continue;
947                 }
948
949                 /*
950                  * If there aren't any waiters for an exclusive lock,
951                  * then try to drop it quickly.
952                  */
953                 if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
954                         MPASS(x == SX_SHARERS_LOCK(1));
955                         if (atomic_cmpset_rel_ptr(&sx->sx_lock,
956                             SX_SHARERS_LOCK(1), SX_LOCK_UNLOCKED)) {
957                                 if (LOCK_LOG_TEST(&sx->lock_object, 0))
958                                         CTR2(KTR_LOCK, "%s: %p last succeeded",
959                                             __func__, sx);
960                                 break;
961                         }
962                         continue;
963                 }
964
965                 /*
966                  * At this point, there should just be one sharer with
967                  * exclusive waiters.
968                  */
969                 MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS));
970
971                 sleepq_lock(&sx->lock_object);
972
973                 /*
974                  * The wakeup semantic here is quite simple: just wake up
975                  * all the exclusive waiters.  Note that the state of the
976                  * lock could have changed since we last read it, so if the
977                  * cmpset fails, loop back and retry.
978                  */
979                 if (!atomic_cmpset_rel_ptr(&sx->sx_lock,
980                     SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS,
981                     SX_LOCK_UNLOCKED)) {
982                         sleepq_release(&sx->lock_object);
983                         continue;
984                 }
985                 if (LOCK_LOG_TEST(&sx->lock_object, 0))
986                         CTR2(KTR_LOCK, "%s: %p waking up all threads on "
987                             "exclusive queue", __func__, sx);
988                 wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
989                     0, SQ_EXCLUSIVE_QUEUE);
990                 sleepq_release(&sx->lock_object);
991                 if (wakeup_swapper)
992                         kick_proc0();
993                 break;
994         }
995 }
996
997 #ifdef INVARIANT_SUPPORT
998 #ifndef INVARIANTS
999 #undef  _sx_assert
1000 #endif
1001
1002 /*
1003  * In the non-WITNESS case, sx_assert() can only detect that at least
1004  * *some* thread owns an slock, but it cannot guarantee that *this*
1005  * thread owns an slock.
1006  */
1007 void
1008 _sx_assert(struct sx *sx, int what, const char *file, int line)
1009 {
1010 #ifndef WITNESS
1011         int slocked = 0;
1012 #endif
1013
1014         if (panicstr != NULL)
1015                 return;
1016         switch (what) {
1017         case SA_SLOCKED:
1018         case SA_SLOCKED | SA_NOTRECURSED:
1019         case SA_SLOCKED | SA_RECURSED:
1020 #ifndef WITNESS
1021                 slocked = 1;
1022                 /* FALLTHROUGH */
1023 #endif
1024         case SA_LOCKED:
1025         case SA_LOCKED | SA_NOTRECURSED:
1026         case SA_LOCKED | SA_RECURSED:
1027 #ifdef WITNESS
1028                 witness_assert(&sx->lock_object, what, file, line);
1029 #else
1030                 /*
1031                  * If some other thread has an exclusive lock or we
1032                  * have one and are asserting a shared lock, fail.
1033                  * Also, if no one has a lock at all, fail.
1034                  */
1035                 if (sx->sx_lock == SX_LOCK_UNLOCKED ||
1036                     (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked ||
1037                     sx_xholder(sx) != curthread)))
1038                         panic("Lock %s not %slocked @ %s:%d\n",
1039                             sx->lock_object.lo_name, slocked ? "share " : "",
1040                             file, line);
1041
1042                 if (!(sx->sx_lock & SX_LOCK_SHARED)) {
1043                         if (sx_recursed(sx)) {
1044                                 if (what & SA_NOTRECURSED)
1045                                         panic("Lock %s recursed @ %s:%d\n",
1046                                             sx->lock_object.lo_name, file,
1047                                             line);
1048                         } else if (what & SA_RECURSED)
1049                                 panic("Lock %s not recursed @ %s:%d\n",
1050                                     sx->lock_object.lo_name, file, line);
1051                 }
1052 #endif
1053                 break;
1054         case SA_XLOCKED:
1055         case SA_XLOCKED | SA_NOTRECURSED:
1056         case SA_XLOCKED | SA_RECURSED:
1057                 if (sx_xholder(sx) != curthread)
1058                         panic("Lock %s not exclusively locked @ %s:%d\n",
1059                             sx->lock_object.lo_name, file, line);
1060                 if (sx_recursed(sx)) {
1061                         if (what & SA_NOTRECURSED)
1062                                 panic("Lock %s recursed @ %s:%d\n",
1063                                     sx->lock_object.lo_name, file, line);
1064                 } else if (what & SA_RECURSED)
1065                         panic("Lock %s not recursed @ %s:%d\n",
1066                             sx->lock_object.lo_name, file, line);
1067                 break;
1068         case SA_UNLOCKED:
1069 #ifdef WITNESS
1070                 witness_assert(&sx->lock_object, what, file, line);
1071 #else
1072                 /*
1073                  * If we hold an exclusive lock, fail.  We can't
1074                  * reliably check to see if we hold a shared lock or
1075                  * not.
1076                  */
1077                 if (sx_xholder(sx) == curthread)
1078                         panic("Lock %s exclusively locked @ %s:%d\n",
1079                             sx->lock_object.lo_name, file, line);
1080 #endif
1081                 break;
1082         default:
1083                 panic("Unknown sx lock assertion: %d @ %s:%d", what, file,
1084                     line);
1085         }
1086 }
1087 #endif  /* INVARIANT_SUPPORT */
1088
1089 #ifdef DDB
1090 static void
1091 db_show_sx(struct lock_object *lock)
1092 {
1093         struct thread *td;
1094         struct sx *sx;
1095
1096         sx = (struct sx *)lock;
1097
1098         db_printf(" state: ");
1099         if (sx->sx_lock == SX_LOCK_UNLOCKED)
1100                 db_printf("UNLOCKED\n");
1101         else if (sx->sx_lock == SX_LOCK_DESTROYED) {
1102                 db_printf("DESTROYED\n");
1103                 return;
1104         } else if (sx->sx_lock & SX_LOCK_SHARED)
1105                 db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock));
1106         else {
1107                 td = sx_xholder(sx);
1108                 db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1109                     td->td_tid, td->td_proc->p_pid, td->td_name);
1110                 if (sx_recursed(sx))
1111                         db_printf(" recursed: %d\n", sx->sx_recurse);
1112         }
1113
1114         db_printf(" waiters: ");
1115         switch (sx->sx_lock &
1116             (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) {
1117         case SX_LOCK_SHARED_WAITERS:
1118                 db_printf("shared\n");
1119                 break;
1120         case SX_LOCK_EXCLUSIVE_WAITERS:
1121                 db_printf("exclusive\n");
1122                 break;
1123         case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS:
1124                 db_printf("exclusive and shared\n");
1125                 break;
1126         default:
1127                 db_printf("none\n");
1128         }
1129 }
1130
1131 /*
1132  * Check to see if a thread that is blocked on a sleep queue is actually
1133  * blocked on an sx lock.  If so, output some details and return true.
1134  * If the lock has an exclusive owner, return that in *ownerp.
1135  */
1136 int
1137 sx_chain(struct thread *td, struct thread **ownerp)
1138 {
1139         struct sx *sx;
1140
1141         /*
1142          * Check to see if this thread is blocked on an sx lock.
1143          * First, we check the lock class.  If that is ok, then we
1144          * compare the lock name against the wait message.
1145          */
1146         sx = td->td_wchan;
1147         if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx ||
1148             sx->lock_object.lo_name != td->td_wmesg)
1149                 return (0);
1150
1151         /* We think we have an sx lock, so output some details. */
1152         db_printf("blocked on sx \"%s\" ", td->td_wmesg);
1153         *ownerp = sx_xholder(sx);
1154         if (sx->sx_lock & SX_LOCK_SHARED)
1155                 db_printf("SLOCK (count %ju)\n",
1156                     (uintmax_t)SX_SHARERS(sx->sx_lock));
1157         else
1158                 db_printf("XLOCK\n");
1159         return (1);
1160 }
1161 #endif