/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
 * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 *
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "namespace.h"
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/param.h>
#include <sys/queue.h>
#include <pthread.h>
#include <pthread_np.h>
#include "un-namespace.h"

#include "thr_private.h"

_Static_assert(sizeof(struct pthread_mutex) <= THR_PAGE_SIZE_MIN,
    "pthread_mutex is too large for off-page");

/*
 * For adaptive mutexes, the number of times to spin on trylock2
 * before entering the kernel to block.
 */
#define MUTEX_ADAPTIVE_SPINS    2000

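/*
 * Illustrative usage sketch (not compiled into libthr): how an
 * application would request the adaptive (spin-then-block) behavior
 * that the constant above controls, and optionally tune the spin
 * count through the non-portable interface implemented below.  The
 * spin count passed here is an arbitrary example value.
 */
#if 0
#include <pthread.h>
#include <pthread_np.h>

static pthread_mutex_t adaptive_mtx;

static int
adaptive_mutex_setup(void)
{
        pthread_mutexattr_t attr;
        int error;

        pthread_mutexattr_init(&attr);
        /* Select the adaptive type; locks spin before sleeping. */
        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
        error = pthread_mutex_init(&adaptive_mtx, &attr);
        pthread_mutexattr_destroy(&attr);
        if (error == 0)
                /* Override the default MUTEX_ADAPTIVE_SPINS-based count. */
                error = pthread_mutex_setspinloops_np(&adaptive_mtx, 1000);
        return (error);
}
#endif
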
/*
 * Prototypes
 */
int     __pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
                const struct timespec * __restrict abstime);
int     _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
int     _pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int     __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int     _pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
int     _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
int     __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);

static int      mutex_self_trylock(pthread_mutex_t);
static int      mutex_self_lock(pthread_mutex_t,
                                const struct timespec *abstime);
static int      mutex_unlock_common(struct pthread_mutex *, bool, int *);
static int      mutex_lock_sleep(struct pthread *, pthread_mutex_t,
                                const struct timespec *);
static void     mutex_init_robust(struct pthread *curthread);
static int      mutex_qidx(struct pthread_mutex *m);
static bool     is_robust_mutex(struct pthread_mutex *m);
static bool     is_pshared_mutex(struct pthread_mutex *m);

__weak_reference(__Tthr_mutex_init, pthread_mutex_init);
__weak_reference(__Tthr_mutex_init, __pthread_mutex_init);
__strong_reference(__Tthr_mutex_init, _pthread_mutex_init);
__weak_reference(__Tthr_mutex_lock, pthread_mutex_lock);
__weak_reference(__Tthr_mutex_lock, __pthread_mutex_lock);
__strong_reference(__Tthr_mutex_lock, _pthread_mutex_lock);
__weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
__strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
__weak_reference(__Tthr_mutex_trylock, pthread_mutex_trylock);
__weak_reference(__Tthr_mutex_trylock, __pthread_mutex_trylock);
__strong_reference(__Tthr_mutex_trylock, _pthread_mutex_trylock);
__weak_reference(_Tthr_mutex_consistent, pthread_mutex_consistent);
__weak_reference(_Tthr_mutex_consistent, _pthread_mutex_consistent);
__strong_reference(_Tthr_mutex_consistent, __pthread_mutex_consistent);

/* Single underscore versions provided for libc internal usage: */
/* No difference between libc and application usage of these: */
__weak_reference(_thr_mutex_destroy, pthread_mutex_destroy);
__weak_reference(_thr_mutex_destroy, _pthread_mutex_destroy);
__weak_reference(_thr_mutex_unlock, pthread_mutex_unlock);
__weak_reference(_thr_mutex_unlock, _pthread_mutex_unlock);

__weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
__weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);

__weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
__strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
__weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);

__weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
__strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
__weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);
__weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);

static void
mutex_init_link(struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
        m->m_qe.tqe_prev = NULL;
        m->m_qe.tqe_next = NULL;
        m->m_pqe.tqe_prev = NULL;
        m->m_pqe.tqe_next = NULL;
#endif
}

static void
mutex_assert_is_owned(struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
        if (__predict_false(m->m_qe.tqe_prev == NULL))
                PANIC("mutex %p own %#x is not on list %p %p",
                    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
#endif
}

static void
mutex_assert_not_owned(struct pthread *curthread __unused,
    struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
        if (__predict_false(m->m_qe.tqe_prev != NULL ||
            m->m_qe.tqe_next != NULL))
                PANIC("mutex %p own %#x is on list %p %p",
                    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
        if (__predict_false(is_robust_mutex(m) &&
            (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
            (is_pshared_mutex(m) && curthread->robust_list ==
            (uintptr_t)&m->m_lock) ||
            (!is_pshared_mutex(m) && curthread->priv_robust_list ==
            (uintptr_t)&m->m_lock))))
                PANIC(
    "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
                    m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
                    m->m_rb_prev, (void *)curthread->robust_list,
                    (void *)curthread->priv_robust_list);
#endif
}

static bool
is_pshared_mutex(struct pthread_mutex *m)
{

        return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
}

static bool
is_robust_mutex(struct pthread_mutex *m)
{

        return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
}

int
_mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
{

#if defined(_PTHREADS_INVARIANTS)
        if (__predict_false(curthread->inact_mtx != 0))
                PANIC("inact_mtx enter");
#endif
        if (!is_robust_mutex(m))
                return (0);

        mutex_init_robust(curthread);
        curthread->inact_mtx = (uintptr_t)&m->m_lock;
        return (1);
}

void
_mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
        if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
                PANIC("inact_mtx leave");
#endif
        curthread->inact_mtx = 0;
}

static int
mutex_check_attr(const struct pthread_mutex_attr *attr)
{

        if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
            attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
                return (EINVAL);
        if (attr->m_protocol < PTHREAD_PRIO_NONE ||
            attr->m_protocol > PTHREAD_PRIO_PROTECT)
                return (EINVAL);
        return (0);
}

static void
mutex_init_robust(struct pthread *curthread)
{
        struct umtx_robust_lists_params rb;

        if (curthread == NULL)
                curthread = _get_curthread();
        if (curthread->robust_inited)
                return;
        rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
        rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
        rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
        _umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
        curthread->robust_inited = 1;
}

static void
mutex_init_body(struct pthread_mutex *pmutex,
    const struct pthread_mutex_attr *attr)
{

        pmutex->m_flags = attr->m_type;
        pmutex->m_count = 0;
        pmutex->m_spinloops = 0;
        pmutex->m_yieldloops = 0;
        mutex_init_link(pmutex);
        switch (attr->m_protocol) {
        case PTHREAD_PRIO_NONE:
                pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
                pmutex->m_lock.m_flags = 0;
                break;
        case PTHREAD_PRIO_INHERIT:
                pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
                pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
                break;
        case PTHREAD_PRIO_PROTECT:
                pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
                pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
                pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
                break;
        }
        if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
                pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
        if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
                mutex_init_robust(NULL);
                pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
        }
        if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
                pmutex->m_spinloops =
                    _thr_spinloops ? _thr_spinloops : MUTEX_ADAPTIVE_SPINS;
                pmutex->m_yieldloops = _thr_yieldloops;
        }
}
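
/*
 * Illustrative usage sketch (not compiled into libthr): the protocol
 * cases handled in mutex_init_body() correspond to the standard
 * mutex-attribute knobs.  A minimal application-side setup of a
 * priority-protected mutex, whose ceiling ends up in
 * m_lock.m_ceilings[0], might look like this; the ceiling value 20
 * is an arbitrary example.
 */
#if 0
#include <pthread.h>

static pthread_mutex_t pp_mtx;

static int
init_protect_mutex(void)
{
        pthread_mutexattr_t attr;
        int error;

        pthread_mutexattr_init(&attr);
        /* Maps to UMUTEX_PRIO_PROTECT in mutex_init_body(). */
        pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_PROTECT);
        pthread_mutexattr_setprioceiling(&attr, 20);
        error = pthread_mutex_init(&pp_mtx, &attr);
        pthread_mutexattr_destroy(&attr);
        return (error);
}
#endif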

static int
mutex_init(pthread_mutex_t *mutex,
    const struct pthread_mutex_attr *mutex_attr,
    void *(calloc_cb)(size_t, size_t))
{
        const struct pthread_mutex_attr *attr;
        struct pthread_mutex *pmutex;
        int error;

        if (mutex_attr == NULL) {
                attr = &_pthread_mutexattr_default;
        } else {
                attr = mutex_attr;
                error = mutex_check_attr(attr);
                if (error != 0)
                        return (error);
        }
        if ((pmutex = (pthread_mutex_t)calloc_cb(1,
            sizeof(struct pthread_mutex))) == NULL)
                return (ENOMEM);
        mutex_init_body(pmutex, attr);
        *mutex = pmutex;
        return (0);
}

static int
init_static(struct pthread *thread, pthread_mutex_t *mutex)
{
        int ret;

        THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);

        if (*mutex == THR_MUTEX_INITIALIZER)
                ret = mutex_init(mutex, &_pthread_mutexattr_default,
                    __thr_calloc);
        else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
                ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
                    __thr_calloc);
        else
                ret = 0;
        THR_LOCK_RELEASE(thread, &_mutex_static_lock);

        return (ret);
}
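
/*
 * Illustrative usage sketch (not compiled into libthr): statically
 * initialized mutexes reach init_static() on their first lock.  The
 * adaptive variant assumes the non-portable
 * PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP macro, which corresponds to
 * THR_ADAPTIVE_MUTEX_INITIALIZER handled above.
 */
#if 0
#include <pthread.h>

static pthread_mutex_t plain_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t spin_mtx = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP;

static void
use_static_mutexes(void)
{
        /* The first lock allocates and initializes the real mutex object. */
        pthread_mutex_lock(&plain_mtx);
        pthread_mutex_unlock(&plain_mtx);
        pthread_mutex_lock(&spin_mtx);
        pthread_mutex_unlock(&spin_mtx);
}
#endif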

static void
set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
{
        struct pthread_mutex *m2;

        m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
        if (m2 != NULL)
                m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
        else
                m->m_lock.m_ceilings[1] = -1;
}

static void
shared_mutex_init(struct pthread_mutex *pmtx, const struct
    pthread_mutex_attr *mutex_attr)
{
        static const struct pthread_mutex_attr foobar_mutex_attr = {
                .m_type = PTHREAD_MUTEX_DEFAULT,
                .m_protocol = PTHREAD_PRIO_NONE,
                .m_ceiling = 0,
                .m_pshared = PTHREAD_PROCESS_SHARED,
                .m_robust = PTHREAD_MUTEX_STALLED,
        };
        bool done;

        /*
         * Hack to allow multiple pthread_mutex_init() calls on the
         * same process-shared mutex.  We rely on the kernel
         * allocating a zeroed offpage for the mutex, i.e. the
         * PMUTEX_INITSTAGE_ALLOC value must be zero.
         */
        for (done = false; !done;) {
                switch (pmtx->m_ps) {
                case PMUTEX_INITSTAGE_DONE:
                        atomic_thread_fence_acq();
                        done = true;
                        break;
                case PMUTEX_INITSTAGE_ALLOC:
                        if (atomic_cmpset_int(&pmtx->m_ps,
                            PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
                                if (mutex_attr == NULL)
                                        mutex_attr = &foobar_mutex_attr;
                                mutex_init_body(pmtx, mutex_attr);
                                atomic_store_rel_int(&pmtx->m_ps,
                                    PMUTEX_INITSTAGE_DONE);
                                done = true;
                        }
                        break;
                case PMUTEX_INITSTAGE_BUSY:
                        _pthread_yield();
                        break;
                default:
                        PANIC("corrupted offpage");
                        break;
                }
        }
}

int
__Tthr_mutex_init(pthread_mutex_t * __restrict mutex,
    const pthread_mutexattr_t * __restrict mutex_attr)
{
        struct pthread_mutex *pmtx;
        int ret;

        _thr_check_init();

        if (mutex_attr != NULL) {
                ret = mutex_check_attr(*mutex_attr);
                if (ret != 0)
                        return (ret);
        }
        if (mutex_attr == NULL ||
            (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
                __thr_malloc_init();
                return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
                    __thr_calloc));
        }
        pmtx = __thr_pshared_offpage(__DECONST(void *, mutex), 1);
        if (pmtx == NULL)
                return (EFAULT);
        *mutex = THR_PSHARED_PTR;
        shared_mutex_init(pmtx, *mutex_attr);
        return (0);
}
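
/*
 * Illustrative usage sketch (not compiled into libthr): a
 * process-shared mutex placed in shared memory.  __Tthr_mutex_init()
 * stores only THR_PSHARED_PTR in the application-visible word and
 * keeps the real lock in the off-page object obtained from
 * __thr_pshared_offpage().  Standard POSIX interfaces only.
 */
#if 0
#include <sys/mman.h>
#include <pthread.h>

static pthread_mutex_t *
make_shared_mutex(void)
{
        pthread_mutexattr_t attr;
        pthread_mutex_t *mp;

        /* The mutex word itself must live in memory shared by the users. */
        mp = mmap(NULL, sizeof(*mp), PROT_READ | PROT_WRITE,
            MAP_SHARED | MAP_ANON, -1, 0);
        if (mp == MAP_FAILED)
                return (NULL);
        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
        pthread_mutex_init(mp, &attr);
        pthread_mutexattr_destroy(&attr);
        return (mp);
}
#endif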

/* This function is used internally by malloc. */
int
_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
    void *(calloc_cb)(size_t, size_t))
{
        static const struct pthread_mutex_attr attr = {
                .m_type = PTHREAD_MUTEX_NORMAL,
                .m_protocol = PTHREAD_PRIO_NONE,
                .m_ceiling = 0,
                .m_pshared = PTHREAD_PROCESS_PRIVATE,
                .m_robust = PTHREAD_MUTEX_STALLED,
        };
        int ret;

        ret = mutex_init(mutex, &attr, calloc_cb);
        if (ret == 0)
                (*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
        return (ret);
}

/*
 * Fix mutex ownership for the child process.
 *
 * Ownership of process-private mutexes is transferred from the
 * forking thread to the child process.
 *
 * Process-shared mutexes must not be inherited: their owner is the
 * forking thread, which lives in the parent process, so they are
 * removed from the owned mutex list.
 */
static void
queue_fork(struct pthread *curthread, struct mutex_queue *q,
    struct mutex_queue *qp, uint bit)
{
        struct pthread_mutex *m;

        TAILQ_INIT(q);
        TAILQ_FOREACH(m, qp, m_pqe) {
                TAILQ_INSERT_TAIL(q, m, m_qe);
                m->m_lock.m_owner = TID(curthread) | bit;
        }
}

void
_mutex_fork(struct pthread *curthread)
{

        queue_fork(curthread, &curthread->mq[TMQ_NORM],
            &curthread->mq[TMQ_NORM_PRIV], 0);
        queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
            &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
        queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
            &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
        curthread->robust_list = 0;
}
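
/*
 * Illustrative usage sketch (not compiled into libthr): the classic
 * pthread_atfork() pattern relies on the ownership transfer done by
 * _mutex_fork() above.  The child handler runs in the child process
 * and must be able to unlock a process-private mutex that the
 * forking thread locked in the parent.
 */
#if 0
#include <pthread.h>

static pthread_mutex_t state_mtx = PTHREAD_MUTEX_INITIALIZER;

static void prepare(void) { pthread_mutex_lock(&state_mtx); }
static void parent(void) { pthread_mutex_unlock(&state_mtx); }
static void child(void) { pthread_mutex_unlock(&state_mtx); }

static void
install_fork_handlers(void)
{
        pthread_atfork(prepare, parent, child);
}
#endif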

int
_thr_mutex_destroy(pthread_mutex_t *mutex)
{
        pthread_mutex_t m, m1;
        int ret;

        m = *mutex;
        if (m < THR_MUTEX_DESTROYED) {
                ret = 0;
        } else if (m == THR_MUTEX_DESTROYED) {
                ret = EINVAL;
        } else {
                if (m == THR_PSHARED_PTR) {
                        m1 = __thr_pshared_offpage(mutex, 0);
                        if (m1 != NULL) {
                                if ((uint32_t)m1->m_lock.m_owner !=
                                    UMUTEX_RB_OWNERDEAD) {
                                        mutex_assert_not_owned(
                                            _get_curthread(), m1);
                                }
                                __thr_pshared_destroy(mutex);
                        }
                        *mutex = THR_MUTEX_DESTROYED;
                        return (0);
                }
                if (PMUTEX_OWNER_ID(m) != 0 &&
                    (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
                        ret = EBUSY;
                } else {
                        *mutex = THR_MUTEX_DESTROYED;
                        mutex_assert_not_owned(_get_curthread(), m);
                        __thr_free(m);
                        ret = 0;
                }
        }

        return (ret);
}

static int
mutex_qidx(struct pthread_mutex *m)
{

        if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
                return (TMQ_NORM);
        return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
}

/*
 * Both enqueue_mutex() and dequeue_mutex() operate on the
 * thread-private linkage of the locked mutexes and on the robust
 * linkage.
 *
 * The robust list, as seen by the kernel, must stay consistent even
 * if a thread terminates at an arbitrary moment.  This is safe
 * because either enqueue or dequeue on the list walked by the kernel
 * consists of rewriting a single forward pointer.  The rewrite of the
 * back pointer is not atomic WRT the forward one, but the kernel does
 * not care, since it never follows the back pointer.
 */
static void
enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
    int error)
{
        struct pthread_mutex *m1;
        uintptr_t *rl;
        int qidx;

        /* Add to the list of owned mutexes: */
        if (error != EOWNERDEAD)
                mutex_assert_not_owned(curthread, m);
        qidx = mutex_qidx(m);
        TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
        if (!is_pshared_mutex(m))
                TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
        if (is_robust_mutex(m)) {
                rl = is_pshared_mutex(m) ? &curthread->robust_list :
                    &curthread->priv_robust_list;
                m->m_rb_prev = NULL;
                if (*rl != 0) {
                        m1 = __containerof((void *)*rl,
                            struct pthread_mutex, m_lock);
                        m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
                        m1->m_rb_prev = m;
                } else {
                        m1 = NULL;
                        m->m_lock.m_rb_lnk = 0;
                }
                *rl = (uintptr_t)&m->m_lock;
        }
}

static void
dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
{
        struct pthread_mutex *mp, *mn;
        int qidx;

        mutex_assert_is_owned(m);
        qidx = mutex_qidx(m);
        if (is_robust_mutex(m)) {
                mp = m->m_rb_prev;
                if (mp == NULL) {
                        if (is_pshared_mutex(m)) {
                                curthread->robust_list = m->m_lock.m_rb_lnk;
                        } else {
                                curthread->priv_robust_list =
                                    m->m_lock.m_rb_lnk;
                        }
                } else {
                        mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
                }
                if (m->m_lock.m_rb_lnk != 0) {
                        mn = __containerof((void *)m->m_lock.m_rb_lnk,
                            struct pthread_mutex, m_lock);
                        mn->m_rb_prev = m->m_rb_prev;
                }
                m->m_lock.m_rb_lnk = 0;
                m->m_rb_prev = NULL;
        }
        TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
        if (!is_pshared_mutex(m))
                TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
        if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
                set_inherited_priority(curthread, m);
        mutex_init_link(m);
}

static int
check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
{
        int ret;

        *m = *mutex;
        ret = 0;
        if (__predict_false(*m == THR_PSHARED_PTR)) {
                *m = __thr_pshared_offpage(mutex, 0);
                if (*m == NULL)
                        ret = EINVAL;
                else
                        shared_mutex_init(*m, NULL);
        } else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
                if (*m == THR_MUTEX_DESTROYED) {
                        ret = EINVAL;
                } else {
                        ret = init_static(_get_curthread(), mutex);
                        if (ret == 0)
                                *m = *mutex;
                }
        }
        return (ret);
}

int
__Tthr_mutex_trylock(pthread_mutex_t *mutex)
{
        struct pthread *curthread;
        struct pthread_mutex *m;
        uint32_t id;
        int ret, robust;

        ret = check_and_init_mutex(mutex, &m);
        if (ret != 0)
                return (ret);
        curthread = _get_curthread();
        id = TID(curthread);
        if (m->m_flags & PMUTEX_FLAG_PRIVATE)
                THR_CRITICAL_ENTER(curthread);
        robust = _mutex_enter_robust(curthread, m);
        ret = _thr_umutex_trylock(&m->m_lock, id);
        if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
                enqueue_mutex(curthread, m, ret);
                if (ret == EOWNERDEAD)
                        m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
        } else if (PMUTEX_OWNER_ID(m) == id) {
                ret = mutex_self_trylock(m);
        } /* else {} */
        if (robust)
                _mutex_leave_robust(curthread, m);
        if (ret != 0 && ret != EOWNERDEAD &&
            (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
                THR_CRITICAL_LEAVE(curthread);
        return (ret);
}

static int
mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
    const struct timespec *abstime)
{
        uint32_t id, owner;
        int count, ret;

        id = TID(curthread);
        if (PMUTEX_OWNER_ID(m) == id)
                return (mutex_self_lock(m, abstime));

        /*
         * For adaptive mutexes, spin for a bit in the expectation
         * that, if the application requested this mutex type, the
         * lock is likely to be released quickly; spinning is then
         * faster than entering the kernel.
         */
        if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
            UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
                goto sleep_in_kernel;

        if (!_thr_is_smp)
                goto yield_loop;

        count = m->m_spinloops;
        while (count--) {
                owner = m->m_lock.m_owner;
                if ((owner & ~UMUTEX_CONTESTED) == 0) {
                        if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
                            id | owner)) {
                                ret = 0;
                                goto done;
                        }
                }
                CPU_SPINWAIT;
        }

yield_loop:
        count = m->m_yieldloops;
        while (count--) {
                _sched_yield();
                owner = m->m_lock.m_owner;
                if ((owner & ~UMUTEX_CONTESTED) == 0) {
                        if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
                            id | owner)) {
                                ret = 0;
                                goto done;
                        }
                }
        }

sleep_in_kernel:
        if (abstime == NULL)
                ret = __thr_umutex_lock(&m->m_lock, id);
        else if (__predict_false(abstime->tv_nsec < 0 ||
            abstime->tv_nsec >= 1000000000))
                ret = EINVAL;
        else
                ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
done:
        if (ret == 0 || ret == EOWNERDEAD) {
                enqueue_mutex(curthread, m, ret);
                if (ret == EOWNERDEAD)
                        m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
        }
        return (ret);
}

static __always_inline int
mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
    bool cvattach, bool rb_onlist)
{
        struct pthread *curthread;
        int ret, robust;

        robust = 0;  /* pacify gcc */
        curthread = _get_curthread();
        if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
                THR_CRITICAL_ENTER(curthread);
        if (!rb_onlist)
                robust = _mutex_enter_robust(curthread, m);
        ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
        if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
                enqueue_mutex(curthread, m, ret);
                if (ret == EOWNERDEAD)
                        m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
        } else {
                ret = mutex_lock_sleep(curthread, m, abstime);
        }
        if (!rb_onlist && robust)
                _mutex_leave_robust(curthread, m);
        if (ret != 0 && ret != EOWNERDEAD &&
            (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
                THR_CRITICAL_LEAVE(curthread);
        return (ret);
}

int
__Tthr_mutex_lock(pthread_mutex_t *mutex)
{
        struct pthread_mutex *m;
        int ret;

        _thr_check_init();
        ret = check_and_init_mutex(mutex, &m);
        if (ret == 0)
                ret = mutex_lock_common(m, NULL, false, false);
        return (ret);
}

int
__pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
    const struct timespec * __restrict abstime)
{
        struct pthread_mutex *m;
        int ret;

        _thr_check_init();
        ret = check_and_init_mutex(mutex, &m);
        if (ret == 0)
                ret = mutex_lock_common(m, abstime, false, false);
        return (ret);
}
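
/*
 * Illustrative usage sketch (not compiled into libthr): the abstime
 * argument validated above is an absolute CLOCK_REALTIME timeout, as
 * POSIX specifies for pthread_mutex_timedlock().
 */
#if 0
#include <pthread.h>
#include <time.h>

static int
lock_with_timeout(pthread_mutex_t *mp, time_t seconds)
{
        struct timespec abstime;

        clock_gettime(CLOCK_REALTIME, &abstime);
        abstime.tv_sec += seconds;
        /* Returns ETIMEDOUT if the mutex stays locked past abstime. */
        return (pthread_mutex_timedlock(mp, &abstime));
}
#endif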

int
_thr_mutex_unlock(pthread_mutex_t *mutex)
{
        struct pthread_mutex *mp;

        if (*mutex == THR_PSHARED_PTR) {
                mp = __thr_pshared_offpage(mutex, 0);
                if (mp == NULL)
                        return (EINVAL);
                shared_mutex_init(mp, NULL);
        } else {
                mp = *mutex;
        }
        return (mutex_unlock_common(mp, false, NULL));
}

int
_mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
{
        int error;

        error = mutex_lock_common(m, NULL, true, rb_onlist);
        if (error == 0 || error == EOWNERDEAD)
                m->m_count = count;
        return (error);
}

int
_mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
{

        /*
         * Clear the count in case this is a recursive mutex.
         */
        *count = m->m_count;
        m->m_count = 0;
        (void)mutex_unlock_common(m, true, defer);
        return (0);
}

int
_mutex_cv_attach(struct pthread_mutex *m, int count)
{
        struct pthread *curthread;

        curthread = _get_curthread();
        enqueue_mutex(curthread, m, 0);
        m->m_count = count;
        return (0);
}

int
_mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
{
        struct pthread *curthread;
        int deferred, error;

        curthread = _get_curthread();
        if ((error = _mutex_owned(curthread, mp)) != 0)
                return (error);

        /*
         * Clear the count in case this is a recursive mutex.
         */
        *recurse = mp->m_count;
        mp->m_count = 0;
        dequeue_mutex(curthread, mp);

        /* Will this happen in the real world? */
        if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
                deferred = 1;
                mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
        } else
                deferred = 0;

        if (deferred) {
                _thr_wake_all(curthread->defer_waiters,
                    curthread->nwaiter_defer);
                curthread->nwaiter_defer = 0;
        }
        return (0);
}

static int
mutex_self_trylock(struct pthread_mutex *m)
{
        int ret;

        switch (PMUTEX_TYPE(m->m_flags)) {
        case PTHREAD_MUTEX_ERRORCHECK:
        case PTHREAD_MUTEX_NORMAL:
        case PTHREAD_MUTEX_ADAPTIVE_NP:
                ret = EBUSY;
                break;

        case PTHREAD_MUTEX_RECURSIVE:
                /* Increment the lock count: */
                if (m->m_count + 1 > 0) {
                        m->m_count++;
                        ret = 0;
                } else
                        ret = EAGAIN;
                break;

        default:
                /* Trap invalid mutex types. */
                ret = EINVAL;
        }

        return (ret);
}

static int
mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
{
        struct timespec ts1, ts2;
        int ret;

        switch (PMUTEX_TYPE(m->m_flags)) {
        case PTHREAD_MUTEX_ERRORCHECK:
        case PTHREAD_MUTEX_ADAPTIVE_NP:
                if (abstime) {
                        if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
                            abstime->tv_nsec >= 1000000000) {
                                ret = EINVAL;
                        } else {
                                clock_gettime(CLOCK_REALTIME, &ts1);
                                TIMESPEC_SUB(&ts2, abstime, &ts1);
                                __sys_nanosleep(&ts2, NULL);
                                ret = ETIMEDOUT;
                        }
                } else {
                        /*
                         * POSIX specifies that mutexes should return
                         * EDEADLK if a recursive lock is detected.
                         */
                        ret = EDEADLK;
                }
                break;

        case PTHREAD_MUTEX_NORMAL:
                /*
                 * What SS2 defines as a 'normal' mutex.  Intentionally
                 * deadlock on attempts to get a lock you already own.
                 */
                ret = 0;
                if (abstime) {
                        if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
                            abstime->tv_nsec >= 1000000000) {
                                ret = EINVAL;
                        } else {
                                clock_gettime(CLOCK_REALTIME, &ts1);
                                TIMESPEC_SUB(&ts2, abstime, &ts1);
                                __sys_nanosleep(&ts2, NULL);
                                ret = ETIMEDOUT;
                        }
                } else {
                        ts1.tv_sec = 30;
                        ts1.tv_nsec = 0;
                        for (;;)
                                __sys_nanosleep(&ts1, NULL);
                }
                break;

        case PTHREAD_MUTEX_RECURSIVE:
                /* Increment the lock count: */
                if (m->m_count + 1 > 0) {
                        m->m_count++;
                        ret = 0;
                } else
                        ret = EAGAIN;
                break;

        default:
                /* Trap invalid mutex types. */
                ret = EINVAL;
        }

        return (ret);
}

static __always_inline int
mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
{
        struct pthread *curthread;
        uint32_t id;
        int deferred, error, private, robust;

        if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
                if (m == THR_MUTEX_DESTROYED)
                        return (EINVAL);
                return (EPERM);
        }

        curthread = _get_curthread();
        id = TID(curthread);

        /*
         * Check if the running thread is not the owner of the mutex.
         */
        if (__predict_false(PMUTEX_OWNER_ID(m) != id))
                return (EPERM);

        error = 0;
        private = (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0;
        if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
            PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
                m->m_count--;
        } else {
                if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
                        deferred = 1;
                        m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
                } else
                        deferred = 0;

                robust = _mutex_enter_robust(curthread, m);
                dequeue_mutex(curthread, m);
                error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
                if (deferred) {
                        if (mtx_defer == NULL) {
                                _thr_wake_all(curthread->defer_waiters,
                                    curthread->nwaiter_defer);
                                curthread->nwaiter_defer = 0;
                        } else
                                *mtx_defer = 1;
                }
                if (robust)
                        _mutex_leave_robust(curthread, m);
        }
        if (!cv && private)
                THR_CRITICAL_LEAVE(curthread);
        return (error);
}

int
_pthread_mutex_getprioceiling(const pthread_mutex_t * __restrict mutex,
    int * __restrict prioceiling)
{
        struct pthread_mutex *m;

        if (*mutex == THR_PSHARED_PTR) {
                m = __thr_pshared_offpage(__DECONST(void *, mutex), 0);
                if (m == NULL)
                        return (EINVAL);
                shared_mutex_init(m, NULL);
        } else {
                m = *mutex;
                if (m <= THR_MUTEX_DESTROYED)
                        return (EINVAL);
        }
        if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
                return (EINVAL);
        *prioceiling = m->m_lock.m_ceilings[0];
        return (0);
}

int
_pthread_mutex_setprioceiling(pthread_mutex_t * __restrict mutex,
    int ceiling, int * __restrict old_ceiling)
{
        struct pthread *curthread;
        struct pthread_mutex *m, *m1, *m2;
        struct mutex_queue *q, *qp;
        int qidx, ret;

        if (*mutex == THR_PSHARED_PTR) {
                m = __thr_pshared_offpage(mutex, 0);
                if (m == NULL)
                        return (EINVAL);
                shared_mutex_init(m, NULL);
        } else {
                m = *mutex;
                if (m <= THR_MUTEX_DESTROYED)
                        return (EINVAL);
        }
        if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
                return (EINVAL);

        ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
        if (ret != 0)
                return (ret);

        curthread = _get_curthread();
        if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
                mutex_assert_is_owned(m);
                m1 = TAILQ_PREV(m, mutex_queue, m_qe);
                m2 = TAILQ_NEXT(m, m_qe);
                if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
                    (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
                        qidx = mutex_qidx(m);
                        q = &curthread->mq[qidx];
                        qp = &curthread->mq[qidx + 1];
                        TAILQ_REMOVE(q, m, m_qe);
                        if (!is_pshared_mutex(m))
                                TAILQ_REMOVE(qp, m, m_pqe);
                        TAILQ_FOREACH(m2, q, m_qe) {
                                if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
                                        TAILQ_INSERT_BEFORE(m2, m, m_qe);
                                        if (!is_pshared_mutex(m)) {
                                                while (m2 != NULL &&
                                                    is_pshared_mutex(m2)) {
                                                        m2 = TAILQ_PREV(m2,
                                                            mutex_queue, m_qe);
                                                }
                                                if (m2 == NULL) {
                                                        TAILQ_INSERT_HEAD(qp,
                                                            m, m_pqe);
                                                } else {
                                                        TAILQ_INSERT_BEFORE(m2,
                                                            m, m_pqe);
                                                }
                                        }
                                        return (0);
                                }
                        }
                        TAILQ_INSERT_TAIL(q, m, m_qe);
                        if (!is_pshared_mutex(m))
                                TAILQ_INSERT_TAIL(qp, m, m_pqe);
                }
        }
        return (0);
}

int
_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
{
        struct pthread_mutex *m;
        int ret;

        ret = check_and_init_mutex(mutex, &m);
        if (ret == 0)
                *count = m->m_spinloops;
        return (ret);
}

int
__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
{
        struct pthread_mutex *m;
        int ret;

        ret = check_and_init_mutex(mutex, &m);
        if (ret == 0)
                m->m_spinloops = count;
        return (ret);
}

int
_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
{
        struct pthread_mutex *m;
        int ret;

        ret = check_and_init_mutex(mutex, &m);
        if (ret == 0)
                *count = m->m_yieldloops;
        return (ret);
}

int
__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
{
        struct pthread_mutex *m;
        int ret;

        ret = check_and_init_mutex(mutex, &m);
        if (ret == 0)
                m->m_yieldloops = count;
        return (ret);
}

int
_pthread_mutex_isowned_np(pthread_mutex_t *mutex)
{
        struct pthread_mutex *m;

        if (*mutex == THR_PSHARED_PTR) {
                m = __thr_pshared_offpage(mutex, 0);
                if (m == NULL)
                        return (0);
                shared_mutex_init(m, NULL);
        } else {
                m = *mutex;
                if (m <= THR_MUTEX_DESTROYED)
                        return (0);
        }
        return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
}

int
_mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
{

        if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
                if (mp == THR_MUTEX_DESTROYED)
                        return (EINVAL);
                return (EPERM);
        }
        if (PMUTEX_OWNER_ID(mp) != TID(curthread))
                return (EPERM);
        return (0);
}

int
_Tthr_mutex_consistent(pthread_mutex_t *mutex)
{
        struct pthread_mutex *m;
        struct pthread *curthread;

        if (*mutex == THR_PSHARED_PTR) {
                m = __thr_pshared_offpage(mutex, 0);
                if (m == NULL)
                        return (EINVAL);
                shared_mutex_init(m, NULL);
        } else {
                m = *mutex;
                if (m <= THR_MUTEX_DESTROYED)
                        return (EINVAL);
        }
        curthread = _get_curthread();
        if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
            (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
                return (EINVAL);
        if (PMUTEX_OWNER_ID(m) != TID(curthread))
                return (EPERM);
        m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
        return (0);
}
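
/*
 * Illustrative usage sketch (not compiled into libthr): recovering a
 * robust mutex after its owner died.  pthread_mutex_lock() returns
 * EOWNERDEAD, the application repairs the protected state, and
 * pthread_mutex_consistent() (implemented above) clears
 * UMUTEX_NONCONSISTENT so later lock attempts succeed normally.
 * Standard POSIX interfaces only.
 */
#if 0
#include <errno.h>
#include <pthread.h>

static int
make_robust_mutex(pthread_mutex_t *mp)
{
        pthread_mutexattr_t attr;
        int error;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
        error = pthread_mutex_init(mp, &attr);
        pthread_mutexattr_destroy(&attr);
        return (error);
}

static int
lock_robust(pthread_mutex_t *mp)
{
        int error;

        error = pthread_mutex_lock(mp);
        if (error == EOWNERDEAD) {
                /* Repair the data protected by mp here, then: */
                error = pthread_mutex_consistent(mp);
        }
        return (error);
}
#endif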