1 /*
2  * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
3  * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
4  * Copyright (c) 2015, 2016 The FreeBSD Foundation
5  *
6  * All rights reserved.
7  *
8  * Portions of this software were developed by Konstantin Belousov
9  * under sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *      This product includes software developed by John Birrell.
22  * 4. Neither the name of the author nor the names of any co-contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41
42 #include "namespace.h"
43 #include <stdlib.h>
44 #include <errno.h>
45 #include <string.h>
46 #include <sys/param.h>
47 #include <sys/queue.h>
48 #include <pthread.h>
49 #include <pthread_np.h>
50 #include "un-namespace.h"
51
52 #include "thr_private.h"
53
54 _Static_assert(sizeof(struct pthread_mutex) <= PAGE_SIZE,
55     "pthread_mutex is too large for off-page");
56
57 /*
58  * For adaptive mutexes, how many times to spin doing trylock2
59  * before entering the kernel to block.
60  */
61 #define MUTEX_ADAPTIVE_SPINS    2000
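
/*
 * Illustrative sketch (not part of the upstream file, kept under #if 0 so
 * it does not affect the build): how an application would opt into the
 * adaptive spinning described above, by requesting the non-portable
 * PTHREAD_MUTEX_ADAPTIVE_NP type and, optionally, tuning the spin count
 * with pthread_mutex_setspinloops_np().  The count of 4000 is an
 * arbitrary example value.
 */
#if 0
#include <pthread.h>
#include <pthread_np.h>

static pthread_mutex_t adaptive_mtx;

static int
init_adaptive_mutex(void)
{
        pthread_mutexattr_t attr;
        int error;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
        error = pthread_mutex_init(&adaptive_mtx, &attr);
        pthread_mutexattr_destroy(&attr);
        if (error == 0) {
                /* Spin longer than the 2000-iteration default before sleeping. */
                error = pthread_mutex_setspinloops_np(&adaptive_mtx, 4000);
        }
        return (error);
}
#endif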
62
63 /*
64  * Prototypes
65  */
66 int     __pthread_mutex_consistent(pthread_mutex_t *mutex);
67 int     __pthread_mutex_init(pthread_mutex_t *mutex,
68                 const pthread_mutexattr_t *mutex_attr);
69 int     __pthread_mutex_trylock(pthread_mutex_t *mutex);
70 int     __pthread_mutex_lock(pthread_mutex_t *mutex);
71 int     __pthread_mutex_timedlock(pthread_mutex_t *mutex,
72                 const struct timespec *abstime);
73 int     _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
74                 void *(calloc_cb)(size_t, size_t));
75 int     _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
76 int     _pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
77 int     __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
78 int     _pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
79 int     _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
80 int     __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
81
82 static int      mutex_self_trylock(pthread_mutex_t);
83 static int      mutex_self_lock(pthread_mutex_t,
84                                 const struct timespec *abstime);
85 static int      mutex_unlock_common(struct pthread_mutex *, bool, int *);
86 static int      mutex_lock_sleep(struct pthread *, pthread_mutex_t,
87                                 const struct timespec *);
88 static void     mutex_init_robust(struct pthread *curthread);
89 static int      mutex_qidx(struct pthread_mutex *m);
90 static bool     is_robust_mutex(struct pthread_mutex *m);
91 static bool     is_pshared_mutex(struct pthread_mutex *m);
92
93 __weak_reference(__pthread_mutex_init, pthread_mutex_init);
94 __strong_reference(__pthread_mutex_init, _pthread_mutex_init);
95 __weak_reference(__pthread_mutex_lock, pthread_mutex_lock);
96 __strong_reference(__pthread_mutex_lock, _pthread_mutex_lock);
97 __weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
98 __strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
99 __weak_reference(__pthread_mutex_trylock, pthread_mutex_trylock);
100 __strong_reference(__pthread_mutex_trylock, _pthread_mutex_trylock);
101 __weak_reference(_pthread_mutex_consistent, pthread_mutex_consistent);
102 __strong_reference(_pthread_mutex_consistent, __pthread_mutex_consistent);
103
104 /* Single underscore versions provided for libc internal usage: */
105 /* No difference between libc and application usage of these: */
106 __weak_reference(_pthread_mutex_destroy, pthread_mutex_destroy);
107 __weak_reference(_pthread_mutex_unlock, pthread_mutex_unlock);
108
109 __weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
110 __weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);
111
112 __weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
113 __strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
114 __weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);
115
116 __weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
117 __strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
118 __weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);
119 __weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);
120
121 static void
122 mutex_init_link(struct pthread_mutex *m)
123 {
124
125 #if defined(_PTHREADS_INVARIANTS)
126         m->m_qe.tqe_prev = NULL;
127         m->m_qe.tqe_next = NULL;
128         m->m_pqe.tqe_prev = NULL;
129         m->m_pqe.tqe_next = NULL;
130 #endif
131 }
132
133 static void
134 mutex_assert_is_owned(struct pthread_mutex *m __unused)
135 {
136
137 #if defined(_PTHREADS_INVARIANTS)
138         if (__predict_false(m->m_qe.tqe_prev == NULL))
139                 PANIC("mutex %p own %#x is not on list %p %p",
140                     m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
141 #endif
142 }
143
144 static void
145 mutex_assert_not_owned(struct pthread *curthread __unused,
146     struct pthread_mutex *m __unused)
147 {
148
149 #if defined(_PTHREADS_INVARIANTS)
150         if (__predict_false(m->m_qe.tqe_prev != NULL ||
151             m->m_qe.tqe_next != NULL))
152                 PANIC("mutex %p own %#x is on list %p %p",
153                     m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
154         if (__predict_false(is_robust_mutex(m) &&
155             (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
156             (is_pshared_mutex(m) && curthread->robust_list ==
157             (uintptr_t)&m->m_lock) ||
158             (!is_pshared_mutex(m) && curthread->priv_robust_list ==
159             (uintptr_t)&m->m_lock))))
160                 PANIC(
161     "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
162                     m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
163                     m->m_rb_prev, (void *)curthread->robust_list,
164                     (void *)curthread->priv_robust_list);
165 #endif
166 }
167
168 static bool
169 is_pshared_mutex(struct pthread_mutex *m)
170 {
171
172         return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
173 }
174
175 static bool
176 is_robust_mutex(struct pthread_mutex *m)
177 {
178
179         return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
180 }
181
182 int
183 _mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
184 {
185
186 #if defined(_PTHREADS_INVARIANTS)
187         if (__predict_false(curthread->inact_mtx != 0))
188                 PANIC("inact_mtx enter");
189 #endif
190         if (!is_robust_mutex(m))
191                 return (0);
192
193         mutex_init_robust(curthread);
194         curthread->inact_mtx = (uintptr_t)&m->m_lock;
195         return (1);
196 }
197
198 void
199 _mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
200 {
201
202 #if defined(_PTHREADS_INVARIANTS)
203         if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
204                 PANIC("inact_mtx leave");
205 #endif
206         curthread->inact_mtx = 0;
207 }
208
209 static int
210 mutex_check_attr(const struct pthread_mutex_attr *attr)
211 {
212
213         if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
214             attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
215                 return (EINVAL);
216         if (attr->m_protocol < PTHREAD_PRIO_NONE ||
217             attr->m_protocol > PTHREAD_PRIO_PROTECT)
218                 return (EINVAL);
219         return (0);
220 }
221
222 static void
223 mutex_init_robust(struct pthread *curthread)
224 {
225         struct umtx_robust_lists_params rb;
226
227         if (curthread == NULL)
228                 curthread = _get_curthread();
229         if (curthread->robust_inited)
230                 return;
231         rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
232         rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
233         rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
234         _umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
235         curthread->robust_inited = 1;
236 }
237
238 static void
239 mutex_init_body(struct pthread_mutex *pmutex,
240     const struct pthread_mutex_attr *attr)
241 {
242
243         pmutex->m_flags = attr->m_type;
244         pmutex->m_count = 0;
245         pmutex->m_spinloops = 0;
246         pmutex->m_yieldloops = 0;
247         mutex_init_link(pmutex);
248         switch (attr->m_protocol) {
249         case PTHREAD_PRIO_NONE:
250                 pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
251                 pmutex->m_lock.m_flags = 0;
252                 break;
253         case PTHREAD_PRIO_INHERIT:
254                 pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
255                 pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
256                 break;
257         case PTHREAD_PRIO_PROTECT:
258                 pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
259                 pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
260                 pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
261                 break;
262         }
263         if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
264                 pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
265         if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
266                 mutex_init_robust(NULL);
267                 pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
268         }
269         if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
270                 pmutex->m_spinloops =
271                     _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
272                 pmutex->m_yieldloops = _thr_yieldloops;
273         }
274 }
275
276 static int
277 mutex_init(pthread_mutex_t *mutex,
278     const struct pthread_mutex_attr *mutex_attr,
279     void *(calloc_cb)(size_t, size_t))
280 {
281         const struct pthread_mutex_attr *attr;
282         struct pthread_mutex *pmutex;
283         int error;
284
285         if (mutex_attr == NULL) {
286                 attr = &_pthread_mutexattr_default;
287         } else {
288                 attr = mutex_attr;
289                 error = mutex_check_attr(attr);
290                 if (error != 0)
291                         return (error);
292         }
293         if ((pmutex = (pthread_mutex_t)
294                 calloc_cb(1, sizeof(struct pthread_mutex))) == NULL)
295                 return (ENOMEM);
296         mutex_init_body(pmutex, attr);
297         *mutex = pmutex;
298         return (0);
299 }
300
301 static int
302 init_static(struct pthread *thread, pthread_mutex_t *mutex)
303 {
304         int ret;
305
306         THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);
307
308         if (*mutex == THR_MUTEX_INITIALIZER)
309                 ret = mutex_init(mutex, &_pthread_mutexattr_default, calloc);
310         else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
311                 ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
312                     calloc);
313         else
314                 ret = 0;
315         THR_LOCK_RELEASE(thread, &_mutex_static_lock);
316
317         return (ret);
318 }
319
320 static void
321 set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
322 {
323         struct pthread_mutex *m2;
324
325         m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
326         if (m2 != NULL)
327                 m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
328         else
329                 m->m_lock.m_ceilings[1] = -1;
330 }
331
332 static void
333 shared_mutex_init(struct pthread_mutex *pmtx, const struct
334     pthread_mutex_attr *mutex_attr)
335 {
336         static const struct pthread_mutex_attr foobar_mutex_attr = {
337                 .m_type = PTHREAD_MUTEX_DEFAULT,
338                 .m_protocol = PTHREAD_PRIO_NONE,
339                 .m_ceiling = 0,
340                 .m_pshared = PTHREAD_PROCESS_SHARED,
341                 .m_robust = PTHREAD_MUTEX_STALLED,
342         };
343         bool done;
344
345         /*
346          * Hack to allow multiple pthread_mutex_init() calls on the
347          * same process-shared mutex.  We rely on the kernel allocating
348          * a zeroed offpage for the mutex, i.e. the
349          * PMUTEX_INITSTAGE_ALLOC value must be zero.
350          */
351         for (done = false; !done;) {
352                 switch (pmtx->m_ps) {
353                 case PMUTEX_INITSTAGE_DONE:
354                         atomic_thread_fence_acq();
355                         done = true;
356                         break;
357                 case PMUTEX_INITSTAGE_ALLOC:
358                         if (atomic_cmpset_int(&pmtx->m_ps,
359                             PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
360                                 if (mutex_attr == NULL)
361                                         mutex_attr = &foobar_mutex_attr;
362                                 mutex_init_body(pmtx, mutex_attr);
363                                 atomic_store_rel_int(&pmtx->m_ps,
364                                     PMUTEX_INITSTAGE_DONE);
365                                 done = true;
366                         }
367                         break;
368                 case PMUTEX_INITSTAGE_BUSY:
369                         _pthread_yield();
370                         break;
371                 default:
372                         PANIC("corrupted offpage");
373                         break;
374                 }
375         }
376 }
377
378 int
379 __pthread_mutex_init(pthread_mutex_t *mutex,
380     const pthread_mutexattr_t *mutex_attr)
381 {
382         struct pthread_mutex *pmtx;
383         int ret;
384
385         if (mutex_attr != NULL) {
386                 ret = mutex_check_attr(*mutex_attr);
387                 if (ret != 0)
388                         return (ret);
389         }
390         if (mutex_attr == NULL ||
391             (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
392                 return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
393                     calloc));
394         }
395         pmtx = __thr_pshared_offpage(mutex, 1);
396         if (pmtx == NULL)
397                 return (EFAULT);
398         *mutex = THR_PSHARED_PTR;
399         shared_mutex_init(pmtx, *mutex_attr);
400         return (0);
401 }
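
/*
 * Illustrative sketch (not part of the upstream file, kept under #if 0):
 * a process-shared mutex lives in memory that several processes map, so
 * each process may call pthread_mutex_init() on the same object; the
 * offpage handling and shared_mutex_init() above make that repeated
 * initialization safe.  The shm object name is hypothetical.
 */
#if 0
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>

static pthread_mutex_t *
map_shared_mutex(void)
{
        pthread_mutexattr_t attr;
        pthread_mutex_t *mtx;
        int fd;

        fd = shm_open("/example_mtx", O_RDWR | O_CREAT, 0600);
        if (fd == -1)
                return (NULL);
        if (ftruncate(fd, sizeof(*mtx)) == -1)
                return (NULL);
        mtx = mmap(NULL, sizeof(*mtx), PROT_READ | PROT_WRITE, MAP_SHARED,
            fd, 0);
        close(fd);
        if (mtx == MAP_FAILED)
                return (NULL);
        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
        (void)pthread_mutex_init(mtx, &attr);   /* safe even if already done */
        pthread_mutexattr_destroy(&attr);
        return (mtx);
}
#endif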
402
403 /* This function is used internally by malloc. */
404 int
405 _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
406     void *(calloc_cb)(size_t, size_t))
407 {
408         static const struct pthread_mutex_attr attr = {
409                 .m_type = PTHREAD_MUTEX_NORMAL,
410                 .m_protocol = PTHREAD_PRIO_NONE,
411                 .m_ceiling = 0,
412                 .m_pshared = PTHREAD_PROCESS_PRIVATE,
413                 .m_robust = PTHREAD_MUTEX_STALLED,
414         };
415         int ret;
416
417         ret = mutex_init(mutex, &attr, calloc_cb);
418         if (ret == 0)
419                 (*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
420         return (ret);
421 }
422
423 /*
424  * Fix mutex ownership for the child process.
425  *
426  * Process-private mutex ownership is transferred from the forking
427  * thread to the child process.
428  *
429  * Process-shared mutexes should not be inherited: their owner is the
430  * forking thread, which lives in the parent process, so they are
431  * removed from the owned mutex list.
432  */
433 static void
434 queue_fork(struct pthread *curthread, struct mutex_queue *q,
435     struct mutex_queue *qp, uint bit)
436 {
437         struct pthread_mutex *m;
438
439         TAILQ_INIT(q);
440         TAILQ_FOREACH(m, qp, m_pqe) {
441                 TAILQ_INSERT_TAIL(q, m, m_qe);
442                 m->m_lock.m_owner = TID(curthread) | bit;
443         }
444 }
445
446 void
447 _mutex_fork(struct pthread *curthread)
448 {
449
450         queue_fork(curthread, &curthread->mq[TMQ_NORM],
451             &curthread->mq[TMQ_NORM_PRIV], 0);
452         queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
453             &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
454         queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
455             &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
456         curthread->robust_list = 0;
457 }
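
/*
 * Illustrative sketch (not part of the upstream file, kept under #if 0) of
 * the behavior described in the comment above: after fork() the child is
 * single-threaded and inherits ownership of process-private mutexes held
 * by the forking thread, so it may unlock them.
 */
#if 0
#include <pthread.h>
#include <unistd.h>

static pthread_mutex_t priv_mtx = PTHREAD_MUTEX_INITIALIZER;

static void
fork_with_held_mutex(void)
{
        pthread_mutex_lock(&priv_mtx);
        if (fork() == 0) {
                /* _mutex_fork() retargeted ownership to the child's thread. */
                pthread_mutex_unlock(&priv_mtx);
                _exit(0);
        }
        pthread_mutex_unlock(&priv_mtx);
}
#endif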
458
459 int
460 _pthread_mutex_destroy(pthread_mutex_t *mutex)
461 {
462         pthread_mutex_t m, m1;
463         int ret;
464
465         m = *mutex;
466         if (m < THR_MUTEX_DESTROYED) {
467                 ret = 0;
468         } else if (m == THR_MUTEX_DESTROYED) {
469                 ret = EINVAL;
470         } else {
471                 if (m == THR_PSHARED_PTR) {
472                         m1 = __thr_pshared_offpage(mutex, 0);
473                         if (m1 != NULL) {
474                                 mutex_assert_not_owned(_get_curthread(), m1);
475                                 __thr_pshared_destroy(mutex);
476                         }
477                         *mutex = THR_MUTEX_DESTROYED;
478                         return (0);
479                 }
480                 if (PMUTEX_OWNER_ID(m) != 0 &&
481                     (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
482                         ret = EBUSY;
483                 } else {
484                         *mutex = THR_MUTEX_DESTROYED;
485                         mutex_assert_not_owned(_get_curthread(), m);
486                         free(m);
487                         ret = 0;
488                 }
489         }
490
491         return (ret);
492 }
493
494 static int
495 mutex_qidx(struct pthread_mutex *m)
496 {
497
498         if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
499                 return (TMQ_NORM);
500         return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
501 }
502
503 /*
504  * Both enqueue_mutex() and dequeue_mutex() operate on the
505  * thread-private linkage of the locked mutexes and on the robust
506  * linkage.
507  *
508  * The robust list, as seen by the kernel, must stay consistent even
509  * if a thread terminates at an arbitrary moment.  Both enqueue and
510  * dequeue on the list walked by the kernel amount to rewriting a
511  * single forward pointer, so they are safe.  The rewrite of the back
512  * pointer is not atomic with respect to the forward one, but the
513  * kernel does not care.
514  */
515 static void
516 enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
517     int error)
518 {
519         struct pthread_mutex *m1;
520         uintptr_t *rl;
521         int qidx;
522
523         /* Add to the list of owned mutexes: */
524         if (error != EOWNERDEAD)
525                 mutex_assert_not_owned(curthread, m);
526         qidx = mutex_qidx(m);
527         TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
528         if (!is_pshared_mutex(m))
529                 TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
530         if (is_robust_mutex(m)) {
531                 rl = is_pshared_mutex(m) ? &curthread->robust_list :
532                     &curthread->priv_robust_list;
533                 m->m_rb_prev = NULL;
534                 if (*rl != 0) {
535                         m1 = __containerof((void *)*rl,
536                             struct pthread_mutex, m_lock);
537                         m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
538                         m1->m_rb_prev = m;
539                 } else {
540                         m1 = NULL;
541                         m->m_lock.m_rb_lnk = 0;
542                 }
543                 *rl = (uintptr_t)&m->m_lock;
544         }
545 }
546
547 static void
548 dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
549 {
550         struct pthread_mutex *mp, *mn;
551         int qidx;
552
553         mutex_assert_is_owned(m);
554         qidx = mutex_qidx(m);
555         if (is_robust_mutex(m)) {
556                 mp = m->m_rb_prev;
557                 if (mp == NULL) {
558                         if (is_pshared_mutex(m)) {
559                                 curthread->robust_list = m->m_lock.m_rb_lnk;
560                         } else {
561                                 curthread->priv_robust_list =
562                                     m->m_lock.m_rb_lnk;
563                         }
564                 } else {
565                         mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
566                 }
567                 if (m->m_lock.m_rb_lnk != 0) {
568                         mn = __containerof((void *)m->m_lock.m_rb_lnk,
569                             struct pthread_mutex, m_lock);
570                         mn->m_rb_prev = m->m_rb_prev;
571                 }
572                 m->m_lock.m_rb_lnk = 0;
573                 m->m_rb_prev = NULL;
574         }
575         TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
576         if (!is_pshared_mutex(m))
577                 TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
578         if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
579                 set_inherited_priority(curthread, m);
580         mutex_init_link(m);
581 }
582
583 static int
584 check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
585 {
586         int ret;
587
588         *m = *mutex;
589         ret = 0;
590         if (*m == THR_PSHARED_PTR) {
591                 *m = __thr_pshared_offpage(mutex, 0);
592                 if (*m == NULL)
593                         ret = EINVAL;
594                 else
595                         shared_mutex_init(*m, NULL);
596         } else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
597                 if (*m == THR_MUTEX_DESTROYED) {
598                         ret = EINVAL;
599                 } else {
600                         ret = init_static(_get_curthread(), mutex);
601                         if (ret == 0)
602                                 *m = *mutex;
603                 }
604         }
605         return (ret);
606 }
607
608 int
609 __pthread_mutex_trylock(pthread_mutex_t *mutex)
610 {
611         struct pthread *curthread;
612         struct pthread_mutex *m;
613         uint32_t id;
614         int ret, robust;
615
616         ret = check_and_init_mutex(mutex, &m);
617         if (ret != 0)
618                 return (ret);
619         curthread = _get_curthread();
620         id = TID(curthread);
621         if (m->m_flags & PMUTEX_FLAG_PRIVATE)
622                 THR_CRITICAL_ENTER(curthread);
623         robust = _mutex_enter_robust(curthread, m);
624         ret = _thr_umutex_trylock(&m->m_lock, id);
625         if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
626                 enqueue_mutex(curthread, m, ret);
627                 if (ret == EOWNERDEAD)
628                         m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
629         } else if (PMUTEX_OWNER_ID(m) == id) {
630                 ret = mutex_self_trylock(m);
631         } /* else {} */
632         if (robust)
633                 _mutex_leave_robust(curthread, m);
634         if ((ret == 0 || ret == EOWNERDEAD) &&
635             (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
636                 THR_CRITICAL_LEAVE(curthread);
637         return (ret);
638 }
639
640 static int
641 mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
642     const struct timespec *abstime)
643 {
644         uint32_t id, owner;
645         int count, ret;
646
647         id = TID(curthread);
648         if (PMUTEX_OWNER_ID(m) == id)
649                 return (mutex_self_lock(m, abstime));
650
651         /*
652          * For adaptive mutexes, spin for a bit in the expectation
653          * that, if the application requested this mutex type, the
654          * lock is likely to be released quickly, making spinning
655          * faster than entering the kernel.
656          */
657         if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
658             UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
659                 goto sleep_in_kernel;
660
661         if (!_thr_is_smp)
662                 goto yield_loop;
663
664         count = m->m_spinloops;
665         while (count--) {
666                 owner = m->m_lock.m_owner;
667                 if ((owner & ~UMUTEX_CONTESTED) == 0) {
668                         if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
669                             id | owner)) {
670                                 ret = 0;
671                                 goto done;
672                         }
673                 }
674                 CPU_SPINWAIT;
675         }
676
677 yield_loop:
678         count = m->m_yieldloops;
679         while (count--) {
680                 _sched_yield();
681                 owner = m->m_lock.m_owner;
682                 if ((owner & ~UMUTEX_CONTESTED) == 0) {
683                         if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
684                             id | owner)) {
685                                 ret = 0;
686                                 goto done;
687                         }
688                 }
689         }
690
691 sleep_in_kernel:
692         if (abstime == NULL)
693                 ret = __thr_umutex_lock(&m->m_lock, id);
694         else if (__predict_false(abstime->tv_nsec < 0 ||
695             abstime->tv_nsec >= 1000000000))
696                 ret = EINVAL;
697         else
698                 ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
699 done:
700         if (ret == 0 || ret == EOWNERDEAD) {
701                 enqueue_mutex(curthread, m, ret);
702                 if (ret == EOWNERDEAD)
703                         m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
704         }
705         return (ret);
706 }
707
708 static inline int
709 mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
710     bool cvattach, bool rb_onlist)
711 {
712         struct pthread *curthread;
713         int ret, robust;
714
715         curthread  = _get_curthread();
716         if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
717                 THR_CRITICAL_ENTER(curthread);
718         if (!rb_onlist)
719                 robust = _mutex_enter_robust(curthread, m);
720         ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
721         if (ret == 0 || ret == EOWNERDEAD) {
722                 enqueue_mutex(curthread, m, ret);
723                 if (ret == EOWNERDEAD)
724                         m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
725         } else {
726                 ret = mutex_lock_sleep(curthread, m, abstime);
727         }
728         if (!rb_onlist && robust)
729                 _mutex_leave_robust(curthread, m);
730         if (ret != 0 && ret != EOWNERDEAD &&
731             (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
732                 THR_CRITICAL_LEAVE(curthread);
733         return (ret);
734 }
735
736 int
737 __pthread_mutex_lock(pthread_mutex_t *mutex)
738 {
739         struct pthread_mutex *m;
740         int ret;
741
742         _thr_check_init();
743         ret = check_and_init_mutex(mutex, &m);
744         if (ret == 0)
745                 ret = mutex_lock_common(m, NULL, false, false);
746         return (ret);
747 }
748
749 int
750 __pthread_mutex_timedlock(pthread_mutex_t *mutex,
751     const struct timespec *abstime)
752 {
753         struct pthread_mutex *m;
754         int ret;
755
756         _thr_check_init();
757         ret = check_and_init_mutex(mutex, &m);
758         if (ret == 0)
759                 ret = mutex_lock_common(m, abstime, false, false);
760         return (ret);
761 }
762
763 int
764 _pthread_mutex_unlock(pthread_mutex_t *mutex)
765 {
766         struct pthread_mutex *mp;
767
768         if (*mutex == THR_PSHARED_PTR) {
769                 mp = __thr_pshared_offpage(mutex, 0);
770                 if (mp == NULL)
771                         return (EINVAL);
772                 shared_mutex_init(mp, NULL);
773         } else {
774                 mp = *mutex;
775         }
776         return (mutex_unlock_common(mp, false, NULL));
777 }
778
779 int
780 _mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
781 {
782         int error;
783
784         error = mutex_lock_common(m, NULL, true, rb_onlist);
785         if (error == 0 || error == EOWNERDEAD)
786                 m->m_count = count;
787         return (error);
788 }
789
790 int
791 _mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
792 {
793
794         /*
795          * Clear the count in case this is a recursive mutex.
796          */
797         *count = m->m_count;
798         m->m_count = 0;
799         (void)mutex_unlock_common(m, true, defer);
800         return (0);
801 }
802
803 int
804 _mutex_cv_attach(struct pthread_mutex *m, int count)
805 {
806         struct pthread *curthread;
807
808         curthread = _get_curthread();
809         enqueue_mutex(curthread, m, 0);
810         m->m_count = count;
811         return (0);
812 }
813
814 int
815 _mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
816 {
817         struct pthread *curthread;
818         int deferred, error;
819
820         curthread = _get_curthread();
821         if ((error = _mutex_owned(curthread, mp)) != 0)
822                 return (error);
823
824         /*
825          * Clear the count in case this is a recursive mutex.
826          */
827         *recurse = mp->m_count;
828         mp->m_count = 0;
829         dequeue_mutex(curthread, mp);
830
831         /* Will this happen in the real world? */
832         if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
833                 deferred = 1;
834                 mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
835         } else
836                 deferred = 0;
837
838         if (deferred)  {
839                 _thr_wake_all(curthread->defer_waiters,
840                     curthread->nwaiter_defer);
841                 curthread->nwaiter_defer = 0;
842         }
843         return (0);
844 }
845
846 static int
847 mutex_self_trylock(struct pthread_mutex *m)
848 {
849         int ret;
850
851         switch (PMUTEX_TYPE(m->m_flags)) {
852         case PTHREAD_MUTEX_ERRORCHECK:
853         case PTHREAD_MUTEX_NORMAL:
854         case PTHREAD_MUTEX_ADAPTIVE_NP:
855                 ret = EBUSY; 
856                 break;
857
858         case PTHREAD_MUTEX_RECURSIVE:
859                 /* Increment the lock count: */
860                 if (m->m_count + 1 > 0) {
861                         m->m_count++;
862                         ret = 0;
863                 } else
864                         ret = EAGAIN;
865                 break;
866
867         default:
868                 /* Trap invalid mutex types. */
869                 ret = EINVAL;
870         }
871
872         return (ret);
873 }
874
875 static int
876 mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
877 {
878         struct timespec ts1, ts2;
879         int ret;
880
881         switch (PMUTEX_TYPE(m->m_flags)) {
882         case PTHREAD_MUTEX_ERRORCHECK:
883         case PTHREAD_MUTEX_ADAPTIVE_NP:
884                 if (abstime) {
885                         if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
886                             abstime->tv_nsec >= 1000000000) {
887                                 ret = EINVAL;
888                         } else {
889                                 clock_gettime(CLOCK_REALTIME, &ts1);
890                                 TIMESPEC_SUB(&ts2, abstime, &ts1);
891                                 __sys_nanosleep(&ts2, NULL);
892                                 ret = ETIMEDOUT;
893                         }
894                 } else {
895                         /*
896                          * POSIX specifies that mutexes should return
897                          * EDEADLK if a recursive lock is detected.
898                          */
899                         ret = EDEADLK; 
900                 }
901                 break;
902
903         case PTHREAD_MUTEX_NORMAL:
904                 /*
905                  * What SS2 defines as a 'normal' mutex.  Intentionally
906                  * deadlock on attempts to get a lock you already own.
907                  */
908                 ret = 0;
909                 if (abstime) {
910                         if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
911                             abstime->tv_nsec >= 1000000000) {
912                                 ret = EINVAL;
913                         } else {
914                                 clock_gettime(CLOCK_REALTIME, &ts1);
915                                 TIMESPEC_SUB(&ts2, abstime, &ts1);
916                                 __sys_nanosleep(&ts2, NULL);
917                                 ret = ETIMEDOUT;
918                         }
919                 } else {
920                         ts1.tv_sec = 30;
921                         ts1.tv_nsec = 0;
922                         for (;;)
923                                 __sys_nanosleep(&ts1, NULL);
924                 }
925                 break;
926
927         case PTHREAD_MUTEX_RECURSIVE:
928                 /* Increment the lock count: */
929                 if (m->m_count + 1 > 0) {
930                         m->m_count++;
931                         ret = 0;
932                 } else
933                         ret = EAGAIN;
934                 break;
935
936         default:
937                 /* Trap invalid mutex types. */
938                 ret = EINVAL;
939         }
940
941         return (ret);
942 }
943
944 static int
945 mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
946 {
947         struct pthread *curthread;
948         uint32_t id;
949         int deferred, error, robust;
950
951         if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
952                 if (m == THR_MUTEX_DESTROYED)
953                         return (EINVAL);
954                 return (EPERM);
955         }
956
957         curthread = _get_curthread();
958         id = TID(curthread);
959
960         /*
961          * Check if the running thread is not the owner of the mutex.
962          */
963         if (__predict_false(PMUTEX_OWNER_ID(m) != id))
964                 return (EPERM);
965
966         error = 0;
967         if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
968             PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
969                 m->m_count--;
970         } else {
971                 if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
972                         deferred = 1;
973                         m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
974                 } else
975                         deferred = 0;
976
977                 robust = _mutex_enter_robust(curthread, m);
978                 dequeue_mutex(curthread, m);
979                 error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
980                 if (deferred)  {
981                         if (mtx_defer == NULL) {
982                                 _thr_wake_all(curthread->defer_waiters,
983                                     curthread->nwaiter_defer);
984                                 curthread->nwaiter_defer = 0;
985                         } else
986                                 *mtx_defer = 1;
987                 }
988                 if (robust)
989                         _mutex_leave_robust(curthread, m);
990         }
991         if (!cv && m->m_flags & PMUTEX_FLAG_PRIVATE)
992                 THR_CRITICAL_LEAVE(curthread);
993         return (error);
994 }
995
996 int
997 _pthread_mutex_getprioceiling(pthread_mutex_t *mutex,
998     int *prioceiling)
999 {
1000         struct pthread_mutex *m;
1001
1002         if (*mutex == THR_PSHARED_PTR) {
1003                 m = __thr_pshared_offpage(mutex, 0);
1004                 if (m == NULL)
1005                         return (EINVAL);
1006                 shared_mutex_init(m, NULL);
1007         } else {
1008                 m = *mutex;
1009                 if (m <= THR_MUTEX_DESTROYED)
1010                         return (EINVAL);
1011         }
1012         if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1013                 return (EINVAL);
1014         *prioceiling = m->m_lock.m_ceilings[0];
1015         return (0);
1016 }
1017
1018 int
1019 _pthread_mutex_setprioceiling(pthread_mutex_t *mutex,
1020     int ceiling, int *old_ceiling)
1021 {
1022         struct pthread *curthread;
1023         struct pthread_mutex *m, *m1, *m2;
1024         struct mutex_queue *q, *qp;
1025         int qidx, ret;
1026
1027         if (*mutex == THR_PSHARED_PTR) {
1028                 m = __thr_pshared_offpage(mutex, 0);
1029                 if (m == NULL)
1030                         return (EINVAL);
1031                 shared_mutex_init(m, NULL);
1032         } else {
1033                 m = *mutex;
1034                 if (m <= THR_MUTEX_DESTROYED)
1035                         return (EINVAL);
1036         }
1037         if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1038                 return (EINVAL);
1039
1040         ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
1041         if (ret != 0)
1042                 return (ret);
1043
1044         curthread = _get_curthread();
1045         if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
1046                 mutex_assert_is_owned(m);
1047                 m1 = TAILQ_PREV(m, mutex_queue, m_qe);
1048                 m2 = TAILQ_NEXT(m, m_qe);
1049                 if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
1050                     (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
1051                         qidx = mutex_qidx(m);
1052                         q = &curthread->mq[qidx];
1053                         qp = &curthread->mq[qidx + 1];
1054                         TAILQ_REMOVE(q, m, m_qe);
1055                         if (!is_pshared_mutex(m))
1056                                 TAILQ_REMOVE(qp, m, m_pqe);
1057                         TAILQ_FOREACH(m2, q, m_qe) {
1058                                 if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
1059                                         TAILQ_INSERT_BEFORE(m2, m, m_qe);
1060                                         if (!is_pshared_mutex(m)) {
1061                                                 while (m2 != NULL &&
1062                                                     is_pshared_mutex(m2)) {
1063                                                         m2 = TAILQ_PREV(m2,
1064                                                             mutex_queue, m_qe);
1065                                                 }
1066                                                 if (m2 == NULL) {
1067                                                         TAILQ_INSERT_HEAD(qp,
1068                                                             m, m_pqe);
1069                                                 } else {
1070                                                         TAILQ_INSERT_BEFORE(m2,
1071                                                             m, m_pqe);
1072                                                 }
1073                                         }
1074                                         return (0);
1075                                 }
1076                         }
1077                         TAILQ_INSERT_TAIL(q, m, m_qe);
1078                         if (!is_pshared_mutex(m))
1079                                 TAILQ_INSERT_TAIL(qp, m, m_pqe);
1080                 }
1081         }
1082         return (0);
1083 }
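
/*
 * Illustrative sketch (not part of the upstream file, kept under #if 0):
 * the prioceiling accessors above only apply to PTHREAD_PRIO_PROTECT
 * mutexes, so a caller first selects that protocol through the attribute
 * object.  The ceiling values 10 and 20 are arbitrary example priorities.
 */
#if 0
#include <pthread.h>

static pthread_mutex_t pp_mtx;

static int
init_pp_mutex(void)
{
        pthread_mutexattr_t attr;
        int error, old_ceiling;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_PROTECT);
        pthread_mutexattr_setprioceiling(&attr, 10);
        error = pthread_mutex_init(&pp_mtx, &attr);
        pthread_mutexattr_destroy(&attr);
        if (error == 0) {
                /* Raise the ceiling later; the previous value is returned. */
                error = pthread_mutex_setprioceiling(&pp_mtx, 20, &old_ceiling);
        }
        return (error);
}
#endif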
1084
1085 int
1086 _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
1087 {
1088         struct pthread_mutex *m;
1089         int ret;
1090
1091         ret = check_and_init_mutex(mutex, &m);
1092         if (ret == 0)
1093                 *count = m->m_spinloops;
1094         return (ret);
1095 }
1096
1097 int
1098 __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
1099 {
1100         struct pthread_mutex *m;
1101         int ret;
1102
1103         ret = check_and_init_mutex(mutex, &m);
1104         if (ret == 0)
1105                 m->m_spinloops = count;
1106         return (ret);
1107 }
1108
1109 int
1110 _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
1111 {
1112         struct pthread_mutex *m;
1113         int ret;
1114
1115         ret = check_and_init_mutex(mutex, &m);
1116         if (ret == 0)
1117                 *count = m->m_yieldloops;
1118         return (ret);
1119 }
1120
1121 int
1122 __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
1123 {
1124         struct pthread_mutex *m;
1125         int ret;
1126
1127         ret = check_and_init_mutex(mutex, &m);
1128         if (ret == 0)
1129                 m->m_yieldloops = count;
1130         return (ret);
1131 }
1132
1133 int
1134 _pthread_mutex_isowned_np(pthread_mutex_t *mutex)
1135 {
1136         struct pthread_mutex *m;
1137
1138         if (*mutex == THR_PSHARED_PTR) {
1139                 m = __thr_pshared_offpage(mutex, 0);
1140                 if (m == NULL)
1141                         return (0);
1142                 shared_mutex_init(m, NULL);
1143         } else {
1144                 m = *mutex;
1145                 if (m <= THR_MUTEX_DESTROYED)
1146                         return (0);
1147         }
1148         return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
1149 }
1150
1151 int
1152 _mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
1153 {
1154
1155         if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
1156                 if (mp == THR_MUTEX_DESTROYED)
1157                         return (EINVAL);
1158                 return (EPERM);
1159         }
1160         if (PMUTEX_OWNER_ID(mp) != TID(curthread))
1161                 return (EPERM);
1162         return (0);                  
1163 }
1164
1165 int
1166 _pthread_mutex_consistent(pthread_mutex_t *mutex)
1167 {
1168         struct pthread_mutex *m;
1169         struct pthread *curthread;
1170
1171         if (*mutex == THR_PSHARED_PTR) {
1172                 m = __thr_pshared_offpage(mutex, 0);
1173                 if (m == NULL)
1174                         return (EINVAL);
1175                 shared_mutex_init(m, NULL);
1176         } else {
1177                 m = *mutex;
1178                 if (m <= THR_MUTEX_DESTROYED)
1179                         return (EINVAL);
1180         }
1181         curthread = _get_curthread();
1182         if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
1183             (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
1184                 return (EINVAL);
1185         if (PMUTEX_OWNER_ID(m) != TID(curthread))
1186                 return (EPERM);
1187         m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
1188         return (0);
1189 }
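
/*
 * Illustrative sketch (not part of the upstream file, kept under #if 0):
 * the recovery pattern an application pairs with the EOWNERDEAD and
 * pthread_mutex_consistent() handling implemented above.  The mutex is
 * assumed to have been initialized with pthread_mutexattr_setrobust(&attr,
 * PTHREAD_MUTEX_ROBUST).
 */
#if 0
#include <errno.h>
#include <pthread.h>

static int
lock_robust(pthread_mutex_t *mtx)
{
        int error;

        error = pthread_mutex_lock(mtx);
        if (error == EOWNERDEAD) {
                /*
                 * The previous owner died while holding the lock: repair the
                 * protected state, then mark the mutex consistent so later
                 * lockers do not see EOWNERDEAD or ENOTRECOVERABLE.
                 */
                error = pthread_mutex_consistent(mtx);
        }
        return (error);
}
#endif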