/*
 * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
 * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 *
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "namespace.h"
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/param.h>
#include <sys/queue.h>
#include <pthread.h>
#include <pthread_np.h>
#include "un-namespace.h"

#include "thr_private.h"

_Static_assert(sizeof(struct pthread_mutex) <= PAGE_SIZE,
    "pthread_mutex is too large for off-page");

/*
 * For adaptive mutexes, how many times to spin doing trylock2
 * before entering the kernel to block.
 */
#define MUTEX_ADAPTIVE_SPINS    2000

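/*
 * Illustrative sketch (not part of libthr): how an application would
 * request an adaptive mutex so that the spin/yield loops below apply.
 * Only standard pthread attribute interfaces are used; error checking
 * is omitted for brevity.
 *
 *        pthread_mutexattr_t attr;
 *        pthread_mutex_t mtx;
 *
 *        pthread_mutexattr_init(&attr);
 *        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
 *        pthread_mutex_init(&mtx, &attr);
 *        pthread_mutexattr_destroy(&attr);
 */
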
/*
 * Prototypes
 */
int     __pthread_mutex_consistent(pthread_mutex_t *mutex);
int     __pthread_mutex_init(pthread_mutex_t * __restrict mutex,
                const pthread_mutexattr_t * __restrict mutex_attr);
int     __pthread_mutex_trylock(pthread_mutex_t *mutex);
int     __pthread_mutex_lock(pthread_mutex_t *mutex);
int     __pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
                const struct timespec * __restrict abstime);
int     _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
int     _pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int     __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int     _pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
int     _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
int     __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);

static int      mutex_self_trylock(pthread_mutex_t);
static int      mutex_self_lock(pthread_mutex_t,
                                const struct timespec *abstime);
static int      mutex_unlock_common(struct pthread_mutex *, bool, int *);
static int      mutex_lock_sleep(struct pthread *, pthread_mutex_t,
                                const struct timespec *);
static void     mutex_init_robust(struct pthread *curthread);
static int      mutex_qidx(struct pthread_mutex *m);
static bool     is_robust_mutex(struct pthread_mutex *m);
static bool     is_pshared_mutex(struct pthread_mutex *m);

__weak_reference(__pthread_mutex_init, pthread_mutex_init);
__strong_reference(__pthread_mutex_init, _pthread_mutex_init);
__weak_reference(__pthread_mutex_lock, pthread_mutex_lock);
__strong_reference(__pthread_mutex_lock, _pthread_mutex_lock);
__weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
__strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
__weak_reference(__pthread_mutex_trylock, pthread_mutex_trylock);
__strong_reference(__pthread_mutex_trylock, _pthread_mutex_trylock);
__weak_reference(_pthread_mutex_consistent, pthread_mutex_consistent);
__strong_reference(_pthread_mutex_consistent, __pthread_mutex_consistent);

/* Single underscore versions provided for libc internal usage: */
/* No difference between libc and application usage of these: */
__weak_reference(_pthread_mutex_destroy, pthread_mutex_destroy);
__weak_reference(_pthread_mutex_unlock, pthread_mutex_unlock);

__weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
__weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);

__weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
__strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
__weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);

__weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
__strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
__weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);
__weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);

static void
mutex_init_link(struct pthread_mutex *m)
{

#if defined(_PTHREADS_INVARIANTS)
        m->m_qe.tqe_prev = NULL;
        m->m_qe.tqe_next = NULL;
        m->m_pqe.tqe_prev = NULL;
        m->m_pqe.tqe_next = NULL;
#endif
}

static void
mutex_assert_is_owned(struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
        if (__predict_false(m->m_qe.tqe_prev == NULL))
                PANIC("mutex %p own %#x is not on list %p %p",
                    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
#endif
}

static void
mutex_assert_not_owned(struct pthread *curthread __unused,
    struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
        if (__predict_false(m->m_qe.tqe_prev != NULL ||
            m->m_qe.tqe_next != NULL))
                PANIC("mutex %p own %#x is on list %p %p",
                    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
        if (__predict_false(is_robust_mutex(m) &&
            (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
            (is_pshared_mutex(m) && curthread->robust_list ==
            (uintptr_t)&m->m_lock) ||
            (!is_pshared_mutex(m) && curthread->priv_robust_list ==
            (uintptr_t)&m->m_lock))))
                PANIC(
    "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
                    m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
                    m->m_rb_prev, (void *)curthread->robust_list,
                    (void *)curthread->priv_robust_list);
#endif
}

static bool
is_pshared_mutex(struct pthread_mutex *m)
{

        return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
}

static bool
is_robust_mutex(struct pthread_mutex *m)
{

        return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
}

int
_mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
{

#if defined(_PTHREADS_INVARIANTS)
        if (__predict_false(curthread->inact_mtx != 0))
                PANIC("inact_mtx enter");
#endif
        if (!is_robust_mutex(m))
                return (0);

        mutex_init_robust(curthread);
        curthread->inact_mtx = (uintptr_t)&m->m_lock;
        return (1);
}

void
_mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
        if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
                PANIC("inact_mtx leave");
#endif
        curthread->inact_mtx = 0;
}

static int
mutex_check_attr(const struct pthread_mutex_attr *attr)
{

        if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
            attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
                return (EINVAL);
        if (attr->m_protocol < PTHREAD_PRIO_NONE ||
            attr->m_protocol > PTHREAD_PRIO_PROTECT)
                return (EINVAL);
        return (0);
}

static void
mutex_init_robust(struct pthread *curthread)
{
        struct umtx_robust_lists_params rb;

        if (curthread == NULL)
                curthread = _get_curthread();
        if (curthread->robust_inited)
                return;
        rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
        rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
        rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
        _umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
        curthread->robust_inited = 1;
}

static void
mutex_init_body(struct pthread_mutex *pmutex,
    const struct pthread_mutex_attr *attr)
{

        pmutex->m_flags = attr->m_type;
        pmutex->m_count = 0;
        pmutex->m_spinloops = 0;
        pmutex->m_yieldloops = 0;
        mutex_init_link(pmutex);
        switch (attr->m_protocol) {
        case PTHREAD_PRIO_NONE:
                pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
                pmutex->m_lock.m_flags = 0;
                break;
        case PTHREAD_PRIO_INHERIT:
                pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
                pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
                break;
        case PTHREAD_PRIO_PROTECT:
                pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
                pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
                pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
                break;
        }
        if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
                pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
        if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
                mutex_init_robust(NULL);
                pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
        }
        if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
                pmutex->m_spinloops =
                    _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
                pmutex->m_yieldloops = _thr_yieldloops;
        }
}

static int
mutex_init(pthread_mutex_t *mutex,
    const struct pthread_mutex_attr *mutex_attr,
    void *(calloc_cb)(size_t, size_t))
{
        const struct pthread_mutex_attr *attr;
        struct pthread_mutex *pmutex;
        int error;

        if (mutex_attr == NULL) {
                attr = &_pthread_mutexattr_default;
        } else {
                attr = mutex_attr;
                error = mutex_check_attr(attr);
                if (error != 0)
                        return (error);
        }
        if ((pmutex = (pthread_mutex_t)
                calloc_cb(1, sizeof(struct pthread_mutex))) == NULL)
                return (ENOMEM);
        mutex_init_body(pmutex, attr);
        *mutex = pmutex;
        return (0);
}

static int
init_static(struct pthread *thread, pthread_mutex_t *mutex)
{
        int ret;

        THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);

        if (*mutex == THR_MUTEX_INITIALIZER)
                ret = mutex_init(mutex, &_pthread_mutexattr_default, calloc);
        else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
                ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
                    calloc);
        else
                ret = 0;
        THR_LOCK_RELEASE(thread, &_mutex_static_lock);

        return (ret);
}

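/*
 * Illustrative sketch (not part of libthr): statically initialized
 * mutexes are the ones init_static() finalizes on first use, e.g.:
 *
 *        static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
 *
 *        pthread_mutex_lock(&mtx);        <- allocation happens here
 *        pthread_mutex_unlock(&mtx);
 */
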
static void
set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
{
        struct pthread_mutex *m2;

        m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
        if (m2 != NULL)
                m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
        else
                m->m_lock.m_ceilings[1] = -1;
}

static void
shared_mutex_init(struct pthread_mutex *pmtx, const struct
    pthread_mutex_attr *mutex_attr)
{
        static const struct pthread_mutex_attr foobar_mutex_attr = {
                .m_type = PTHREAD_MUTEX_DEFAULT,
                .m_protocol = PTHREAD_PRIO_NONE,
                .m_ceiling = 0,
                .m_pshared = PTHREAD_PROCESS_SHARED,
                .m_robust = PTHREAD_MUTEX_STALLED,
        };
        bool done;

        /*
         * Hack to allow multiple pthread_mutex_init() calls on the
         * same process-shared mutex.  We rely on the kernel allocating
         * a zeroed offpage for the mutex, i.e. the
         * PMUTEX_INITSTAGE_ALLOC value must be zero.
         */
        for (done = false; !done;) {
                switch (pmtx->m_ps) {
                case PMUTEX_INITSTAGE_DONE:
                        atomic_thread_fence_acq();
                        done = true;
                        break;
                case PMUTEX_INITSTAGE_ALLOC:
                        if (atomic_cmpset_int(&pmtx->m_ps,
                            PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
                                if (mutex_attr == NULL)
                                        mutex_attr = &foobar_mutex_attr;
                                mutex_init_body(pmtx, mutex_attr);
                                atomic_store_rel_int(&pmtx->m_ps,
                                    PMUTEX_INITSTAGE_DONE);
                                done = true;
                        }
                        break;
                case PMUTEX_INITSTAGE_BUSY:
                        _pthread_yield();
                        break;
                default:
                        PANIC("corrupted offpage");
                        break;
                }
        }
}

int
__pthread_mutex_init(pthread_mutex_t * __restrict mutex,
    const pthread_mutexattr_t * __restrict mutex_attr)
{
        struct pthread_mutex *pmtx;
        int ret;

        if (mutex_attr != NULL) {
                ret = mutex_check_attr(*mutex_attr);
                if (ret != 0)
                        return (ret);
        }
        if (mutex_attr == NULL ||
            (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
                return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
                    calloc));
        }
        pmtx = __thr_pshared_offpage(__DECONST(void *, mutex), 1);
        if (pmtx == NULL)
                return (EFAULT);
        *mutex = THR_PSHARED_PTR;
        shared_mutex_init(pmtx, *mutex_attr);
        return (0);
}

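/*
 * Illustrative sketch (not part of libthr): initializing a
 * process-shared mutex placed in memory mapped by several processes,
 * which is the path that ends up in the off-page handling above.
 * Declarations and error checking are omitted; "shm" stands for any
 * MAP_SHARED region.
 *
 *        pthread_mutexattr_t attr;
 *        pthread_mutex_t *mtx = (pthread_mutex_t *)shm;
 *
 *        pthread_mutexattr_init(&attr);
 *        pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
 *        pthread_mutex_init(mtx, &attr);
 *        pthread_mutexattr_destroy(&attr);
 */
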
/* This function is used internally by malloc. */
int
_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
    void *(calloc_cb)(size_t, size_t))
{
        static const struct pthread_mutex_attr attr = {
                .m_type = PTHREAD_MUTEX_NORMAL,
                .m_protocol = PTHREAD_PRIO_NONE,
                .m_ceiling = 0,
                .m_pshared = PTHREAD_PROCESS_PRIVATE,
                .m_robust = PTHREAD_MUTEX_STALLED,
        };
        int ret;

        ret = mutex_init(mutex, &attr, calloc_cb);
        if (ret == 0)
                (*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
        return (ret);
}

/*
 * Fix mutex ownership for the child process.
 *
 * Process-private mutex ownership is transmitted from the forking
 * thread to the child process.
 *
 * Process-shared mutexes should not be inherited because their owner
 * is the forking thread, which lives in the parent process; they are
 * removed from the owned mutex list.
 */
static void
queue_fork(struct pthread *curthread, struct mutex_queue *q,
    struct mutex_queue *qp, uint bit)
{
        struct pthread_mutex *m;

        TAILQ_INIT(q);
        TAILQ_FOREACH(m, qp, m_pqe) {
                TAILQ_INSERT_TAIL(q, m, m_qe);
                m->m_lock.m_owner = TID(curthread) | bit;
        }
}

void
_mutex_fork(struct pthread *curthread)
{

        queue_fork(curthread, &curthread->mq[TMQ_NORM],
            &curthread->mq[TMQ_NORM_PRIV], 0);
        queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
            &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
        queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
            &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
        curthread->robust_list = 0;
}

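/*
 * Illustrative sketch (not part of libthr): the effect of the fixup
 * above is that a process-private mutex held across fork() is owned
 * by the lone thread in the child, so the child may unlock it.
 *
 *        pthread_mutex_lock(&mtx);
 *        if (fork() == 0) {
 *                ... child still owns mtx ...
 *                pthread_mutex_unlock(&mtx);
 *        }
 */
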
int
_pthread_mutex_destroy(pthread_mutex_t *mutex)
{
        pthread_mutex_t m, m1;
        int ret;

        m = *mutex;
        if (m < THR_MUTEX_DESTROYED) {
                ret = 0;
        } else if (m == THR_MUTEX_DESTROYED) {
                ret = EINVAL;
        } else {
                if (m == THR_PSHARED_PTR) {
                        m1 = __thr_pshared_offpage(mutex, 0);
                        if (m1 != NULL) {
                                mutex_assert_not_owned(_get_curthread(), m1);
                                __thr_pshared_destroy(mutex);
                        }
                        *mutex = THR_MUTEX_DESTROYED;
                        return (0);
                }
                if (PMUTEX_OWNER_ID(m) != 0 &&
                    (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
                        ret = EBUSY;
                } else {
                        *mutex = THR_MUTEX_DESTROYED;
                        mutex_assert_not_owned(_get_curthread(), m);
                        free(m);
                        ret = 0;
                }
        }

        return (ret);
}

static int
mutex_qidx(struct pthread_mutex *m)
{

        if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
                return (TMQ_NORM);
        return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
}

/*
 * Both enqueue_mutex() and dequeue_mutex() operate on the
 * thread-private linkage of the locked mutexes and on the robust
 * linkage.
 *
 * The robust list, as seen by the kernel, must stay consistent even
 * if the thread terminates at an arbitrary moment.  Since either
 * enqueue or dequeue on the list walked by the kernel consists of
 * rewriting a single forward pointer, this is safe.  The rewrite of
 * the back pointer is not atomic with respect to the forward one,
 * but the kernel does not care.
 */
static void
enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
    int error)
{
        struct pthread_mutex *m1;
        uintptr_t *rl;
        int qidx;

        /* Add to the list of owned mutexes: */
        if (error != EOWNERDEAD)
                mutex_assert_not_owned(curthread, m);
        qidx = mutex_qidx(m);
        TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
        if (!is_pshared_mutex(m))
                TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
        if (is_robust_mutex(m)) {
                rl = is_pshared_mutex(m) ? &curthread->robust_list :
                    &curthread->priv_robust_list;
                m->m_rb_prev = NULL;
                if (*rl != 0) {
                        m1 = __containerof((void *)*rl,
                            struct pthread_mutex, m_lock);
                        m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
                        m1->m_rb_prev = m;
                } else {
                        m1 = NULL;
                        m->m_lock.m_rb_lnk = 0;
                }
                *rl = (uintptr_t)&m->m_lock;
        }
}

static void
dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
{
        struct pthread_mutex *mp, *mn;
        int qidx;

        mutex_assert_is_owned(m);
        qidx = mutex_qidx(m);
        if (is_robust_mutex(m)) {
                mp = m->m_rb_prev;
                if (mp == NULL) {
                        if (is_pshared_mutex(m)) {
                                curthread->robust_list = m->m_lock.m_rb_lnk;
                        } else {
                                curthread->priv_robust_list =
                                    m->m_lock.m_rb_lnk;
                        }
                } else {
                        mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
                }
                if (m->m_lock.m_rb_lnk != 0) {
                        mn = __containerof((void *)m->m_lock.m_rb_lnk,
                            struct pthread_mutex, m_lock);
                        mn->m_rb_prev = m->m_rb_prev;
                }
                m->m_lock.m_rb_lnk = 0;
                m->m_rb_prev = NULL;
        }
        TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
        if (!is_pshared_mutex(m))
                TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
        if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
                set_inherited_priority(curthread, m);
        mutex_init_link(m);
}

static int
check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
{
        int ret;

        *m = *mutex;
        ret = 0;
        if (*m == THR_PSHARED_PTR) {
                *m = __thr_pshared_offpage(mutex, 0);
                if (*m == NULL)
                        ret = EINVAL;
                else
                        shared_mutex_init(*m, NULL);
        } else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
                if (*m == THR_MUTEX_DESTROYED) {
                        ret = EINVAL;
                } else {
                        ret = init_static(_get_curthread(), mutex);
                        if (ret == 0)
                                *m = *mutex;
                }
        }
        return (ret);
}

int
__pthread_mutex_trylock(pthread_mutex_t *mutex)
{
        struct pthread *curthread;
        struct pthread_mutex *m;
        uint32_t id;
        int ret, robust;

        ret = check_and_init_mutex(mutex, &m);
        if (ret != 0)
                return (ret);
        curthread = _get_curthread();
        id = TID(curthread);
        if (m->m_flags & PMUTEX_FLAG_PRIVATE)
                THR_CRITICAL_ENTER(curthread);
        robust = _mutex_enter_robust(curthread, m);
        ret = _thr_umutex_trylock(&m->m_lock, id);
        if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
                enqueue_mutex(curthread, m, ret);
                if (ret == EOWNERDEAD)
                        m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
        } else if (PMUTEX_OWNER_ID(m) == id) {
                ret = mutex_self_trylock(m);
        } /* else {} */
        if (robust)
                _mutex_leave_robust(curthread, m);
        if (ret != 0 && ret != EOWNERDEAD &&
            (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
                THR_CRITICAL_LEAVE(curthread);
        return (ret);
}

static int
mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
    const struct timespec *abstime)
{
        uint32_t id, owner;
        int count, ret;

        id = TID(curthread);
        if (PMUTEX_OWNER_ID(m) == id)
                return (mutex_self_lock(m, abstime));

        /*
         * For adaptive mutexes, spin for a bit in the expectation
         * that if the application requests this mutex type then
         * the lock is likely to be released quickly, making spinning
         * faster than entering the kernel.
         */
        if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
            UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
                goto sleep_in_kernel;

        if (!_thr_is_smp)
                goto yield_loop;

        count = m->m_spinloops;
        while (count--) {
                owner = m->m_lock.m_owner;
                if ((owner & ~UMUTEX_CONTESTED) == 0) {
                        if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
                            id | owner)) {
                                ret = 0;
                                goto done;
                        }
                }
                CPU_SPINWAIT;
        }

yield_loop:
        count = m->m_yieldloops;
        while (count--) {
                _sched_yield();
                owner = m->m_lock.m_owner;
                if ((owner & ~UMUTEX_CONTESTED) == 0) {
                        if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
                            id | owner)) {
                                ret = 0;
                                goto done;
                        }
                }
        }

sleep_in_kernel:
        if (abstime == NULL)
                ret = __thr_umutex_lock(&m->m_lock, id);
        else if (__predict_false(abstime->tv_nsec < 0 ||
            abstime->tv_nsec >= 1000000000))
                ret = EINVAL;
        else
                ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
done:
        if (ret == 0 || ret == EOWNERDEAD) {
                enqueue_mutex(curthread, m, ret);
                if (ret == EOWNERDEAD)
                        m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
        }
        return (ret);
}

static inline int
mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
    bool cvattach, bool rb_onlist)
{
        struct pthread *curthread;
        int ret, robust;

        robust = 0;  /* pacify gcc */
        curthread = _get_curthread();
        if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
                THR_CRITICAL_ENTER(curthread);
        if (!rb_onlist)
                robust = _mutex_enter_robust(curthread, m);
        ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
        if (ret == 0 || ret == EOWNERDEAD) {
                enqueue_mutex(curthread, m, ret);
                if (ret == EOWNERDEAD)
                        m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
        } else {
                ret = mutex_lock_sleep(curthread, m, abstime);
        }
        if (!rb_onlist && robust)
                _mutex_leave_robust(curthread, m);
        if (ret != 0 && ret != EOWNERDEAD &&
            (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
                THR_CRITICAL_LEAVE(curthread);
        return (ret);
}

int
__pthread_mutex_lock(pthread_mutex_t *mutex)
{
        struct pthread_mutex *m;
        int ret;

        _thr_check_init();
        ret = check_and_init_mutex(mutex, &m);
        if (ret == 0)
                ret = mutex_lock_common(m, NULL, false, false);
        return (ret);
}

int
__pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
    const struct timespec * __restrict abstime)
{
        struct pthread_mutex *m;
        int ret;

        _thr_check_init();
        ret = check_and_init_mutex(mutex, &m);
        if (ret == 0)
                ret = mutex_lock_common(m, abstime, false, false);
        return (ret);
}

int
_pthread_mutex_unlock(pthread_mutex_t *mutex)
{
        struct pthread_mutex *mp;

        if (*mutex == THR_PSHARED_PTR) {
                mp = __thr_pshared_offpage(mutex, 0);
                if (mp == NULL)
                        return (EINVAL);
                shared_mutex_init(mp, NULL);
        } else {
                mp = *mutex;
        }
        return (mutex_unlock_common(mp, false, NULL));
}

int
_mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
{
        int error;

        error = mutex_lock_common(m, NULL, true, rb_onlist);
        if (error == 0 || error == EOWNERDEAD)
                m->m_count = count;
        return (error);
}

int
_mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
{

        /*
         * Clear the count in case this is a recursive mutex.
         */
        *count = m->m_count;
        m->m_count = 0;
        (void)mutex_unlock_common(m, true, defer);
        return (0);
}

int
_mutex_cv_attach(struct pthread_mutex *m, int count)
{
        struct pthread *curthread;

        curthread = _get_curthread();
        enqueue_mutex(curthread, m, 0);
        m->m_count = count;
        return (0);
}

int
_mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
{
        struct pthread *curthread;
        int deferred, error;

        curthread = _get_curthread();
        if ((error = _mutex_owned(curthread, mp)) != 0)
                return (error);

        /*
         * Clear the count in case this is a recursive mutex.
         */
        *recurse = mp->m_count;
        mp->m_count = 0;
        dequeue_mutex(curthread, mp);

        /* Will this happen in the real world? */
        if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
                deferred = 1;
                mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
        } else
                deferred = 0;

        if (deferred) {
                _thr_wake_all(curthread->defer_waiters,
                    curthread->nwaiter_defer);
                curthread->nwaiter_defer = 0;
        }
        return (0);
}

static int
mutex_self_trylock(struct pthread_mutex *m)
{
        int ret;

        switch (PMUTEX_TYPE(m->m_flags)) {
        case PTHREAD_MUTEX_ERRORCHECK:
        case PTHREAD_MUTEX_NORMAL:
        case PTHREAD_MUTEX_ADAPTIVE_NP:
                ret = EBUSY;
                break;

        case PTHREAD_MUTEX_RECURSIVE:
                /* Increment the lock count: */
                if (m->m_count + 1 > 0) {
                        m->m_count++;
                        ret = 0;
                } else
                        ret = EAGAIN;
                break;

        default:
                /* Trap invalid mutex types. */
                ret = EINVAL;
        }

        return (ret);
}

static int
mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
{
        struct timespec ts1, ts2;
        int ret;

        switch (PMUTEX_TYPE(m->m_flags)) {
        case PTHREAD_MUTEX_ERRORCHECK:
        case PTHREAD_MUTEX_ADAPTIVE_NP:
                if (abstime) {
                        if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
                            abstime->tv_nsec >= 1000000000) {
                                ret = EINVAL;
                        } else {
                                clock_gettime(CLOCK_REALTIME, &ts1);
                                TIMESPEC_SUB(&ts2, abstime, &ts1);
                                __sys_nanosleep(&ts2, NULL);
                                ret = ETIMEDOUT;
                        }
                } else {
                        /*
                         * POSIX specifies that mutexes should return
                         * EDEADLK if a recursive lock is detected.
                         */
                        ret = EDEADLK;
                }
                break;

        case PTHREAD_MUTEX_NORMAL:
                /*
                 * What SS2 defines as a 'normal' mutex.  Intentionally
                 * deadlock on attempts to get a lock you already own.
                 */
                ret = 0;
                if (abstime) {
                        if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
                            abstime->tv_nsec >= 1000000000) {
                                ret = EINVAL;
                        } else {
                                clock_gettime(CLOCK_REALTIME, &ts1);
                                TIMESPEC_SUB(&ts2, abstime, &ts1);
                                __sys_nanosleep(&ts2, NULL);
                                ret = ETIMEDOUT;
                        }
                } else {
                        ts1.tv_sec = 30;
                        ts1.tv_nsec = 0;
                        for (;;)
                                __sys_nanosleep(&ts1, NULL);
                }
                break;

        case PTHREAD_MUTEX_RECURSIVE:
                /* Increment the lock count: */
                if (m->m_count + 1 > 0) {
                        m->m_count++;
                        ret = 0;
                } else
                        ret = EAGAIN;
                break;

        default:
                /* Trap invalid mutex types. */
                ret = EINVAL;
        }

        return (ret);
}

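/*
 * Illustrative sketch (not part of libthr): the self-lock handling
 * above, as seen from application code with an error-checking mutex.
 * Declarations and error checking omitted.
 *
 *        pthread_mutexattr_init(&attr);
 *        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
 *        pthread_mutex_init(&mtx, &attr);
 *        pthread_mutex_lock(&mtx);
 *        error = pthread_mutex_lock(&mtx);        <- returns EDEADLK
 */
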
static int
mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
{
        struct pthread *curthread;
        uint32_t id;
        int deferred, error, private, robust;

        if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
                if (m == THR_MUTEX_DESTROYED)
                        return (EINVAL);
                return (EPERM);
        }

        curthread = _get_curthread();
        id = TID(curthread);

        /*
         * Check if the running thread is not the owner of the mutex.
         */
        if (__predict_false(PMUTEX_OWNER_ID(m) != id))
                return (EPERM);

        error = 0;
        private = (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0;
        if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
            PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
                m->m_count--;
        } else {
                if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
                        deferred = 1;
                        m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
                } else
                        deferred = 0;

                robust = _mutex_enter_robust(curthread, m);
                dequeue_mutex(curthread, m);
                error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
                if (deferred) {
                        if (mtx_defer == NULL) {
                                _thr_wake_all(curthread->defer_waiters,
                                    curthread->nwaiter_defer);
                                curthread->nwaiter_defer = 0;
                        } else
                                *mtx_defer = 1;
                }
                if (robust)
                        _mutex_leave_robust(curthread, m);
        }
        if (!cv && private)
                THR_CRITICAL_LEAVE(curthread);
        return (error);
}

int
_pthread_mutex_getprioceiling(const pthread_mutex_t * __restrict mutex,
    int * __restrict prioceiling)
{
        struct pthread_mutex *m;

        if (*mutex == THR_PSHARED_PTR) {
                m = __thr_pshared_offpage(__DECONST(void *, mutex), 0);
                if (m == NULL)
                        return (EINVAL);
                shared_mutex_init(m, NULL);
        } else {
                m = *mutex;
                if (m <= THR_MUTEX_DESTROYED)
                        return (EINVAL);
        }
        if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
                return (EINVAL);
        *prioceiling = m->m_lock.m_ceilings[0];
        return (0);
}

int
_pthread_mutex_setprioceiling(pthread_mutex_t * __restrict mutex,
    int ceiling, int * __restrict old_ceiling)
{
        struct pthread *curthread;
        struct pthread_mutex *m, *m1, *m2;
        struct mutex_queue *q, *qp;
        int qidx, ret;

        if (*mutex == THR_PSHARED_PTR) {
                m = __thr_pshared_offpage(mutex, 0);
                if (m == NULL)
                        return (EINVAL);
                shared_mutex_init(m, NULL);
        } else {
                m = *mutex;
                if (m <= THR_MUTEX_DESTROYED)
                        return (EINVAL);
        }
        if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
                return (EINVAL);

        ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
        if (ret != 0)
                return (ret);

        curthread = _get_curthread();
        if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
                mutex_assert_is_owned(m);
                m1 = TAILQ_PREV(m, mutex_queue, m_qe);
                m2 = TAILQ_NEXT(m, m_qe);
                if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
                    (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
                        qidx = mutex_qidx(m);
                        q = &curthread->mq[qidx];
                        qp = &curthread->mq[qidx + 1];
                        TAILQ_REMOVE(q, m, m_qe);
                        if (!is_pshared_mutex(m))
                                TAILQ_REMOVE(qp, m, m_pqe);
                        TAILQ_FOREACH(m2, q, m_qe) {
                                if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
                                        TAILQ_INSERT_BEFORE(m2, m, m_qe);
                                        if (!is_pshared_mutex(m)) {
                                                while (m2 != NULL &&
                                                    is_pshared_mutex(m2)) {
                                                        m2 = TAILQ_PREV(m2,
                                                            mutex_queue, m_qe);
                                                }
                                                if (m2 == NULL) {
                                                        TAILQ_INSERT_HEAD(qp,
                                                            m, m_pqe);
                                                } else {
                                                        TAILQ_INSERT_BEFORE(m2,
                                                            m, m_pqe);
                                                }
                                        }
                                        return (0);
                                }
                        }
                        TAILQ_INSERT_TAIL(q, m, m_qe);
                        if (!is_pshared_mutex(m))
                                TAILQ_INSERT_TAIL(qp, m, m_pqe);
                }
        }
        return (0);
}

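/*
 * Illustrative sketch (not part of libthr): creating a
 * priority-protected mutex and raising its ceiling later; the ceiling
 * values are arbitrary and must fall within the scheduling priority
 * range of the running system.  Declarations and error checking
 * omitted.
 *
 *        pthread_mutexattr_init(&attr);
 *        pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_PROTECT);
 *        pthread_mutexattr_setprioceiling(&attr, 20);
 *        pthread_mutex_init(&mtx, &attr);
 *        ...
 *        pthread_mutex_setprioceiling(&mtx, 25, &old_ceiling);
 */
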
int
_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
{
        struct pthread_mutex *m;
        int ret;

        ret = check_and_init_mutex(mutex, &m);
        if (ret == 0)
                *count = m->m_spinloops;
        return (ret);
}

int
__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
{
        struct pthread_mutex *m;
        int ret;

        ret = check_and_init_mutex(mutex, &m);
        if (ret == 0)
                m->m_spinloops = count;
        return (ret);
}

int
_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
{
        struct pthread_mutex *m;
        int ret;

        ret = check_and_init_mutex(mutex, &m);
        if (ret == 0)
                *count = m->m_yieldloops;
        return (ret);
}

int
__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
{
        struct pthread_mutex *m;
        int ret;

        ret = check_and_init_mutex(mutex, &m);
        if (ret == 0)
                m->m_yieldloops = count;
        return (ret);
}

int
_pthread_mutex_isowned_np(pthread_mutex_t *mutex)
{
        struct pthread_mutex *m;

        if (*mutex == THR_PSHARED_PTR) {
                m = __thr_pshared_offpage(mutex, 0);
                if (m == NULL)
                        return (0);
                shared_mutex_init(m, NULL);
        } else {
                m = *mutex;
                if (m <= THR_MUTEX_DESTROYED)
                        return (0);
        }
        return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
}

int
_mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
{

        if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
                if (mp == THR_MUTEX_DESTROYED)
                        return (EINVAL);
                return (EPERM);
        }
        if (PMUTEX_OWNER_ID(mp) != TID(curthread))
                return (EPERM);
        return (0);
}

int
_pthread_mutex_consistent(pthread_mutex_t *mutex)
{
        struct pthread_mutex *m;
        struct pthread *curthread;

        if (*mutex == THR_PSHARED_PTR) {
                m = __thr_pshared_offpage(mutex, 0);
                if (m == NULL)
                        return (EINVAL);
                shared_mutex_init(m, NULL);
        } else {
                m = *mutex;
                if (m <= THR_MUTEX_DESTROYED)
                        return (EINVAL);
        }
        curthread = _get_curthread();
        if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
            (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
                return (EINVAL);
        if (PMUTEX_OWNER_ID(m) != TID(curthread))
                return (EPERM);
        m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
        return (0);
}
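
/*
 * Illustrative sketch (not part of libthr): recovering a robust mutex
 * whose previous owner terminated while holding it, which is when
 * pthread_mutex_consistent() above applies.
 *
 *        error = pthread_mutex_lock(&mtx);
 *        if (error == EOWNERDEAD) {
 *                ... repair the protected state ...
 *                pthread_mutex_consistent(&mtx);
 *        }
 *        ... use the protected state ...
 *        pthread_mutex_unlock(&mtx);
 */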