]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - lib/libthr/thread/thr_mutex.c
MFV r337744:
[FreeBSD/FreeBSD.git] / lib / libthr / thread / thr_mutex.c
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
5  * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
6  * Copyright (c) 2015, 2016 The FreeBSD Foundation
7  *
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Konstantin Belousov
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *      This product includes software developed by John Birrell.
24  * 4. Neither the name of the author nor the names of any co-contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  */
40
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43
#include "namespace.h"
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <sys/param.h>
#include <sys/queue.h>
#include <pthread.h>
#include <pthread_np.h>
#include "un-namespace.h"
53
54 #include "thr_private.h"
55
56 _Static_assert(sizeof(struct pthread_mutex) <= PAGE_SIZE,
57     "pthread_mutex is too large for off-page");
58
59 /*
60  * For adaptive mutexes, how many times to spin doing trylock2
61  * before entering the kernel to block
62  */
63 #define MUTEX_ADAPTIVE_SPINS    2000
64
65 /*
66  * Prototypes
67  */
68 int     __pthread_mutex_consistent(pthread_mutex_t *mutex);
69 int     __pthread_mutex_init(pthread_mutex_t *mutex,
70                 const pthread_mutexattr_t *mutex_attr);
71 int     __pthread_mutex_trylock(pthread_mutex_t *mutex);
72 int     __pthread_mutex_lock(pthread_mutex_t *mutex);
73 int     __pthread_mutex_timedlock(pthread_mutex_t *mutex,
74                 const struct timespec *abstime);
75 int     _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
76 int     _pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
77 int     __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
78 int     _pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
79 int     _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
80 int     __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
81
82 static int      mutex_self_trylock(pthread_mutex_t);
83 static int      mutex_self_lock(pthread_mutex_t,
84                                 const struct timespec *abstime);
85 static int      mutex_unlock_common(struct pthread_mutex *, bool, int *);
86 static int      mutex_lock_sleep(struct pthread *, pthread_mutex_t,
87                                 const struct timespec *);
88 static void     mutex_init_robust(struct pthread *curthread);
89 static int      mutex_qidx(struct pthread_mutex *m);
90 static bool     is_robust_mutex(struct pthread_mutex *m);
91 static bool     is_pshared_mutex(struct pthread_mutex *m);
92
/*
 * Entry points with a double-underscore implementation: the public
 * name is a weak alias and the single-underscore name a strong alias.
 */
__weak_reference(__pthread_mutex_init, pthread_mutex_init);
__strong_reference(__pthread_mutex_init, _pthread_mutex_init);
__weak_reference(__pthread_mutex_lock, pthread_mutex_lock);
__strong_reference(__pthread_mutex_lock, _pthread_mutex_lock);
__weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
__strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
__weak_reference(__pthread_mutex_trylock, pthread_mutex_trylock);
__strong_reference(__pthread_mutex_trylock, _pthread_mutex_trylock);

/* Here the single-underscore name is the implementation. */
__weak_reference(_pthread_mutex_consistent, pthread_mutex_consistent);
__strong_reference(_pthread_mutex_consistent, __pthread_mutex_consistent);

/*
 * Single underscore versions are provided for libc internal usage.
 * libc and applications use the same implementation of these:
 */
__weak_reference(_pthread_mutex_destroy, pthread_mutex_destroy);
__weak_reference(_pthread_mutex_unlock, pthread_mutex_unlock);

__weak_reference(_pthread_mutex_getprioceiling,
    pthread_mutex_getprioceiling);
__weak_reference(_pthread_mutex_setprioceiling,
    pthread_mutex_setprioceiling);

/* Non-portable spin/yield loop tuning and ownership query. */
__weak_reference(__pthread_mutex_setspinloops_np,
    pthread_mutex_setspinloops_np);
__strong_reference(__pthread_mutex_setspinloops_np,
    _pthread_mutex_setspinloops_np);
__weak_reference(_pthread_mutex_getspinloops_np,
    pthread_mutex_getspinloops_np);

__weak_reference(__pthread_mutex_setyieldloops_np,
    pthread_mutex_setyieldloops_np);
__strong_reference(__pthread_mutex_setyieldloops_np,
    _pthread_mutex_setyieldloops_np);
__weak_reference(_pthread_mutex_getyieldloops_np,
    pthread_mutex_getyieldloops_np);
__weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);
120
/*
 * Clear the next/prev pointers of both queue entries (m_qe and m_pqe)
 * of the mutex.  The body is compiled only when _PTHREADS_INVARIANTS
 * is defined; otherwise the function does nothing.
 */
static void
mutex_init_link(struct pthread_mutex *m)
{

#ifdef _PTHREADS_INVARIANTS
	m->m_qe.tqe_next = NULL;
	m->m_qe.tqe_prev = NULL;
	m->m_pqe.tqe_next = NULL;
	m->m_pqe.tqe_prev = NULL;
#endif
}
132
133 static void
134 mutex_assert_is_owned(struct pthread_mutex *m __unused)
135 {
136
137 #if defined(_PTHREADS_INVARIANTS)
138         if (__predict_false(m->m_qe.tqe_prev == NULL))
139                 PANIC("mutex %p own %#x is not on list %p %p",
140                     m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
141 #endif
142 }
143
144 static void
145 mutex_assert_not_owned(struct pthread *curthread __unused,
146     struct pthread_mutex *m __unused)
147 {
148
149 #if defined(_PTHREADS_INVARIANTS)
150         if (__predict_false(m->m_qe.tqe_prev != NULL ||
151             m->m_qe.tqe_next != NULL))
152                 PANIC("mutex %p own %#x is on list %p %p",
153                     m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
154         if (__predict_false(is_robust_mutex(m) &&
155             (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
156             (is_pshared_mutex(m) && curthread->robust_list ==
157             (uintptr_t)&m->m_lock) ||
158             (!is_pshared_mutex(m) && curthread->priv_robust_list ==
159             (uintptr_t)&m->m_lock))))
160                 PANIC(
161     "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
162                     m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
163                     m->m_rb_prev, (void *)curthread->robust_list,
164                     (void *)curthread->priv_robust_list);
165 #endif
166 }
167
168 static bool
169 is_pshared_mutex(struct pthread_mutex *m)
170 {
171
172         return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
173 }
174
175 static bool
176 is_robust_mutex(struct pthread_mutex *m)
177 {
178
179         return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
180 }
181
182 int
183 _mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
184 {
185
186 #if defined(_PTHREADS_INVARIANTS)
187         if (__predict_false(curthread->inact_mtx != 0))
188                 PANIC("inact_mtx enter");
189 #endif
190         if (!is_robust_mutex(m))
191                 return (0);
192
193         mutex_init_robust(curthread);
194         curthread->inact_mtx = (uintptr_t)&m->m_lock;
195         return (1);
196 }
197
198 void
199 _mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
200 {
201
202 #if defined(_PTHREADS_INVARIANTS)
203         if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
204                 PANIC("inact_mtx leave");
205 #endif
206         curthread->inact_mtx = 0;
207 }
208
209 static int
210 mutex_check_attr(const struct pthread_mutex_attr *attr)
211 {
212
213         if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
214             attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
215                 return (EINVAL);
216         if (attr->m_protocol < PTHREAD_PRIO_NONE ||
217             attr->m_protocol > PTHREAD_PRIO_PROTECT)
218                 return (EINVAL);
219         return (0);
220 }
221
222 static void
223 mutex_init_robust(struct pthread *curthread)
224 {
225         struct umtx_robust_lists_params rb;
226
227         if (curthread == NULL)
228                 curthread = _get_curthread();
229         if (curthread->robust_inited)
230                 return;
231         rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
232         rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
233         rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
234         _umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
235         curthread->robust_inited = 1;
236 }
237
238 static void
239 mutex_init_body(struct pthread_mutex *pmutex,
240     const struct pthread_mutex_attr *attr)
241 {
242
243         pmutex->m_flags = attr->m_type;
244         pmutex->m_count = 0;
245         pmutex->m_spinloops = 0;
246         pmutex->m_yieldloops = 0;
247         mutex_init_link(pmutex);
248         switch (attr->m_protocol) {
249         case PTHREAD_PRIO_NONE:
250                 pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
251                 pmutex->m_lock.m_flags = 0;
252                 break;
253         case PTHREAD_PRIO_INHERIT:
254                 pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
255                 pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
256                 break;
257         case PTHREAD_PRIO_PROTECT:
258                 pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
259                 pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
260                 pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
261                 break;
262         }
263         if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
264                 pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
265         if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
266                 mutex_init_robust(NULL);
267                 pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
268         }
269         if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
270                 pmutex->m_spinloops =
271                     _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
272                 pmutex->m_yieldloops = _thr_yieldloops;
273         }
274 }
275
276 static int
277 mutex_init(pthread_mutex_t *mutex,
278     const struct pthread_mutex_attr *mutex_attr,
279     void *(calloc_cb)(size_t, size_t))
280 {
281         const struct pthread_mutex_attr *attr;
282         struct pthread_mutex *pmutex;
283         int error;
284
285         if (mutex_attr == NULL) {
286                 attr = &_pthread_mutexattr_default;
287         } else {
288                 attr = mutex_attr;
289                 error = mutex_check_attr(attr);
290                 if (error != 0)
291                         return (error);
292         }
293         if ((pmutex = (pthread_mutex_t)
294                 calloc_cb(1, sizeof(struct pthread_mutex))) == NULL)
295                 return (ENOMEM);
296         mutex_init_body(pmutex, attr);
297         *mutex = pmutex;
298         return (0);
299 }
300
301 static int
302 init_static(struct pthread *thread, pthread_mutex_t *mutex)
303 {
304         int ret;
305
306         THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);
307
308         if (*mutex == THR_MUTEX_INITIALIZER)
309                 ret = mutex_init(mutex, &_pthread_mutexattr_default, calloc);
310         else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
311                 ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
312                     calloc);
313         else
314                 ret = 0;
315         THR_LOCK_RELEASE(thread, &_mutex_static_lock);
316
317         return (ret);
318 }
319
320 static void
321 set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
322 {
323         struct pthread_mutex *m2;
324
325         m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
326         if (m2 != NULL)
327                 m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
328         else
329                 m->m_lock.m_ceilings[1] = -1;
330 }
331
332 static void
333 shared_mutex_init(struct pthread_mutex *pmtx, const struct
334     pthread_mutex_attr *mutex_attr)
335 {
336         static const struct pthread_mutex_attr foobar_mutex_attr = {
337                 .m_type = PTHREAD_MUTEX_DEFAULT,
338                 .m_protocol = PTHREAD_PRIO_NONE,
339                 .m_ceiling = 0,
340                 .m_pshared = PTHREAD_PROCESS_SHARED,
341                 .m_robust = PTHREAD_MUTEX_STALLED,
342         };
343         bool done;
344
345         /*
346          * Hack to allow multiple pthread_mutex_init() calls on the
347          * same process-shared mutex.  We rely on kernel allocating
348          * zeroed offpage for the mutex, i.e. the
349          * PMUTEX_INITSTAGE_ALLOC value must be zero.
350          */
351         for (done = false; !done;) {
352                 switch (pmtx->m_ps) {
353                 case PMUTEX_INITSTAGE_DONE:
354                         atomic_thread_fence_acq();
355                         done = true;
356                         break;
357                 case PMUTEX_INITSTAGE_ALLOC:
358                         if (atomic_cmpset_int(&pmtx->m_ps,
359                             PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
360                                 if (mutex_attr == NULL)
361                                         mutex_attr = &foobar_mutex_attr;
362                                 mutex_init_body(pmtx, mutex_attr);
363                                 atomic_store_rel_int(&pmtx->m_ps,
364                                     PMUTEX_INITSTAGE_DONE);
365                                 done = true;
366                         }
367                         break;
368                 case PMUTEX_INITSTAGE_BUSY:
369                         _pthread_yield();
370                         break;
371                 default:
372                         PANIC("corrupted offpage");
373                         break;
374                 }
375         }
376 }
377
378 int
379 __pthread_mutex_init(pthread_mutex_t *mutex,
380     const pthread_mutexattr_t *mutex_attr)
381 {
382         struct pthread_mutex *pmtx;
383         int ret;
384
385         if (mutex_attr != NULL) {
386                 ret = mutex_check_attr(*mutex_attr);
387                 if (ret != 0)
388                         return (ret);
389         }
390         if (mutex_attr == NULL ||
391             (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
392                 return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
393                     calloc));
394         }
395         pmtx = __thr_pshared_offpage(mutex, 1);
396         if (pmtx == NULL)
397                 return (EFAULT);
398         *mutex = THR_PSHARED_PTR;
399         shared_mutex_init(pmtx, *mutex_attr);
400         return (0);
401 }
402
403 /* This function is used internally by malloc. */
404 int
405 _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
406     void *(calloc_cb)(size_t, size_t))
407 {
408         static const struct pthread_mutex_attr attr = {
409                 .m_type = PTHREAD_MUTEX_NORMAL,
410                 .m_protocol = PTHREAD_PRIO_NONE,
411                 .m_ceiling = 0,
412                 .m_pshared = PTHREAD_PROCESS_PRIVATE,
413                 .m_robust = PTHREAD_MUTEX_STALLED,
414         };
415         int ret;
416
417         ret = mutex_init(mutex, &attr, calloc_cb);
418         if (ret == 0)
419                 (*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
420         return (ret);
421 }
422
423 /*
424  * Fix mutex ownership for child process.
425  *
426  * Process private mutex ownership is transmitted from the forking
427  * thread to the child process.
428  *
429  * Process shared mutex should not be inherited because owner is
430  * forking thread which is in parent process, they are removed from
431  * the owned mutex list.
432  */
433 static void
434 queue_fork(struct pthread *curthread, struct mutex_queue *q,
435     struct mutex_queue *qp, uint bit)
436 {
437         struct pthread_mutex *m;
438
439         TAILQ_INIT(q);
440         TAILQ_FOREACH(m, qp, m_pqe) {
441                 TAILQ_INSERT_TAIL(q, m, m_qe);
442                 m->m_lock.m_owner = TID(curthread) | bit;
443         }
444 }
445
446 void
447 _mutex_fork(struct pthread *curthread)
448 {
449
450         queue_fork(curthread, &curthread->mq[TMQ_NORM],
451             &curthread->mq[TMQ_NORM_PRIV], 0);
452         queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
453             &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
454         queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
455             &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
456         curthread->robust_list = 0;
457 }
458
459 int
460 _pthread_mutex_destroy(pthread_mutex_t *mutex)
461 {
462         pthread_mutex_t m, m1;
463         int ret;
464
465         m = *mutex;
466         if (m < THR_MUTEX_DESTROYED) {
467                 ret = 0;
468         } else if (m == THR_MUTEX_DESTROYED) {
469                 ret = EINVAL;
470         } else {
471                 if (m == THR_PSHARED_PTR) {
472                         m1 = __thr_pshared_offpage(mutex, 0);
473                         if (m1 != NULL) {
474                                 mutex_assert_not_owned(_get_curthread(), m1);
475                                 __thr_pshared_destroy(mutex);
476                         }
477                         *mutex = THR_MUTEX_DESTROYED;
478                         return (0);
479                 }
480                 if (PMUTEX_OWNER_ID(m) != 0 &&
481                     (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
482                         ret = EBUSY;
483                 } else {
484                         *mutex = THR_MUTEX_DESTROYED;
485                         mutex_assert_not_owned(_get_curthread(), m);
486                         free(m);
487                         ret = 0;
488                 }
489         }
490
491         return (ret);
492 }
493
494 static int
495 mutex_qidx(struct pthread_mutex *m)
496 {
497
498         if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
499                 return (TMQ_NORM);
500         return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
501 }
502
503 /*
504  * Both enqueue_mutex() and dequeue_mutex() operate on the
505  * thread-private linkage of the locked mutexes and on the robust
506  * linkage.
507  *
508  * Robust list, as seen by kernel, must be consistent even in the case
509  * of thread termination at arbitrary moment.  Since either enqueue or
510  * dequeue for list walked by kernel consists of rewriting a single
511  * forward pointer, it is safe.  On the other hand, rewrite of the
512  * back pointer is not atomic WRT the forward one, but kernel does not
513  * care.
514  */
515 static void
516 enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
517     int error)
518 {
519         struct pthread_mutex *m1;
520         uintptr_t *rl;
521         int qidx;
522
523         /* Add to the list of owned mutexes: */
524         if (error != EOWNERDEAD)
525                 mutex_assert_not_owned(curthread, m);
526         qidx = mutex_qidx(m);
527         TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
528         if (!is_pshared_mutex(m))
529                 TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
530         if (is_robust_mutex(m)) {
531                 rl = is_pshared_mutex(m) ? &curthread->robust_list :
532                     &curthread->priv_robust_list;
533                 m->m_rb_prev = NULL;
534                 if (*rl != 0) {
535                         m1 = __containerof((void *)*rl,
536                             struct pthread_mutex, m_lock);
537                         m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
538                         m1->m_rb_prev = m;
539                 } else {
540                         m1 = NULL;
541                         m->m_lock.m_rb_lnk = 0;
542                 }
543                 *rl = (uintptr_t)&m->m_lock;
544         }
545 }
546
547 static void
548 dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
549 {
550         struct pthread_mutex *mp, *mn;
551         int qidx;
552
553         mutex_assert_is_owned(m);
554         qidx = mutex_qidx(m);
555         if (is_robust_mutex(m)) {
556                 mp = m->m_rb_prev;
557                 if (mp == NULL) {
558                         if (is_pshared_mutex(m)) {
559                                 curthread->robust_list = m->m_lock.m_rb_lnk;
560                         } else {
561                                 curthread->priv_robust_list =
562                                     m->m_lock.m_rb_lnk;
563                         }
564                 } else {
565                         mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
566                 }
567                 if (m->m_lock.m_rb_lnk != 0) {
568                         mn = __containerof((void *)m->m_lock.m_rb_lnk,
569                             struct pthread_mutex, m_lock);
570                         mn->m_rb_prev = m->m_rb_prev;
571                 }
572                 m->m_lock.m_rb_lnk = 0;
573                 m->m_rb_prev = NULL;
574         }
575         TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
576         if (!is_pshared_mutex(m))
577                 TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
578         if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
579                 set_inherited_priority(curthread, m);
580         mutex_init_link(m);
581 }
582
583 static int
584 check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
585 {
586         int ret;
587
588         *m = *mutex;
589         ret = 0;
590         if (*m == THR_PSHARED_PTR) {
591                 *m = __thr_pshared_offpage(mutex, 0);
592                 if (*m == NULL)
593                         ret = EINVAL;
594                 else
595                         shared_mutex_init(*m, NULL);
596         } else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
597                 if (*m == THR_MUTEX_DESTROYED) {
598                         ret = EINVAL;
599                 } else {
600                         ret = init_static(_get_curthread(), mutex);
601                         if (ret == 0)
602                                 *m = *mutex;
603                 }
604         }
605         return (ret);
606 }
607
608 int
609 __pthread_mutex_trylock(pthread_mutex_t *mutex)
610 {
611         struct pthread *curthread;
612         struct pthread_mutex *m;
613         uint32_t id;
614         int ret, robust;
615
616         ret = check_and_init_mutex(mutex, &m);
617         if (ret != 0)
618                 return (ret);
619         curthread = _get_curthread();
620         id = TID(curthread);
621         if (m->m_flags & PMUTEX_FLAG_PRIVATE)
622                 THR_CRITICAL_ENTER(curthread);
623         robust = _mutex_enter_robust(curthread, m);
624         ret = _thr_umutex_trylock(&m->m_lock, id);
625         if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
626                 enqueue_mutex(curthread, m, ret);
627                 if (ret == EOWNERDEAD)
628                         m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
629         } else if (PMUTEX_OWNER_ID(m) == id) {
630                 ret = mutex_self_trylock(m);
631         } /* else {} */
632         if (robust)
633                 _mutex_leave_robust(curthread, m);
634         if (ret != 0 && ret != EOWNERDEAD &&
635             (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
636                 THR_CRITICAL_LEAVE(curthread);
637         return (ret);
638 }
639
640 static int
641 mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
642     const struct timespec *abstime)
643 {
644         uint32_t id, owner;
645         int count, ret;
646
647         id = TID(curthread);
648         if (PMUTEX_OWNER_ID(m) == id)
649                 return (mutex_self_lock(m, abstime));
650
651         /*
652          * For adaptive mutexes, spin for a bit in the expectation
653          * that if the application requests this mutex type then
654          * the lock is likely to be released quickly and it is
655          * faster than entering the kernel
656          */
657         if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
658             UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
659                 goto sleep_in_kernel;
660
661         if (!_thr_is_smp)
662                 goto yield_loop;
663
664         count = m->m_spinloops;
665         while (count--) {
666                 owner = m->m_lock.m_owner;
667                 if ((owner & ~UMUTEX_CONTESTED) == 0) {
668                         if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
669                             id | owner)) {
670                                 ret = 0;
671                                 goto done;
672                         }
673                 }
674                 CPU_SPINWAIT;
675         }
676
677 yield_loop:
678         count = m->m_yieldloops;
679         while (count--) {
680                 _sched_yield();
681                 owner = m->m_lock.m_owner;
682                 if ((owner & ~UMUTEX_CONTESTED) == 0) {
683                         if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
684                             id | owner)) {
685                                 ret = 0;
686                                 goto done;
687                         }
688                 }
689         }
690
691 sleep_in_kernel:
692         if (abstime == NULL)
693                 ret = __thr_umutex_lock(&m->m_lock, id);
694         else if (__predict_false(abstime->tv_nsec < 0 ||
695             abstime->tv_nsec >= 1000000000))
696                 ret = EINVAL;
697         else
698                 ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
699 done:
700         if (ret == 0 || ret == EOWNERDEAD) {
701                 enqueue_mutex(curthread, m, ret);
702                 if (ret == EOWNERDEAD)
703                         m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
704         }
705         return (ret);
706 }
707
708 static inline int
709 mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
710     bool cvattach, bool rb_onlist)
711 {
712         struct pthread *curthread;
713         int ret, robust;
714
715         robust = 0;  /* pacify gcc */
716         curthread  = _get_curthread();
717         if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
718                 THR_CRITICAL_ENTER(curthread);
719         if (!rb_onlist)
720                 robust = _mutex_enter_robust(curthread, m);
721         ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
722         if (ret == 0 || ret == EOWNERDEAD) {
723                 enqueue_mutex(curthread, m, ret);
724                 if (ret == EOWNERDEAD)
725                         m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
726         } else {
727                 ret = mutex_lock_sleep(curthread, m, abstime);
728         }
729         if (!rb_onlist && robust)
730                 _mutex_leave_robust(curthread, m);
731         if (ret != 0 && ret != EOWNERDEAD &&
732             (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
733                 THR_CRITICAL_LEAVE(curthread);
734         return (ret);
735 }
736
737 int
738 __pthread_mutex_lock(pthread_mutex_t *mutex)
739 {
740         struct pthread_mutex *m;
741         int ret;
742
743         _thr_check_init();
744         ret = check_and_init_mutex(mutex, &m);
745         if (ret == 0)
746                 ret = mutex_lock_common(m, NULL, false, false);
747         return (ret);
748 }
749
750 int
751 __pthread_mutex_timedlock(pthread_mutex_t *mutex,
752     const struct timespec *abstime)
753 {
754         struct pthread_mutex *m;
755         int ret;
756
757         _thr_check_init();
758         ret = check_and_init_mutex(mutex, &m);
759         if (ret == 0)
760                 ret = mutex_lock_common(m, abstime, false, false);
761         return (ret);
762 }
763
764 int
765 _pthread_mutex_unlock(pthread_mutex_t *mutex)
766 {
767         struct pthread_mutex *mp;
768
769         if (*mutex == THR_PSHARED_PTR) {
770                 mp = __thr_pshared_offpage(mutex, 0);
771                 if (mp == NULL)
772                         return (EINVAL);
773                 shared_mutex_init(mp, NULL);
774         } else {
775                 mp = *mutex;
776         }
777         return (mutex_unlock_common(mp, false, NULL));
778 }
779
780 int
781 _mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
782 {
783         int error;
784
785         error = mutex_lock_common(m, NULL, true, rb_onlist);
786         if (error == 0 || error == EOWNERDEAD)
787                 m->m_count = count;
788         return (error);
789 }
790
791 int
792 _mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
793 {
794
795         /*
796          * Clear the count in case this is a recursive mutex.
797          */
798         *count = m->m_count;
799         m->m_count = 0;
800         (void)mutex_unlock_common(m, true, defer);
801         return (0);
802 }
803
804 int
805 _mutex_cv_attach(struct pthread_mutex *m, int count)
806 {
807         struct pthread *curthread;
808
809         curthread = _get_curthread();
810         enqueue_mutex(curthread, m, 0);
811         m->m_count = count;
812         return (0);
813 }
814
815 int
816 _mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
817 {
818         struct pthread *curthread;
819         int deferred, error;
820
821         curthread = _get_curthread();
822         if ((error = _mutex_owned(curthread, mp)) != 0)
823                 return (error);
824
825         /*
826          * Clear the count in case this is a recursive mutex.
827          */
828         *recurse = mp->m_count;
829         mp->m_count = 0;
830         dequeue_mutex(curthread, mp);
831
832         /* Will this happen in real-world ? */
833         if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
834                 deferred = 1;
835                 mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
836         } else
837                 deferred = 0;
838
839         if (deferred)  {
840                 _thr_wake_all(curthread->defer_waiters,
841                     curthread->nwaiter_defer);
842                 curthread->nwaiter_defer = 0;
843         }
844         return (0);
845 }
846
847 static int
848 mutex_self_trylock(struct pthread_mutex *m)
849 {
850         int ret;
851
852         switch (PMUTEX_TYPE(m->m_flags)) {
853         case PTHREAD_MUTEX_ERRORCHECK:
854         case PTHREAD_MUTEX_NORMAL:
855         case PTHREAD_MUTEX_ADAPTIVE_NP:
856                 ret = EBUSY;
857                 break;
858
859         case PTHREAD_MUTEX_RECURSIVE:
860                 /* Increment the lock count: */
861                 if (m->m_count + 1 > 0) {
862                         m->m_count++;
863                         ret = 0;
864                 } else
865                         ret = EAGAIN;
866                 break;
867
868         default:
869                 /* Trap invalid mutex types; */
870                 ret = EINVAL;
871         }
872
873         return (ret);
874 }
875
876 static int
877 mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
878 {
879         struct timespec ts1, ts2;
880         int ret;
881
882         switch (PMUTEX_TYPE(m->m_flags)) {
883         case PTHREAD_MUTEX_ERRORCHECK:
884         case PTHREAD_MUTEX_ADAPTIVE_NP:
885                 if (abstime) {
886                         if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
887                             abstime->tv_nsec >= 1000000000) {
888                                 ret = EINVAL;
889                         } else {
890                                 clock_gettime(CLOCK_REALTIME, &ts1);
891                                 TIMESPEC_SUB(&ts2, abstime, &ts1);
892                                 __sys_nanosleep(&ts2, NULL);
893                                 ret = ETIMEDOUT;
894                         }
895                 } else {
896                         /*
897                          * POSIX specifies that mutexes should return
898                          * EDEADLK if a recursive lock is detected.
899                          */
900                         ret = EDEADLK; 
901                 }
902                 break;
903
904         case PTHREAD_MUTEX_NORMAL:
905                 /*
906                  * What SS2 define as a 'normal' mutex.  Intentionally
907                  * deadlock on attempts to get a lock you already own.
908                  */
909                 ret = 0;
910                 if (abstime) {
911                         if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
912                             abstime->tv_nsec >= 1000000000) {
913                                 ret = EINVAL;
914                         } else {
915                                 clock_gettime(CLOCK_REALTIME, &ts1);
916                                 TIMESPEC_SUB(&ts2, abstime, &ts1);
917                                 __sys_nanosleep(&ts2, NULL);
918                                 ret = ETIMEDOUT;
919                         }
920                 } else {
921                         ts1.tv_sec = 30;
922                         ts1.tv_nsec = 0;
923                         for (;;)
924                                 __sys_nanosleep(&ts1, NULL);
925                 }
926                 break;
927
928         case PTHREAD_MUTEX_RECURSIVE:
929                 /* Increment the lock count: */
930                 if (m->m_count + 1 > 0) {
931                         m->m_count++;
932                         ret = 0;
933                 } else
934                         ret = EAGAIN;
935                 break;
936
937         default:
938                 /* Trap invalid mutex types; */
939                 ret = EINVAL;
940         }
941
942         return (ret);
943 }
944
945 static int
946 mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
947 {
948         struct pthread *curthread;
949         uint32_t id;
950         int deferred, error, robust;
951
952         if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
953                 if (m == THR_MUTEX_DESTROYED)
954                         return (EINVAL);
955                 return (EPERM);
956         }
957
958         curthread = _get_curthread();
959         id = TID(curthread);
960
961         /*
962          * Check if the running thread is not the owner of the mutex.
963          */
964         if (__predict_false(PMUTEX_OWNER_ID(m) != id))
965                 return (EPERM);
966
967         error = 0;
968         if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
969             PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
970                 m->m_count--;
971         } else {
972                 if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
973                         deferred = 1;
974                         m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
975                 } else
976                         deferred = 0;
977
978                 robust = _mutex_enter_robust(curthread, m);
979                 dequeue_mutex(curthread, m);
980                 error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
981                 if (deferred)  {
982                         if (mtx_defer == NULL) {
983                                 _thr_wake_all(curthread->defer_waiters,
984                                     curthread->nwaiter_defer);
985                                 curthread->nwaiter_defer = 0;
986                         } else
987                                 *mtx_defer = 1;
988                 }
989                 if (robust)
990                         _mutex_leave_robust(curthread, m);
991         }
992         if (!cv && m->m_flags & PMUTEX_FLAG_PRIVATE)
993                 THR_CRITICAL_LEAVE(curthread);
994         return (error);
995 }
996
997 int
998 _pthread_mutex_getprioceiling(pthread_mutex_t *mutex,
999     int *prioceiling)
1000 {
1001         struct pthread_mutex *m;
1002
1003         if (*mutex == THR_PSHARED_PTR) {
1004                 m = __thr_pshared_offpage(mutex, 0);
1005                 if (m == NULL)
1006                         return (EINVAL);
1007                 shared_mutex_init(m, NULL);
1008         } else {
1009                 m = *mutex;
1010                 if (m <= THR_MUTEX_DESTROYED)
1011                         return (EINVAL);
1012         }
1013         if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1014                 return (EINVAL);
1015         *prioceiling = m->m_lock.m_ceilings[0];
1016         return (0);
1017 }
1018
1019 int
1020 _pthread_mutex_setprioceiling(pthread_mutex_t *mutex,
1021     int ceiling, int *old_ceiling)
1022 {
1023         struct pthread *curthread;
1024         struct pthread_mutex *m, *m1, *m2;
1025         struct mutex_queue *q, *qp;
1026         int qidx, ret;
1027
1028         if (*mutex == THR_PSHARED_PTR) {
1029                 m = __thr_pshared_offpage(mutex, 0);
1030                 if (m == NULL)
1031                         return (EINVAL);
1032                 shared_mutex_init(m, NULL);
1033         } else {
1034                 m = *mutex;
1035                 if (m <= THR_MUTEX_DESTROYED)
1036                         return (EINVAL);
1037         }
1038         if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1039                 return (EINVAL);
1040
1041         ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
1042         if (ret != 0)
1043                 return (ret);
1044
1045         curthread = _get_curthread();
1046         if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
1047                 mutex_assert_is_owned(m);
1048                 m1 = TAILQ_PREV(m, mutex_queue, m_qe);
1049                 m2 = TAILQ_NEXT(m, m_qe);
1050                 if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
1051                     (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
1052                         qidx = mutex_qidx(m);
1053                         q = &curthread->mq[qidx];
1054                         qp = &curthread->mq[qidx + 1];
1055                         TAILQ_REMOVE(q, m, m_qe);
1056                         if (!is_pshared_mutex(m))
1057                                 TAILQ_REMOVE(qp, m, m_pqe);
1058                         TAILQ_FOREACH(m2, q, m_qe) {
1059                                 if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
1060                                         TAILQ_INSERT_BEFORE(m2, m, m_qe);
1061                                         if (!is_pshared_mutex(m)) {
1062                                                 while (m2 != NULL &&
1063                                                     is_pshared_mutex(m2)) {
1064                                                         m2 = TAILQ_PREV(m2,
1065                                                             mutex_queue, m_qe);
1066                                                 }
1067                                                 if (m2 == NULL) {
1068                                                         TAILQ_INSERT_HEAD(qp,
1069                                                             m, m_pqe);
1070                                                 } else {
1071                                                         TAILQ_INSERT_BEFORE(m2,
1072                                                             m, m_pqe);
1073                                                 }
1074                                         }
1075                                         return (0);
1076                                 }
1077                         }
1078                         TAILQ_INSERT_TAIL(q, m, m_qe);
1079                         if (!is_pshared_mutex(m))
1080                                 TAILQ_INSERT_TAIL(qp, m, m_pqe);
1081                 }
1082         }
1083         return (0);
1084 }
1085
1086 int
1087 _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
1088 {
1089         struct pthread_mutex *m;
1090         int ret;
1091
1092         ret = check_and_init_mutex(mutex, &m);
1093         if (ret == 0)
1094                 *count = m->m_spinloops;
1095         return (ret);
1096 }
1097
1098 int
1099 __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
1100 {
1101         struct pthread_mutex *m;
1102         int ret;
1103
1104         ret = check_and_init_mutex(mutex, &m);
1105         if (ret == 0)
1106                 m->m_spinloops = count;
1107         return (ret);
1108 }
1109
1110 int
1111 _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
1112 {
1113         struct pthread_mutex *m;
1114         int ret;
1115
1116         ret = check_and_init_mutex(mutex, &m);
1117         if (ret == 0)
1118                 *count = m->m_yieldloops;
1119         return (ret);
1120 }
1121
1122 int
1123 __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
1124 {
1125         struct pthread_mutex *m;
1126         int ret;
1127
1128         ret = check_and_init_mutex(mutex, &m);
1129         if (ret == 0)
1130                 m->m_yieldloops = count;
1131         return (0);
1132 }
1133
1134 int
1135 _pthread_mutex_isowned_np(pthread_mutex_t *mutex)
1136 {
1137         struct pthread_mutex *m;
1138
1139         if (*mutex == THR_PSHARED_PTR) {
1140                 m = __thr_pshared_offpage(mutex, 0);
1141                 if (m == NULL)
1142                         return (0);
1143                 shared_mutex_init(m, NULL);
1144         } else {
1145                 m = *mutex;
1146                 if (m <= THR_MUTEX_DESTROYED)
1147                         return (0);
1148         }
1149         return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
1150 }
1151
1152 int
1153 _mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
1154 {
1155
1156         if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
1157                 if (mp == THR_MUTEX_DESTROYED)
1158                         return (EINVAL);
1159                 return (EPERM);
1160         }
1161         if (PMUTEX_OWNER_ID(mp) != TID(curthread))
1162                 return (EPERM);
1163         return (0);                  
1164 }
1165
1166 int
1167 _pthread_mutex_consistent(pthread_mutex_t *mutex)
1168 {
1169         struct pthread_mutex *m;
1170         struct pthread *curthread;
1171
1172         if (*mutex == THR_PSHARED_PTR) {
1173                 m = __thr_pshared_offpage(mutex, 0);
1174                 if (m == NULL)
1175                         return (EINVAL);
1176                 shared_mutex_init(m, NULL);
1177         } else {
1178                 m = *mutex;
1179                 if (m <= THR_MUTEX_DESTROYED)
1180                         return (EINVAL);
1181         }
1182         curthread = _get_curthread();
1183         if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
1184             (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
1185                 return (EINVAL);
1186         if (PMUTEX_OWNER_ID(m) != TID(curthread))
1187                 return (EPERM);
1188         m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
1189         return (0);
1190 }