2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
31 #include "opt_compat.h"
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/limits.h>
36 #include <sys/malloc.h>
37 #include <sys/mutex.h>
40 #include <sys/sched.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysent.h>
44 #include <sys/systm.h>
45 #include <sys/sysproto.h>
46 #include <sys/eventhandler.h>
50 #include <vm/vm_param.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_object.h>
55 #include <machine/cpu.h>
58 #include <compat/freebsd32/freebsd32_proto.h>
61 #define TYPE_SIMPLE_LOCK 0
62 #define TYPE_SIMPLE_WAIT 1
63 #define TYPE_NORMAL_UMUTEX 2
64 #define TYPE_PI_UMUTEX 3
65 #define TYPE_PP_UMUTEX 4
68 /* Key identifying a unique userland synchronization object */
89 /* Priority inheritance mutex info. */
92 struct thread *pi_owner;
97 /* List entry to link umtx holding by thread */
98 TAILQ_ENTRY(umtx_pi) pi_link;
100 /* List entry in hash */
101 TAILQ_ENTRY(umtx_pi) pi_hashlink;
103 /* List for waiters */
104 TAILQ_HEAD(,umtx_q) pi_blocked;
106 /* Identify a userland lock object */
107 struct umtx_key pi_key;
110 /* A userland synchronous object user. */
112 /* Linked list for the hash. */
113 TAILQ_ENTRY(umtx_q) uq_link;
116 struct umtx_key uq_key;
120 #define UQF_UMTXQ 0x0001
122 /* The thread waits on. */
123 struct thread *uq_thread;
126 * Blocked on PI mutex. read can use chain lock
127 * or sched_lock, write must have both chain lock and
128 * sched_lock being hold.
130 struct umtx_pi *uq_pi_blocked;
132 /* On blocked list */
133 TAILQ_ENTRY(umtx_q) uq_lockq;
135 /* Thread contending with us */
136 TAILQ_HEAD(,umtx_pi) uq_pi_contested;
138 /* Inherited priority from PP mutex */
139 u_char uq_inherited_pri;
142 TAILQ_HEAD(umtxq_head, umtx_q);
144 /* Userland lock object's wait-queue chain */
146 /* Lock for this chain. */
149 /* List of sleep queues. */
150 struct umtxq_head uc_queue;
155 /* Chain lock waiters */
158 /* All PI in the list */
159 TAILQ_HEAD(,umtx_pi) uc_pi_list;
162 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED)
165 * Don't propagate time-sharing priority; there is a security reason.
166 * A user could simply create a PI-mutex, let thread A lock it, and
167 * let another thread B block on it.  Because B is sleeping, B's
168 * priority would be boosted, and priority propagation would boost A's
169 * priority as well; A's priority would then never be lowered, even if
170 * A were consuming 100% CPU, which is unfair to other processes.
173 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
174 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
175 PRI_MAX_TIMESHARE : (td)->td_user_pri)
177 #define GOLDEN_RATIO_PRIME 2654404609U
178 #define UMTX_CHAINS 128
179 #define UMTX_SHIFTS (__WORD_BIT - 7)
181 #define THREAD_SHARE 0
182 #define PROCESS_SHARE 1
185 #define GET_SHARE(flags) \
186 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
188 static uma_zone_t umtx_pi_zone;
189 static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
190 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
191 static int umtx_pi_allocated;
193 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
194 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
195 &umtx_pi_allocated, 0, "Allocated umtx_pi");
196 SYSCTL_DECL(_kern_threads);
197 static int umtx_dflt_spins = 0;
198 SYSCTL_INT(_kern_threads, OID_AUTO, umtx_dflt_spins, CTLFLAG_RW,
199 &umtx_dflt_spins, 0, "default umtx spin count");
200 static int umtx_max_spins = 3000;
201 SYSCTL_INT(_kern_threads, OID_AUTO, umtx_max_spins, CTLFLAG_RW,
202 &umtx_max_spins, 0, "max umtx spin count");
204 static void umtxq_sysinit(void *);
205 static void umtxq_hash(struct umtx_key *key);
206 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
207 static void umtxq_lock(struct umtx_key *key);
208 static void umtxq_unlock(struct umtx_key *key);
209 static void umtxq_busy(struct umtx_key *key);
210 static void umtxq_unbusy(struct umtx_key *key);
211 static void umtxq_insert(struct umtx_q *uq);
212 static void umtxq_remove(struct umtx_q *uq);
213 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
214 static int umtxq_count(struct umtx_key *key);
215 static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
216 static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
217 static int umtx_key_get(void *addr, int type, int share,
218 struct umtx_key *key);
219 static void umtx_key_release(struct umtx_key *key);
220 static struct umtx_pi *umtx_pi_alloc(int);
221 static void umtx_pi_free(struct umtx_pi *pi);
222 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
223 static void umtx_thread_cleanup(struct thread *td);
224 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
225 struct image_params *imgp __unused);
226 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
/*
 * SYSINIT hook: create the UMA zone used to allocate umtx_pi records,
 * initialize every wait-queue chain (its mutex, sleep queue, PI list and
 * busy bookkeeping), and register an exec-time event handler so umtx
 * state is cleaned up when a process execs.
 * NOTE(review): this excerpt elides the function's storage class,
 * braces and the declaration of loop index 'i'.
 */
229 umtxq_sysinit(void *arg __unused)
233 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
234 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
235 for (i = 0; i < UMTX_CHAINS; ++i) {
/* MTX_DUPOK: all chain locks share one lock class — presumably two
 * chains may legitimately be locked at once; verify against callers. */
236 mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
237 MTX_DEF | MTX_DUPOK);
238 TAILQ_INIT(&umtxq_chains[i].uc_queue);
239 TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
240 umtxq_chains[i].uc_busy = 0;
241 umtxq_chains[i].uc_waiters = 0;
243 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
244 EVENTHANDLER_PRI_ANY);
/*
 * Allocate a per-thread umtx queue entry (umtx_q): zeroed, with its
 * list of contested PI mutexes initialized and no inherited priority
 * (PRI_MAX means "nothing inherited").
 * NOTE(review): the enclosing function header (presumably umtxq_alloc)
 * and its return statement are elided from this excerpt.
 */
252 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
253 TAILQ_INIT(&uq->uq_pi_contested);
254 uq->uq_inherited_pri = PRI_MAX;
/* Release a umtx_q previously obtained above; body elided here. */
259 umtxq_free(struct umtx_q *uq)
265 umtxq_hash(struct umtx_key *key)
267 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
268 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
272 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
274 return (k1->type == k2->type &&
275 k1->info.both.a == k2->info.both.a &&
276 k1->info.both.b == k2->info.both.b);
279 static inline struct umtxq_chain *
280 umtxq_getchain(struct umtx_key *key)
282 return (&umtxq_chains[key->hash]);
286 * Set chain to busy state when following operation
287 * may be blocked (kernel mutex can not be used).
/*
 * Mark the chain busy, sleeping until any current busy holder clears
 * the state.  The chain lock must be held on entry; msleep() drops it
 * while sleeping and reacquires it before returning.
 * NOTE(review): the lines that bump uc_waiters around the sleep and
 * finally set uc_busy are elided from this excerpt.
 */
290 umtxq_busy(struct umtx_key *key)
292 struct umtxq_chain *uc;
294 uc = umtxq_getchain(key);
295 mtx_assert(&uc->uc_lock, MA_OWNED);
296 while (uc->uc_busy != 0) {
298 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
/*
 * Clear the chain's busy state.  Asserts the chain lock is held and
 * that the chain really is busy.
 * NOTE(review): clearing uc_busy and waking waiters (when uc_waiters
 * is non-zero) are elided from this excerpt.
 */
308 umtxq_unbusy(struct umtx_key *key)
310 struct umtxq_chain *uc;
312 uc = umtxq_getchain(key);
313 mtx_assert(&uc->uc_lock, MA_OWNED);
314 KASSERT(uc->uc_busy != 0, ("not busy"));
324 umtxq_lock(struct umtx_key *key)
326 struct umtxq_chain *uc;
328 uc = umtxq_getchain(key);
329 mtx_lock(&uc->uc_lock);
336 umtxq_unlock(struct umtx_key *key)
338 struct umtxq_chain *uc;
340 uc = umtxq_getchain(key);
341 mtx_unlock(&uc->uc_lock);
345 * Insert a thread onto the umtx queue.
/*
 * Append 'uq' to its chain's sleep queue.  Chain lock must be held.
 */
348 umtxq_insert(struct umtx_q *uq)
350 struct umtxq_chain *uc;
352 uc = umtxq_getchain(&uq->uq_key);
353 UMTXQ_LOCKED_ASSERT(uc);
354 TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
/* UQF_UMTXQ tells umtxq_sleep()/umtxq_remove() the entry is queued. */
355 uq->uq_flags |= UQF_UMTXQ;
359 * Remove thread from the umtx queue.
/*
 * Unlink 'uq' from its chain's sleep queue if it is still queued; the
 * UQF_UMTXQ check makes the operation idempotent (a waker may already
 * have removed it).  Chain lock must be held.
 */
362 umtxq_remove(struct umtx_q *uq)
364 struct umtxq_chain *uc;
366 uc = umtxq_getchain(&uq->uq_key);
367 UMTXQ_LOCKED_ASSERT(uc);
368 if (uq->uq_flags & UQF_UMTXQ) {
369 TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
370 uq->uq_flags &= ~UQF_UMTXQ;
375 * Check if there are multiple waiters
/*
 * Count waiters on the chain whose key matches 'key'.  Chain lock must
 * be held.
 * NOTE(review): the count variable, its increment and the return
 * statement are elided from this excerpt; other callers use the result
 * only to distinguish 0/1 from "more than one".
 */
378 umtxq_count(struct umtx_key *key)
380 struct umtxq_chain *uc;
384 uc = umtxq_getchain(key);
385 UMTXQ_LOCKED_ASSERT(uc);
386 TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
387 if (umtx_key_match(&uq->uq_key, key)) {
396 * Check if there are multiple PI waiters and returns first
/*
 * PI variant of umtxq_count(): counts matching waiters and, through
 * '*first', reports the first matching waiter found.  Chain lock must
 * be held.
 * NOTE(review): initialization of *first, the counting logic and the
 * return statement are elided from this excerpt.
 */
400 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
402 struct umtxq_chain *uc;
407 uc = umtxq_getchain(key);
408 UMTXQ_LOCKED_ASSERT(uc);
409 TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
410 if (umtx_key_match(&uq->uq_key, key)) {
420 * Wake up threads waiting on an userland object.
/*
 * Wake up to 'n_wake' waiters whose key matches 'key'.  Uses the _SAFE
 * iterator because matching entries are removed while traversing.
 * Chain lock must be held.
 * NOTE(review): the removal, wakeup() call, wake counter and return
 * are elided from this excerpt.
 */
423 umtxq_signal(struct umtx_key *key, int n_wake)
425 struct umtxq_chain *uc;
426 struct umtx_q *uq, *next;
430 uc = umtxq_getchain(key);
431 UMTXQ_LOCKED_ASSERT(uc);
432 TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
433 if (umtx_key_match(&uq->uq_key, key)) {
444 * Wake up specified thread.
/*
 * Wake exactly one specific waiter.  Chain lock must be held.
 * NOTE(review): the umtxq_remove()/wakeup() calls are elided from this
 * excerpt.
 */
447 umtxq_signal_thread(struct umtx_q *uq)
449 struct umtxq_chain *uc;
451 uc = umtxq_getchain(&uq->uq_key);
452 UMTXQ_LOCKED_ASSERT(uc);
458 * Put thread into sleep state; before sleeping, check if the
459 * thread was removed from the umtx queue.
462 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
464 struct umtxq_chain *uc;
467 uc = umtxq_getchain(&uq->uq_key);
468 UMTXQ_LOCKED_ASSERT(uc);
/* Already woken and dequeued: no need to sleep at all. */
469 if (!(uq->uq_flags & UQF_UMTXQ))
/* PCATCH: the sleep is interruptible by signals. */
471 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
/* NOTE(review): EWOULDBLOCK is remapped here (presumably to
 * ETIMEDOUT); the mapping line is elided from this excerpt. */
472 if (error == EWOULDBLOCK)
478 * Convert userspace address into unique logical address.
/*
 * Build a umtx_key for 'addr'.  Private (per-process) keys record the
 * current vmspace and virtual address.  Shared keys resolve the
 * backing VM object and offset through vm_map_lookup() and take a
 * reference on the object (dropped by umtx_key_release()).  With
 * AUTO_SHARE the choice depends on the map entry's inheritance.
 * NOTE(review): assignments of key->type/key->shared, error returns,
 * and the final umtxq_hash() call are elided from this excerpt.
 */
481 umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
483 struct thread *td = curthread;
485 vm_map_entry_t entry;
491 if (share == THREAD_SHARE) {
493 key->info.private.vs = td->td_proc->p_vmspace;
494 key->info.private.addr = (uintptr_t)addr;
496 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
497 map = &td->td_proc->p_vmspace->vm_map;
/* Require VM_PROT_WRITE: umtx words are always writable by waiters. */
498 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
499 &entry, &key->info.shared.object, &pindex, &prot,
500 &wired) != KERN_SUCCESS) {
504 if ((share == PROCESS_SHARE) ||
505 (share == AUTO_SHARE &&
506 VM_INHERIT_SHARE == entry->inheritance)) {
508 key->info.shared.offset = entry->offset + entry->start -
510 vm_object_reference(key->info.shared.object);
513 key->info.private.vs = td->td_proc->p_vmspace;
514 key->info.private.addr = (uintptr_t)addr;
/* Drop the map lock taken by vm_map_lookup(). */
516 vm_map_lookup_done(map, entry);
/*
 * Release a key built by umtx_key_get(): for shared keys, drop the VM
 * object reference taken there.
 * NOTE(review): the shared/private test guarding this call is elided
 * from this excerpt.
 */
527 umtx_key_release(struct umtx_key *key)
530 vm_object_deallocate(key->info.shared.object);
534 * Lock a umtx object.
/*
 * Acquire a (long-word) umtx on behalf of thread id 'id', sleeping up
 * to 'timo' ticks per wait.  Strategy: try the uncontested CAS fast
 * paths, otherwise set the contested bit, join the wait queue and
 * sleep, then retry.  All userland accesses go through casuword()/
 * fuword() because the address can fault at any time.
 * NOTE(review): the retry loop framing, 'return' statements and
 * EFAULT paths are elided from this excerpt.
 */
537 _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
547 * Care must be exercised when dealing with umtx structure. It
548 * can fault on any access.
552 * Try the uncontested case. This should be done in userland.
554 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
556 /* The acquire succeeded. */
557 if (owner == UMTX_UNOWNED)
560 /* The address was invalid. */
564 /* If no one owns it but it is contested try to acquire it. */
565 if (owner == UMTX_CONTESTED) {
566 owner = casuword(&umtx->u_owner,
567 UMTX_CONTESTED, id | UMTX_CONTESTED);
569 if (owner == UMTX_CONTESTED)
572 /* The address was invalid. */
576 /* If this failed the lock has changed, restart. */
581 * If we caught a signal, we have retried and now
587 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
588 AUTO_SHARE, &uq->uq_key)) != 0)
/* Queue ourselves before publishing the contested bit so a concurrent
 * unlocker's wakeup cannot be lost. */
591 umtxq_lock(&uq->uq_key);
592 umtxq_busy(&uq->uq_key);
594 umtxq_unbusy(&uq->uq_key);
595 umtxq_unlock(&uq->uq_key);
598 * Set the contested bit so that a release in user space
599 * knows to use the system call for unlock. If this fails
600 * either some one else has acquired the lock or it has been
603 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
605 /* The address was invalid. */
607 umtxq_lock(&uq->uq_key);
609 umtxq_unlock(&uq->uq_key);
610 umtx_key_release(&uq->uq_key);
615 * We set the contested bit, sleep. Otherwise the lock changed
616 * and we need to retry or we lost a race to the thread
617 * unlocking the umtx.
619 umtxq_lock(&uq->uq_key);
621 error = umtxq_sleep(uq, "umtx", timo);
623 umtxq_unlock(&uq->uq_key);
624 umtx_key_release(&uq->uq_key);
631 * Lock a umtx object.
/*
 * Timed wrapper around _do_lock_umtx(): with no timeout, loops in tick
 * units forever; with a timeout, converts the remaining absolute
 * deadline to ticks on each retry and returns ETIMEDOUT when the
 * deadline passes.
 * NOTE(review): getnanouptime() calls establishing ts/ts2 and the loop
 * framing are elided from this excerpt.
 */
634 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
635 struct timespec *timeout)
637 struct timespec ts, ts2, ts3;
641 if (timeout == NULL) {
642 error = _do_lock_umtx(td, umtx, id, 0);
643 /* Mutex locking is restarted if it is interrupted. */
648 timespecadd(&ts, timeout);
649 TIMESPEC_TO_TIMEVAL(&tv, timeout);
651 error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
652 if (error != ETIMEDOUT)
/* Deadline reached: give up with the timeout error. */
655 if (timespeccmp(&ts2, &ts, >=)) {
660 timespecsub(&ts3, &ts2);
661 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
663 /* Timed-locking is not restarted. */
664 if (error == ERESTART)
671 * Unlock a umtx object.
/*
 * Release a (long-word) umtx.  Verifies ownership, handles the
 * uncontested fast path with a single CAS, and otherwise hands the
 * lock off under the busy-marked chain, waking one waiter.
 * NOTE(review): EPERM/EFAULT returns, busy/unbusy bracketing and the
 * final return are elided from this excerpt.
 */
674 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
683 * Make sure we own this mtx.
685 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
689 if ((owner & ~UMTX_CONTESTED) != id)
692 /* This should be done in userland */
693 if ((owner & UMTX_CONTESTED) == 0) {
694 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
702 /* We should only ever be in here for contested locks */
703 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
/* Count waiters to decide the new userland word value below. */
709 count = umtxq_count(&key);
713 * When unlocking the umtx, it must be marked as unowned if
714 * there is zero or one thread only waiting for it.
715 * Otherwise, it must be marked as contested.
717 old = casuword(&umtx->u_owner, owner,
718 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
720 umtxq_signal(&key,1);
723 umtx_key_release(&key);
734 * Lock a umtx object.
/*
 * 32-bit (COMPAT_FREEBSD32) variant of _do_lock_umtx(): identical
 * algorithm operating on a uint32_t word via casuword32().
 * NOTE(review): as in the native variant, the retry-loop framing,
 * returns and EFAULT paths are elided from this excerpt.
 */
737 _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
747 * Care must be exercised when dealing with umtx structure. It
748 * can fault on any access.
752 * Try the uncontested case. This should be done in userland.
754 owner = casuword32(m, UMUTEX_UNOWNED, id);
756 /* The acquire succeeded. */
757 if (owner == UMUTEX_UNOWNED)
760 /* The address was invalid. */
764 /* If no one owns it but it is contested try to acquire it. */
765 if (owner == UMUTEX_CONTESTED) {
766 owner = casuword32(m,
767 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
768 if (owner == UMUTEX_CONTESTED)
771 /* The address was invalid. */
775 /* If this failed the lock has changed, restart. */
780 * If we caught a signal, we have retried and now
786 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
787 AUTO_SHARE, &uq->uq_key)) != 0)
790 umtxq_lock(&uq->uq_key);
791 umtxq_busy(&uq->uq_key);
793 umtxq_unbusy(&uq->uq_key);
794 umtxq_unlock(&uq->uq_key);
797 * Set the contested bit so that a release in user space
798 * knows to use the system call for unlock. If this fails
799 * either some one else has acquired the lock or it has been
802 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
804 /* The address was invalid. */
806 umtxq_lock(&uq->uq_key);
808 umtxq_unlock(&uq->uq_key);
809 umtx_key_release(&uq->uq_key);
814 * We set the contested bit, sleep. Otherwise the lock changed
815 * and we need to retry or we lost a race to the thread
816 * unlocking the umtx.
818 umtxq_lock(&uq->uq_key);
820 error = umtxq_sleep(uq, "umtx", timo);
822 umtxq_unlock(&uq->uq_key);
823 umtx_key_release(&uq->uq_key);
830 * Lock a umtx object.
/*
 * 32-bit timed wrapper, mirroring do_lock_umtx(): retries the tick-
 * bounded lock attempt until success, a non-timeout error, or the
 * absolute deadline expires.
 * NOTE(review): the getnanouptime() calls and loop framing are elided
 * from this excerpt.
 */
833 do_lock_umtx32(struct thread *td, void *m, uint32_t id,
834 struct timespec *timeout)
836 struct timespec ts, ts2, ts3;
840 if (timeout == NULL) {
841 error = _do_lock_umtx32(td, m, id, 0);
842 /* Mutex locking is restarted if it is interrupted. */
847 timespecadd(&ts, timeout);
848 TIMESPEC_TO_TIMEVAL(&tv, timeout);
850 error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
851 if (error != ETIMEDOUT)
854 if (timespeccmp(&ts2, &ts, >=)) {
859 timespecsub(&ts3, &ts2);
860 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
862 /* Timed-locking is not restarted. */
863 if (error == ERESTART)
870 * Unlock a umtx object.
/*
 * 32-bit variant of do_unlock_umtx(): verify ownership, fast-path the
 * uncontested release, otherwise hand off under the busy chain and
 * wake one waiter.
 * NOTE(review): the fuword32() ownership read, error returns and
 * busy/unbusy bracketing are elided from this excerpt.
 */
873 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
882 * Make sure we own this mtx.
888 if ((owner & ~UMUTEX_CONTESTED) != id)
891 /* This should be done in userland */
892 if ((owner & UMUTEX_CONTESTED) == 0) {
893 old = casuword32(m, owner, UMUTEX_UNOWNED);
901 /* We should only ever be in here for contested locks */
902 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
908 count = umtxq_count(&key);
912 * When unlocking the umtx, it must be marked as unowned if
913 * there is zero or one thread only waiting for it.
914 * Otherwise, it must be marked as contested.
916 old = casuword32(m, owner,
917 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
919 umtxq_signal(&key,1);
922 umtx_key_release(&key);
932 * Fetch and compare value, sleep on the address if value is not changed.
/*
 * futex-style wait: queue on the key, re-read the userland word, and
 * only sleep if it still equals 'id'; supports an optional absolute
 * timeout.  'compat32' selects the 32-bit fetch.
 * NOTE(review): the compat32 fuword branch, umtxq_insert/remove calls
 * and several loop-framing lines are elided from this excerpt.
 */
935 do_wait(struct thread *td, void *addr, u_long id,
936 struct timespec *timeout, int compat32)
939 struct timespec ts, ts2, ts3;
945 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
/* Queue first, then validate the word: avoids a lost wakeup between
 * the check and the sleep. */
949 umtxq_lock(&uq->uq_key);
951 umtxq_unlock(&uq->uq_key);
955 tmp = fuword32(addr);
957 umtxq_lock(&uq->uq_key);
959 umtxq_unlock(&uq->uq_key);
960 } else if (timeout == NULL) {
961 umtxq_lock(&uq->uq_key);
962 error = umtxq_sleep(uq, "uwait", 0);
964 umtxq_unlock(&uq->uq_key);
967 timespecadd(&ts, timeout);
968 TIMESPEC_TO_TIMEVAL(&tv, timeout);
969 umtxq_lock(&uq->uq_key);
971 error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
/* Woken (removed from queue) — success regardless of sleep status. */
972 if (!(uq->uq_flags & UQF_UMTXQ))
974 if (error != ETIMEDOUT)
976 umtxq_unlock(&uq->uq_key);
978 if (timespeccmp(&ts2, &ts, >=)) {
980 umtxq_lock(&uq->uq_key);
984 timespecsub(&ts3, &ts2);
985 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
986 umtxq_lock(&uq->uq_key);
989 umtxq_unlock(&uq->uq_key);
991 umtx_key_release(&uq->uq_key);
/* An interrupted untimed wait is transparently restarted. */
992 if (error == ERESTART)
998 * Wake up threads sleeping on the specified address.
/*
 * Wake up to 'n_wake' threads blocked in do_wait() on 'uaddr'.
 * Returns the umtxq_signal() result (number woken); key-get errors
 * propagate.
 * NOTE(review): the lock/unlock bracketing around umtxq_signal() is
 * elided from this excerpt.
 */
1001 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
1003 struct umtx_key key;
1006 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
1010 ret = umtxq_signal(&key, n_wake);
1012 umtx_key_release(&key);
1017 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
/*
 * Acquire a PRIO_NONE umutex for the current thread.  Same CAS-based
 * protocol as _do_lock_umtx(), plus two extras: UMUTEX_ERROR_CHECK
 * (EDEADLK when we already own it) and an adaptive spin phase that
 * busy-waits while the owner is observed running on another CPU.
 * NOTE(review): the retry-loop framing, returns and several error
 * paths are elided from this excerpt.
 */
1020 _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1024 uint32_t owner, old, id;
/* Per-mutex spin count from userland, clamped by the sysctl limits. */
1035 spincount = fuword32(&m->m_spincount);
1037 spincount = umtx_dflt_spins;
1038 if (spincount > umtx_max_spins)
1039 spincount = umtx_max_spins;
1045 * Care must be exercised when dealing with umtx structure. It
1046 * can fault on any access.
1053 * Try the uncontested case. This should be done in userland.
1055 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1057 /* The acquire succeeded. */
1058 if (owner == UMUTEX_UNOWNED)
1061 /* The address was invalid. */
1065 /* If no one owns it but it is contested try to acquire it. */
1066 if (owner == UMUTEX_CONTESTED) {
1070 owner = casuword32(&m->m_owner,
1071 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1073 if (owner == UMUTEX_CONTESTED)
1076 /* The address was invalid. */
1080 /* If this failed the lock has changed, restart. */
/* Error-check mutex: relocking our own lock is a deadlock error. */
1084 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1085 (owner & ~UMUTEX_CONTESTED) == id)
/* Adaptive spin: only worthwhile while the owner runs on some CPU. */
1092 if (spincount > 0 && (owner & ~UMUTEX_CONTESTED) != id) {
1094 struct pcpu *pcpu = NULL;
1096 /* Look for a cpu the owner is running on */
1097 for (i = 0; i < MAXCPU; i++) {
1100 pcpu = pcpu_find(i);
1101 if ((owner & ~UMUTEX_CONTESTED) == pcpu->pc_curtid) {
1107 if (__predict_false(!found))
1110 while ((owner & ~UMUTEX_CONTESTED) == pcpu->pc_curtid &&
1111 (owner & ~UMUTEX_CONTESTED) != id) {
1112 if (--spincount <= 0)
/* Stop spinning when a reschedule/signal/stop is pending. */
1115 (TDF_NEEDRESCHED|TDF_ASTPENDING|TDF_NEEDSIGCHK)) ||
1116 P_SHOULDSTOP(td->td_proc))
1118 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1119 if (owner == UMUTEX_UNOWNED)
1121 if (owner == UMUTEX_CONTESTED)
1132 * If we caught a signal, we have retried and now
1138 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1139 GET_SHARE(flags), &uq->uq_key)) != 0)
1142 umtxq_lock(&uq->uq_key);
1143 umtxq_busy(&uq->uq_key);
1145 umtxq_unbusy(&uq->uq_key);
1146 umtxq_unlock(&uq->uq_key);
1149 * Set the contested bit so that a release in user space
1150 * knows to use the system call for unlock. If this fails
1151 * either some one else has acquired the lock or it has been
1154 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1156 /* The address was invalid. */
1158 umtxq_lock(&uq->uq_key);
1160 umtxq_unlock(&uq->uq_key);
1161 umtx_key_release(&uq->uq_key);
1166 * We set the contested bit, sleep. Otherwise the lock changed
1167 * and we need to retry or we lost a race to the thread
1168 * unlocking the umtx.
1170 umtxq_lock(&uq->uq_key);
1172 error = umtxq_sleep(uq, "umtxn", timo);
1174 umtxq_unlock(&uq->uq_key);
1175 umtx_key_release(&uq->uq_key);
1182 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1185 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
/*
 * Release a PRIO_NONE umutex: verify ownership, fast-path the
 * uncontested CAS release, otherwise hand off under the busy chain and
 * wake one waiter, marking the word contested if more remain.
 * NOTE(review): EPERM/EFAULT returns and busy/unbusy bracketing are
 * elided from this excerpt.
 */
1188 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1190 struct umtx_key key;
1191 uint32_t owner, old, id;
1197 * Make sure we own this mtx.
1199 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1203 if ((owner & ~UMUTEX_CONTESTED) != id)
1206 /* This should be done in userland */
1207 if ((owner & UMUTEX_CONTESTED) == 0) {
1208 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1216 /* We should only ever be in here for contested locks */
1217 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1223 count = umtxq_count(&key);
1227 * When unlocking the umtx, it must be marked as unowned if
1228 * there is zero or one thread only waiting for it.
1229 * Otherwise, it must be marked as contested.
1231 old = casuword32(&m->m_owner, owner,
1232 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1234 umtxq_signal(&key,1);
1237 umtx_key_release(&key);
1245 static inline struct umtx_pi *
1246 umtx_pi_alloc(int flags)
1250 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1251 TAILQ_INIT(&pi->pi_blocked);
1252 atomic_add_int(&umtx_pi_allocated, 1);
/*
 * Return a PI-mutex record to the zone and update the allocation
 * statistic.
 */
1257 umtx_pi_free(struct umtx_pi *pi)
1259 uma_zfree(umtx_pi_zone, pi);
1260 atomic_add_int(&umtx_pi_allocated, -1);
1264 * Adjust the thread's position on a pi_state after its priority has been
/*
 * Re-sort 'td' within pi->pi_blocked (kept in ascending user-priority
 * order) after a priority change.  Requires sched_lock.
 * NOTE(review): the early-return when no move is needed and the
 * return value (presumably whether a move occurred) are elided from
 * this excerpt.
 */
1268 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1270 struct umtx_q *uq, *uq1, *uq2;
1273 mtx_assert(&sched_lock, MA_OWNED);
1280 * Check if the thread needs to be moved on the blocked chain.
1281 * It needs to be moved if either its priority is lower than
1282 * the previous thread or higher than the next thread.
1284 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1285 uq2 = TAILQ_NEXT(uq, uq_lockq);
/* Lower UPRI value means higher priority. */
1286 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1287 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1289 * Remove thread from blocked chain and determine where
1290 * it should be moved to.
1292 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1293 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1294 td1 = uq1->uq_thread;
1295 MPASS(td1->td_proc->p_magic == P_MAGIC);
1296 if (UPRI(td1) > UPRI(td))
1301 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1303 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1309 * Propagate priority when a thread is blocked on POSIX
/*
 * Walk the chain of PI mutexes starting from the one 'td' is blocked
 * on, lending our priority to each successive owner and re-sorting
 * each blocked list, until the chain ends or no adjustment is needed.
 * Requires sched_lock.
 * NOTE(review): the loop framing and owner/uq reassignments between
 * iterations are elided from this excerpt.
 */
1313 umtx_propagate_priority(struct thread *td)
1319 mtx_assert(&sched_lock, MA_OWNED);
1322 pi = uq->uq_pi_blocked;
1331 MPASS(td->td_proc != NULL);
1332 MPASS(td->td_proc->p_magic == P_MAGIC);
/* Owner already runs at an equal or better priority: chain ends. */
1334 if (UPRI(td) <= pri)
1337 sched_lend_user_prio(td, pri);
1340 * Pick up the lock that td is blocked on.
1343 pi = uq->uq_pi_blocked;
1344 /* Resort td on the list if needed. */
1345 if (!umtx_pi_adjust_thread(pi, td))
1351 * Unpropagate priority for a PI mutex when a thread blocked on
1352 * it is interrupted by signal or resumed by others.
/*
 * Recompute each owner's inherited priority along the PI chain: take
 * the best priority among the top waiters of all mutexes the owner
 * still holds contested, floor it at the owner's own inherited
 * priority, and unlend.  Requires sched_lock.
 */
1355 umtx_unpropagate_priority(struct umtx_pi *pi)
1357 struct umtx_q *uq, *uq_owner;
1358 struct umtx_pi *pi2;
1361 mtx_assert(&sched_lock, MA_OWNED);
1363 while (pi != NULL && pi->pi_owner != NULL) {
1365 uq_owner = pi->pi_owner->td_umtxq;
/* Scan every contested mutex the owner holds for its best waiter. */
1367 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1368 uq = TAILQ_FIRST(&pi2->pi_blocked);
1370 if (pri > UPRI(uq->uq_thread))
1371 pri = UPRI(uq->uq_thread);
1375 if (pri > uq_owner->uq_inherited_pri)
1376 pri = uq_owner->uq_inherited_pri;
1377 sched_unlend_user_prio(pi->pi_owner, pri);
/* Continue up the chain: the mutex the owner itself is blocked on. */
1378 pi = uq_owner->uq_pi_blocked;
1383 * Insert a PI mutex into owned list.
1386 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1388 struct umtx_q *uq_owner;
1390 uq_owner = owner->td_umtxq;
1391 mtx_assert(&sched_lock, MA_OWNED);
1392 if (pi->pi_owner != NULL)
1393 panic("pi_ower != NULL");
1394 pi->pi_owner = owner;
1395 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1399 * Claim ownership of a PI mutex.
/*
 * Make 'owner' the owner of 'pi' and lend it the top waiter's priority
 * if that is better.  A no-op if already the owner; if someone else
 * owns it, userland state is inconsistent (error return elided).
 */
1402 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1404 struct umtx_q *uq, *uq_owner;
1406 uq_owner = owner->td_umtxq;
1407 mtx_lock_spin(&sched_lock);
1408 if (pi->pi_owner == owner) {
1409 mtx_unlock_spin(&sched_lock);
1413 if (pi->pi_owner != NULL) {
1415 * userland may have already messed the mutex, sigh.
1417 mtx_unlock_spin(&sched_lock);
1420 umtx_pi_setowner(pi, owner);
/* First waiter has the best (lowest UPRI) priority on the list. */
1421 uq = TAILQ_FIRST(&pi->pi_blocked);
1425 pri = UPRI(uq->uq_thread);
1426 if (pri < UPRI(owner))
1427 sched_lend_user_prio(owner, pri);
1429 mtx_unlock_spin(&sched_lock);
1434 * Adjust a thread's order position in its blocked PI mutex;
1435 * this may start a new round of priority propagation.
/*
 * Called after 'td' changed priority while blocked on a PI mutex:
 * re-sort it on the blocked list and, if it became the best waiter
 * with a better priority than before, propagate upward.  Requires
 * sched_lock.
 */
1438 umtx_pi_adjust(struct thread *td, u_char oldpri)
1445 mtx_assert(&sched_lock, MA_OWNED);
1446 MPASS(TD_ON_UPILOCK(td));
1449 * Pick up the lock that td is blocked on.
1451 pi = uq->uq_pi_blocked;
1454 /* Resort the turnstile on the list. */
1455 if (!umtx_pi_adjust_thread(pi, td))
1459 * If our priority was lowered and we are at the head of the
1460 * turnstile, then propagate our new priority up the chain.
/* Lower UPRI value means higher priority. */
1462 if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
1463 umtx_propagate_priority(td);
1467 * Sleep on a PI mutex.
/*
 * Block the current thread on PI mutex 'pi': resolve and record the
 * owner if unknown (process-private only), insert ourselves into
 * pi_blocked in priority order, propagate priority to the owner, then
 * msleep until woken or interrupted.  On wakeup, undo the blocked
 * state and unpropagate.  Entered with the chain lock held.
 * NOTE(review): several insert-position and error-path lines are
 * elided from this excerpt.
 */
1470 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1471 uint32_t owner, const char *wmesg, int timo)
1473 struct umtxq_chain *uc;
1474 struct thread *td, *td1;
1480 KASSERT(td == curthread, ("inconsistent uq_thread"));
1481 uc = umtxq_getchain(&uq->uq_key);
1482 UMTXQ_LOCKED_ASSERT(uc);
1484 if (pi->pi_owner == NULL) {
1486 * Currently, we only support process-private PI-mutexes;
1487 * non-contended PI-mutexes are locked in userland.
1488 * Process-shared PI-mutexes should always be initialized
1489 * by the kernel and registered in the kernel, and locking
1490 * should always be done by the kernel to avoid security
1491 * problems.  For a process-private PI-mutex, we can find
1492 * the owner thread and boost its priority safely.
1495 td1 = thread_find(curproc, owner);
1496 mtx_lock_spin(&sched_lock);
/* Re-check under sched_lock: another thread may have set the owner. */
1497 if (td1 != NULL && pi->pi_owner == NULL) {
1498 uq1 = td1->td_umtxq;
1499 umtx_pi_setowner(pi, td1);
1501 PROC_UNLOCK(curproc);
1503 mtx_lock_spin(&sched_lock);
/* Insert in ascending UPRI order (best waiter first). */
1506 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1507 pri = UPRI(uq1->uq_thread);
1513 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1515 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1517 uq->uq_pi_blocked = pi;
1518 td->td_flags |= TDF_UPIBLOCKED;
1519 mtx_unlock_spin(&sched_lock);
1520 umtxq_unlock(&uq->uq_key);
1522 mtx_lock_spin(&sched_lock);
1523 umtx_propagate_priority(td);
1524 mtx_unlock_spin(&sched_lock);
1526 umtxq_lock(&uq->uq_key);
1527 if (uq->uq_flags & UQF_UMTXQ) {
1528 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1529 if (error == EWOULDBLOCK)
/* Interrupted/timed out while still queued: dequeue ourselves. */
1531 if (uq->uq_flags & UQF_UMTXQ) {
1532 umtxq_busy(&uq->uq_key);
1534 umtxq_unbusy(&uq->uq_key);
1537 umtxq_unlock(&uq->uq_key);
1539 mtx_lock_spin(&sched_lock);
1540 uq->uq_pi_blocked = NULL;
1541 td->td_flags &= ~TDF_UPIBLOCKED;
1542 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1543 umtx_unpropagate_priority(pi);
1544 mtx_unlock_spin(&sched_lock);
1546 umtxq_lock(&uq->uq_key);
1552 * Add reference count for a PI mutex.
/*
 * Bump pi_refcount under the chain lock.
 * NOTE(review): the increment itself is elided from this excerpt.
 */
1555 umtx_pi_ref(struct umtx_pi *pi)
1557 struct umtxq_chain *uc;
1559 uc = umtxq_getchain(&pi->pi_key);
1560 UMTXQ_LOCKED_ASSERT(uc);
1565 * Decrease reference count for a PI mutex; if the counter
1566 * drops to zero, its memory is freed.
/*
 * Drop a reference: on last release, detach from any owner's
 * contested list (under sched_lock), verify no waiters remain, unhash
 * and free.  Chain lock must be held.
 * NOTE(review): the final umtx_pi_free() call is elided from this
 * excerpt.
 */
1569 umtx_pi_unref(struct umtx_pi *pi)
1571 struct umtxq_chain *uc;
1574 uc = umtxq_getchain(&pi->pi_key);
1575 UMTXQ_LOCKED_ASSERT(uc);
1576 KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1577 if (--pi->pi_refcount == 0) {
1578 mtx_lock_spin(&sched_lock);
1579 if (pi->pi_owner != NULL) {
1580 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1582 pi->pi_owner = NULL;
1584 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1585 ("blocked queue not empty"));
1586 mtx_unlock_spin(&sched_lock);
1587 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1595 * Find a PI mutex in hash table.
/*
 * Linear scan of the chain's PI list for a key match; returns the
 * record or NULL.  Chain lock must be held.
 * NOTE(review): the return statements are elided from this excerpt.
 */
1597 static struct umtx_pi *
1598 umtx_pi_lookup(struct umtx_key *key)
1600 struct umtxq_chain *uc;
1603 uc = umtxq_getchain(key);
1604 UMTXQ_LOCKED_ASSERT(uc);
1606 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1607 if (umtx_key_match(&pi->pi_key, key)) {
1615 * Insert a PI mutex into hash table.
/*
 * Hash-insert a PI record (pi_key must already be set).  Chain lock
 * must be held.
 */
1618 umtx_pi_insert(struct umtx_pi *pi)
1620 struct umtxq_chain *uc;
1622 uc = umtxq_getchain(&pi->pi_key);
1623 UMTXQ_LOCKED_ASSERT(uc);
1624 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
/*
 * Acquire a PTHREAD_PRIO_INHERIT umutex.  Looks up (or allocates and
 * hashes) the kernel-side umtx_pi record, then runs the usual CAS
 * protocol; on contention the thread sleeps via umtxq_sleep_pi(),
 * which handles priority inheritance.
 * NOTE(review): the retry-loop framing, pi_ref/unref bracketing and
 * several returns are elided from this excerpt.
 */
1631 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1635 struct umtx_pi *pi, *new_pi;
1636 uint32_t id, owner, old;
1642 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1645 umtxq_lock(&uq->uq_key);
1646 pi = umtx_pi_lookup(&uq->uq_key);
/* Allocate without sleeping first; fall back to a M_WAITOK attempt
 * with the chain lock dropped, then re-check for a racing insert. */
1648 new_pi = umtx_pi_alloc(M_NOWAIT);
1649 if (new_pi == NULL) {
1650 umtxq_unlock(&uq->uq_key);
1651 new_pi = umtx_pi_alloc(M_WAITOK);
1652 new_pi->pi_key = uq->uq_key;
1653 umtxq_lock(&uq->uq_key);
1654 pi = umtx_pi_lookup(&uq->uq_key);
/* Lost the race: someone inserted a record while we slept. */
1656 umtx_pi_free(new_pi);
1660 if (new_pi != NULL) {
1661 new_pi->pi_key = uq->uq_key;
1662 umtx_pi_insert(new_pi);
1667 umtxq_unlock(&uq->uq_key);
1670 * Care must be exercised when dealing with umtx structure. It
1671 * can fault on any access.
1675 * Try the uncontested case. This should be done in userland.
1677 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1679 /* The acquire succeeded. */
1680 if (owner == UMUTEX_UNOWNED) {
1685 /* The address was invalid. */
1691 /* If no one owns it but it is contested try to acquire it. */
1692 if (owner == UMUTEX_CONTESTED) {
1693 owner = casuword32(&m->m_owner,
1694 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1696 if (owner == UMUTEX_CONTESTED) {
1697 umtxq_lock(&uq->uq_key);
/* Record ourselves as PI owner so waiters can lend priority. */
1698 error = umtx_pi_claim(pi, td);
1699 umtxq_unlock(&uq->uq_key);
1703 /* The address was invalid. */
1709 /* If this failed the lock has changed, restart. */
1713 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1714 (owner & ~UMUTEX_CONTESTED) == id) {
1725 * If we caught a signal, we have retried and now
1731 umtxq_lock(&uq->uq_key);
1732 umtxq_busy(&uq->uq_key);
1733 umtxq_unlock(&uq->uq_key);
1736 * Set the contested bit so that a release in user space
1737 * knows to use the system call for unlock. If this fails
1738 * either some one else has acquired the lock or it has been
1741 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1743 /* The address was invalid. */
1745 umtxq_lock(&uq->uq_key);
1746 umtxq_unbusy(&uq->uq_key);
1747 umtxq_unlock(&uq->uq_key);
1752 umtxq_lock(&uq->uq_key);
1753 umtxq_unbusy(&uq->uq_key);
1755 * We set the contested bit, sleep. Otherwise the lock changed
1756 * and we need to retry or we lost a race to the thread
1757 * unlocking the umtx.
/* Pass the bare owner tid so the sleeper can resolve the PI owner. */
1760 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1762 umtxq_unlock(&uq->uq_key);
1765 umtxq_lock(&uq->uq_key);
1767 umtxq_unlock(&uq->uq_key);
1769 umtx_key_release(&uq->uq_key);
1774 * Unlock a PI mutex.
/*
 * do_unlock_pi: release a priority-inheritance userland mutex on behalf
 * of td.  Verifies ownership, takes a userland fast path when there is
 * no contention, otherwise drops the PI state, recomputes the caller's
 * lent priority, and wakes the first queued waiter.
 * NOTE(review): this extract is line-sampled; declarations, braces and
 * some error paths between the visible lines are not shown here.
 */
1777 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1779 struct umtx_key key;
1780 struct umtx_q *uq_first, *uq_first2, *uq_me;
1781 struct umtx_pi *pi, *pi2;
1782 uint32_t owner, old, id;
/* Only the owning thread may unlock; reject anyone else. */
1789 * Make sure we own this mtx.
1791 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1795 if ((owner & ~UMUTEX_CONTESTED) != id)
/* Fast path: contested bit clear means no waiters, so a plain CAS
 * back to UMUTEX_UNOWNED in userland memory suffices. */
1798 /* This should be done in userland */
1799 if ((owner & UMUTEX_CONTESTED) == 0) {
1800 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1808 /* We should only ever be in here for contested locks */
1809 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
/* Count waiters; uq_first (if any) is the thread that will be woken. */
1815 count = umtxq_count_pi(&key, &uq_first);
1816 if (uq_first != NULL) {
1817 pi = uq_first->uq_pi_blocked;
1818 if (pi->pi_owner != curthread) {
1821 /* userland messed the mutex */
/*
 * Disown the PI record and recompute the priority this thread
 * should run at from the waiters of the mutexes it still owns
 * (sched_lock protects the PI lists and lent priority).
 */
1824 uq_me = curthread->td_umtxq;
1825 mtx_lock_spin(&sched_lock);
1826 pi->pi_owner = NULL;
1827 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1828 uq_first = TAILQ_FIRST(&pi->pi_blocked);
1830 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1831 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1832 if (uq_first2 != NULL) {
1833 if (pri > UPRI(uq_first2->uq_thread))
1834 pri = UPRI(uq_first2->uq_thread);
1837 sched_unlend_user_prio(curthread, pri);
1838 mtx_unlock_spin(&sched_lock);
1843 * When unlocking the umtx, it must be marked as unowned if
1844 * there is zero or one thread only waiting for it.
1845 * Otherwise, it must be marked as contested.
1847 old = casuword32(&m->m_owner, owner,
1848 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
/* Hand the mutex to the first waiter. */
1851 if (uq_first != NULL)
1852 umtxq_signal_thread(uq_first);
1855 umtx_key_release(&key);
/*
 * _do_lock_pp: acquire a priority-protection (ceiling) userland mutex.
 * Raises the caller's inherited priority to the mutex's ceiling before
 * attempting the CAS acquire; on failure either errors out, sleeps on
 * the umtx queue, or (try-mode / error paths) restores the saved
 * priority.  timo is the sleep timeout in ticks (0 = forever).
 * NOTE(review): line-sampled extract — intervening lines not shown.
 */
1867 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1870 struct umtx_q *uq, *uq2;
1874 int error, pri, old_inherited_pri, su;
1878 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
/* su: caller has the privilege to use real-time priorities. */
1881 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
/* Remember the pre-lock inherited priority so it can be restored. */
1883 old_inherited_pri = uq->uq_inherited_pri;
1884 umtxq_lock(&uq->uq_key);
1885 umtxq_busy(&uq->uq_key);
1886 umtxq_unlock(&uq->uq_key);
/* Ceilings are stored inverted in userland; validate the range.
 * (fuword32 here fetches from user memory with no error check —
 * NOTE(review): a fault would fold into the ceiling value.) */
1888 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1889 if (ceiling > RTP_PRIO_MAX) {
1894 mtx_lock_spin(&sched_lock);
/* A thread running above the ceiling may not take the mutex. */
1895 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1896 mtx_unlock_spin(&sched_lock);
/* Lend the ceiling priority to ourselves before acquiring. */
1900 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1901 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1902 if (uq->uq_inherited_pri < UPRI(td))
1903 sched_lend_user_prio(td, uq->uq_inherited_pri);
1905 mtx_unlock_spin(&sched_lock);
/* PP mutexes are always stored as CONTESTED when unlocked, so the
 * acquire CAS goes from CONTESTED to id|CONTESTED. */
1907 owner = casuword32(&m->m_owner,
1908 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1910 if (owner == UMUTEX_CONTESTED) {
1915 /* The address was invalid. */
/* Error-checking mutexes report recursive locking as a hard error. */
1921 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1922 (owner & ~UMUTEX_CONTESTED) == id) {
1933 * If we caught a signal, we have retried and now
/* Still owned by someone else: sleep on the queue and retry. */
1939 umtxq_lock(&uq->uq_key);
1941 umtxq_unbusy(&uq->uq_key);
1942 error = umtxq_sleep(uq, "umtxpp", timo);
1944 umtxq_unlock(&uq->uq_key);
/*
 * Failure path: restore the saved inherited priority, clipped by
 * the highest-priority waiter on any PI mutex we still own.
 */
1946 mtx_lock_spin(&sched_lock);
1947 uq->uq_inherited_pri = old_inherited_pri;
1949 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1950 uq2 = TAILQ_FIRST(&pi->pi_blocked);
1952 if (pri > UPRI(uq2->uq_thread))
1953 pri = UPRI(uq2->uq_thread);
1956 if (pri > uq->uq_inherited_pri)
1957 pri = uq->uq_inherited_pri;
1958 sched_unlend_user_prio(td, pri);
1959 mtx_unlock_spin(&sched_lock);
/* Second restore path (NOTE(review): appears to be the out/error
 * label's copy of the same priority-restoration logic). */
1963 mtx_lock_spin(&sched_lock);
1964 uq->uq_inherited_pri = old_inherited_pri;
1966 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1967 uq2 = TAILQ_FIRST(&pi->pi_blocked);
1969 if (pri > UPRI(uq2->uq_thread))
1970 pri = UPRI(uq2->uq_thread);
1973 if (pri > uq->uq_inherited_pri)
1974 pri = uq->uq_inherited_pri;
1975 sched_unlend_user_prio(td, pri);
1976 mtx_unlock_spin(&sched_lock);
/* Release the busy mark and the key before returning. */
1980 umtxq_lock(&uq->uq_key);
1981 umtxq_unbusy(&uq->uq_key);
1982 umtxq_unlock(&uq->uq_key);
1983 umtx_key_release(&uq->uq_key);
1988 * Unlock a PP mutex.
/*
 * do_unlock_pp: release a priority-protection userland mutex.
 * Validates ownership, computes the priority we revert to from
 * m_ceilings[1], stores UMUTEX_CONTESTED back (PP mutexes always
 * look contested when unlocked so lockers enter the kernel), wakes
 * one waiter, and drops our lent priority.
 * NOTE(review): line-sampled extract — intervening lines not shown.
 */
1991 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
1993 struct umtx_key key;
1994 struct umtx_q *uq, *uq2;
1998 int error, pri, new_inherited_pri, su;
2002 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2005 * Make sure we own this mtx.
2007 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2011 if ((owner & ~UMUTEX_CONTESTED) != id)
/* m_ceilings[1] holds the saved/previous ceiling to revert to. */
2014 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2019 new_inherited_pri = PRI_MAX;
2021 rceiling = RTP_PRIO_MAX - rceiling;
2022 if (rceiling > RTP_PRIO_MAX)
2024 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2027 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2034 * For priority protected mutex, always set unlocked state
2035 * to UMUTEX_CONTESTED, so that userland always enters kernel
2036 * to lock the mutex, it is necessary because thread priority
2037 * has to be adjusted for such mutex.
2039 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
/* Wake a single waiter; it will re-contend through the kernel. */
2044 umtxq_signal(&key, 1);
/*
 * Recompute our user priority: start from the reverted ceiling,
 * then clip by the best waiter on any PI mutex we still own.
 */
2051 mtx_lock_spin(&sched_lock);
2053 uq->uq_inherited_pri = new_inherited_pri;
2055 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2056 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2058 if (pri > UPRI(uq2->uq_thread))
2059 pri = UPRI(uq2->uq_thread);
2062 if (pri > uq->uq_inherited_pri)
2063 pri = uq->uq_inherited_pri;
2064 sched_unlend_user_prio(td, pri);
2065 mtx_unlock_spin(&sched_lock);
2067 umtx_key_release(&key);
/*
 * do_set_ceiling: change the priority ceiling of a PP mutex.
 * Only valid for UMUTEX_PRIO_PROTECT mutexes.  Acquires the (always
 * CONTESTED-when-unlocked) mutex or verifies we already own it, writes
 * the new ceiling to m_ceilings[0], and optionally copies the previous
 * ceiling out to *old_ceiling.  Sleeps and retries while another
 * thread holds the mutex.
 * NOTE(review): line-sampled extract — intervening lines not shown.
 */
2072 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2073 uint32_t *old_ceiling)
2076 uint32_t save_ceiling;
2081 flags = fuword32(&m->m_flags);
2082 if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2084 if (ceiling > RTP_PRIO_MAX)
2088 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2092 umtxq_lock(&uq->uq_key);
2093 umtxq_busy(&uq->uq_key);
2094 umtxq_unlock(&uq->uq_key);
/* Snapshot the current ceiling so it can be reported to the caller. */
2096 save_ceiling = fuword32(&m->m_ceilings[0]);
/* Try to take the unlocked (CONTESTED) mutex ourselves. */
2098 owner = casuword32(&m->m_owner,
2099 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2101 if (owner == UMUTEX_CONTESTED) {
2102 suword32(&m->m_ceilings[0], ceiling);
2103 suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2109 /* The address was invalid. */
/* Already the owner: just update the ceiling in place. */
2115 if ((owner & ~UMUTEX_CONTESTED) == id) {
2116 suword32(&m->m_ceilings[0], ceiling);
2122 * If we caught a signal, we have retried and now
2129 * We set the contested bit, sleep. Otherwise the lock changed
2130 * and we need to retry or we lost a race to the thread
2131 * unlocking the umtx.
2133 umtxq_lock(&uq->uq_key);
2135 umtxq_unbusy(&uq->uq_key);
2136 error = umtxq_sleep(uq, "umtxpp", 0);
2138 umtxq_unlock(&uq->uq_key);
/* Done: wake everyone (ceiling change may alter eligibility). */
2140 umtxq_lock(&uq->uq_key);
2142 umtxq_signal(&uq->uq_key, INT_MAX);
2143 umtxq_unbusy(&uq->uq_key);
2144 umtxq_unlock(&uq->uq_key);
2145 umtx_key_release(&uq->uq_key);
2146 if (error == 0 && old_ceiling != NULL)
2147 suword32(old_ceiling, save_ceiling);
/*
 * _do_lock_umutex: dispatch a umutex lock to the protocol-specific
 * implementation based on the PRIO_INHERIT / PRIO_PROTECT flag bits.
 */
2152 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2155 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2157 return (_do_lock_normal(td, m, flags, timo, try));
2158 case UMUTEX_PRIO_INHERIT:
2159 return (_do_lock_pi(td, m, flags, timo, try));
2160 case UMUTEX_PRIO_PROTECT:
2161 return (_do_lock_pp(td, m, flags, timo, try));
2167 * Lock a userland POSIX mutex.
/*
 * do_lock_umutex: front end for umutex locking.  With no timeout the
 * lock is taken (restartably) via _do_lock_umutex; with a timeout the
 * deadline is tracked against the uptime clock and the lock attempt is
 * retried with a shrinking tick budget until it succeeds or expires.
 */
2170 do_lock_umutex(struct thread *td, struct umutex *m,
2171 struct timespec *timeout, int try)
2173 struct timespec ts, ts2, ts3;
2178 flags = fuword32(&m->m_flags);
2182 if (timeout == NULL) {
2183 error = _do_lock_umutex(td, m, flags, 0, try);
2184 /* Mutex locking is restarted if it is interrupted. */
/* Absolute deadline = now + timeout, on the uptime clock. */
2189 timespecadd(&ts, timeout);
2190 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2192 error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
2193 if (error != ETIMEDOUT)
2195 getnanouptime(&ts2);
2196 if (timespeccmp(&ts2, &ts, >=)) {
/* Not expired yet: retry with the remaining time. */
2201 timespecsub(&ts3, &ts2);
2202 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2204 /* Timed-locking is not restarted. */
2205 if (error == ERESTART)
2212 * Unlock a userland POSIX mutex.
/*
 * do_unlock_umutex: dispatch a umutex unlock to the protocol-specific
 * implementation based on the PRIO_INHERIT / PRIO_PROTECT flag bits.
 */
2215 do_unlock_umutex(struct thread *td, struct umutex *m)
2219 flags = fuword32(&m->m_flags);
2223 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2225 return (do_unlock_normal(td, m, flags));
2226 case UMUTEX_PRIO_INHERIT:
2227 return (do_unlock_pi(td, m, flags));
2228 case UMUTEX_PRIO_PROTECT:
2229 return (do_unlock_pp(td, m, flags));
/*
 * do_cv_wait: wait on a userland condition variable cv, atomically
 * releasing the associated mutex m.  The thread is queued and
 * c_has_waiters is set BEFORE the mutex is unlocked so a signaller
 * cannot miss us.  Supports an optional absolute-style timeout and
 * the UMTX_CHECK_UNPARKING early-wakeup protocol.
 * NOTE(review): line-sampled extract — intervening lines not shown.
 */
2236 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2237 struct timespec *timeout, u_long wflags)
2241 struct timespec cts, ets, tts;
2246 flags = fuword32(&cv->c_flags);
2247 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2250 umtxq_lock(&uq->uq_key);
2251 umtxq_busy(&uq->uq_key);
2253 umtxq_unlock(&uq->uq_key);
2256 * The magic thing is we should set c_has_waiters to 1 before
2257 * releasing user mutex.
2259 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2261 umtxq_lock(&uq->uq_key);
2262 umtxq_unbusy(&uq->uq_key);
2263 umtxq_unlock(&uq->uq_key);
/* Release the user mutex only after we are queued on the cv. */
2265 error = do_unlock_umutex(td, m);
2267 umtxq_lock(&uq->uq_key);
/* Unparking check: a pending TDP_WAKEUP consumes the wait at once. */
2269 if ((wflags & UMTX_CHECK_UNPARKING) &&
2270 (td->td_pflags & TDP_WAKEUP)) {
2271 td->td_pflags &= ~TDP_WAKEUP;
2273 } else if (timeout == NULL) {
2274 error = umtxq_sleep(uq, "ucond", 0);
/* Timed wait: deadline on the uptime clock, retried with the
 * remaining time after each ETIMEDOUT-free wakeup. */
2276 getnanouptime(&ets);
2277 timespecadd(&ets, timeout);
2278 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2280 error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2281 if (error != ETIMEDOUT)
2283 getnanouptime(&cts);
2284 if (timespeccmp(&cts, &ets, >=)) {
2289 timespecsub(&tts, &cts);
2290 TIMESPEC_TO_TIMEVAL(&tv, &tts);
2296 if ((uq->uq_flags & UQF_UMTXQ) == 0) {
2298 * If we concurrently got do_cv_signal()d
2299 * and we got an error or UNIX signals or a timeout,
2300 * then, perform another umtxq_signal to avoid
2301 * consuming the wakeup. This may cause spurious
2302 * wakeup for another thread which was just queued,
2303 * but SUSV3 explicitly allows spurious wakeup to
2304 * occur, and indeed a kernel based implementation
2307 if (!umtxq_signal(&uq->uq_key, 1))
2310 if (error == ERESTART)
2314 umtxq_unlock(&uq->uq_key);
2315 umtx_key_release(&uq->uq_key);
2320 * Signal a userland condition variable.
/*
 * do_cv_signal: wake at most one waiter on cv; if that was the last
 * waiter, clear c_has_waiters in user memory so future signallers can
 * skip the kernel.
 */
2323 do_cv_signal(struct thread *td, struct ucond *cv)
2325 struct umtx_key key;
2326 int error, cnt, nwake;
2329 flags = fuword32(&cv->c_flags);
2330 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
/* cnt = waiters before the wakeup, nwake = how many we actually woke. */
2334 cnt = umtxq_count(&key);
2335 nwake = umtxq_signal(&key, 1);
2339 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2344 umtx_key_release(&key);
/*
 * do_cv_broadcast: wake every waiter on cv and clear the userland
 * c_has_waiters flag.
 */
2349 do_cv_broadcast(struct thread *td, struct ucond *cv)
2351 struct umtx_key key;
2355 flags = fuword32(&cv->c_flags);
2356 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2361 umtxq_signal(&key, INT_MAX);
2364 error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2370 umtx_key_release(&key);
/* _umtx_lock syscall: lock the simple umtx with the caller's tid, no timeout. */
2375 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2376 /* struct umtx *umtx */
2378 return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
/* _umtx_unlock syscall: unlock the simple umtx owned by the caller's tid. */
2382 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2383 /* struct umtx *umtx */
2385 return do_unlock_umtx(td, uap->umtx, td->td_tid);
/*
 * UMTX_OP_LOCK handler: copy in the optional timeout, validate
 * tv_nsec, then lock the simple umtx with uap->val as the owner id.
 */
2389 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2391 struct timespec *ts, timeout;
2394 /* Allow a null timespec (wait forever). */
2395 if (uap->uaddr2 == NULL)
2398 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2401 if (timeout.tv_nsec >= 1000000000 ||
2402 timeout.tv_nsec < 0) {
2407 return (do_lock_umtx(td, uap->obj, uap->val, ts));
/* UMTX_OP_UNLOCK handler: unlock the simple umtx with owner id uap->val. */
2411 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2413 return (do_unlock_umtx(td, uap->obj, uap->val));
/*
 * UMTX_OP_WAIT handler: sleep until *obj != uap->val, with an optional
 * validated timeout (final 0 argument = native word width).
 */
2417 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2419 struct timespec *ts, timeout;
2422 if (uap->uaddr2 == NULL)
2425 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2428 if (timeout.tv_nsec >= 1000000000 ||
2429 timeout.tv_nsec < 0)
2433 return do_wait(td, uap->obj, uap->val, ts, 0);
/* UMTX_OP_WAKE handler: wake up to uap->val threads sleeping on obj. */
2437 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
2439 return (kern_umtx_wake(td, uap->obj, uap->val));
/*
 * UMTX_OP_MUTEX_LOCK handler: copy in/validate the optional timeout,
 * then lock the POSIX umutex (try = 0, i.e. blocking).
 */
2443 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2445 struct timespec *ts, timeout;
2448 /* Allow a null timespec (wait forever). */
2449 if (uap->uaddr2 == NULL)
2452 error = copyin(uap->uaddr2, &timeout,
2456 if (timeout.tv_nsec >= 1000000000 ||
2457 timeout.tv_nsec < 0) {
2462 return do_lock_umutex(td, uap->obj, ts, 0);
/* UMTX_OP_MUTEX_TRYLOCK handler: non-blocking lock attempt (try = 1). */
2466 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
2468 return do_lock_umutex(td, uap->obj, NULL, 1);
/* UMTX_OP_MUTEX_UNLOCK handler: unlock the POSIX umutex. */
2472 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
2474 return do_unlock_umutex(td, uap->obj);
/* UMTX_OP_SET_CEILING handler: set a PP mutex ceiling; old value to uaddr1. */
2478 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
2480 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
/*
 * UMTX_OP_CV_WAIT handler: copy in/validate the optional timeout and
 * wait on the condvar (obj) with its mutex (uaddr1); val carries wflags.
 */
2484 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
2486 struct timespec *ts, timeout;
2489 /* Allow a null timespec (wait forever). */
2490 if (uap->uaddr2 == NULL)
2493 error = copyin(uap->uaddr2, &timeout,
2497 if (timeout.tv_nsec >= 1000000000 ||
2498 timeout.tv_nsec < 0) {
2503 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
/* UMTX_OP_CV_SIGNAL handler: wake one condvar waiter. */
2507 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
2509 return do_cv_signal(td, uap->obj);
/* UMTX_OP_CV_BROADCAST handler: wake all condvar waiters. */
2513 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
2515 return do_cv_broadcast(td, uap->obj);
/* Handler signature shared by every UMTX_OP_* entry point. */
2518 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
/* Native dispatch table, indexed by the UMTX_OP_* operation code. */
2520 static _umtx_op_func op_table[] = {
2521 __umtx_op_lock_umtx, /* UMTX_OP_LOCK */
2522 __umtx_op_unlock_umtx, /* UMTX_OP_UNLOCK */
2523 __umtx_op_wait, /* UMTX_OP_WAIT */
2524 __umtx_op_wake, /* UMTX_OP_WAKE */
2525 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */
2526 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */
2527 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */
2528 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */
2529 __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT */
2530 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */
2531 __umtx_op_cv_broadcast /* UMTX_OP_CV_BROADCAST */
/* _umtx_op syscall: bounds-check the op code and dispatch via op_table. */
2535 _umtx_op(struct thread *td, struct _umtx_op_args *uap)
2537 if ((unsigned)uap->op < UMTX_OP_MAX)
2538 return (*op_table[uap->op])(td, uap);
/* 32-bit compat: lock a 32-bit simple umtx with the caller's tid. */
2544 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
2545 /* struct umtx *umtx */
2547 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
/* 32-bit compat: unlock a 32-bit simple umtx owned by the caller's tid. */
2551 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
2552 /* struct umtx *umtx */
2554 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
/*
 * copyin_timeout32: fetch a 32-bit struct timespec32 from user memory
 * at addr and widen it into the native *tsp.
 */
2563 copyin_timeout32(void *addr, struct timespec *tsp)
2565 struct timespec32 ts32;
2568 error = copyin(addr, &ts32, sizeof(struct timespec32));
2570 tsp->tv_sec = ts32.tv_sec;
2571 tsp->tv_nsec = ts32.tv_nsec;
/*
 * 32-bit compat UMTX_OP_LOCK handler: copy in/validate the 32-bit
 * timeout, then lock the 32-bit simple umtx.
 */
2577 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2579 struct timespec *ts, timeout;
2582 /* Allow a null timespec (wait forever). */
2583 if (uap->uaddr2 == NULL)
2586 error = copyin_timeout32(uap->uaddr2, &timeout);
2589 if (timeout.tv_nsec >= 1000000000 ||
2590 timeout.tv_nsec < 0) {
2595 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
/* 32-bit compat UMTX_OP_UNLOCK handler. */
2599 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2601 return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
/*
 * 32-bit compat UMTX_OP_WAIT handler: validate the 32-bit timeout and
 * wait on a 32-bit value (final argument 1 selects 32-bit compare).
 */
2605 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2607 struct timespec *ts, timeout;
2610 if (uap->uaddr2 == NULL)
2613 error = copyin_timeout32(uap->uaddr2, &timeout);
2616 if (timeout.tv_nsec >= 1000000000 ||
2617 timeout.tv_nsec < 0)
2621 return do_wait(td, uap->obj, uap->val, ts, 1);
/*
 * 32-bit compat UMTX_OP_MUTEX_LOCK handler: only the timespec layout
 * differs from the native path; struct umutex is layout-compatible.
 */
2625 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
2627 struct timespec *ts, timeout;
2630 /* Allow a null timespec (wait forever). */
2631 if (uap->uaddr2 == NULL)
2634 error = copyin_timeout32(uap->uaddr2, &timeout);
2637 if (timeout.tv_nsec >= 1000000000 ||
2638 timeout.tv_nsec < 0)
2642 return do_lock_umutex(td, uap->obj, ts, 0);
/*
 * 32-bit compat UMTX_OP_CV_WAIT handler: 32-bit timespec copyin,
 * otherwise identical to the native condvar wait path.
 */
2646 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2648 struct timespec *ts, timeout;
2651 /* Allow a null timespec (wait forever). */
2652 if (uap->uaddr2 == NULL)
2655 error = copyin_timeout32(uap->uaddr2, &timeout);
2658 if (timeout.tv_nsec >= 1000000000 ||
2659 timeout.tv_nsec < 0)
2663 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
/*
 * 32-bit compat dispatch table.  Entry order must match op_table /
 * the UMTX_OP_* codes exactly; only operations that take a timespec
 * or a 32-bit word need compat32 variants.
 * (Fixed: the MUTEX_TRYLOCK / MUTEX_LOCK annotations were swapped —
 * trylock is index 4 = UMTX_OP_MUTEX_TRYLOCK, matching op_table.)
 */
2666 static _umtx_op_func op_table_compat32[] = {
2667 __umtx_op_lock_umtx_compat32, /* UMTX_OP_LOCK */
2668 __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */
2669 __umtx_op_wait_compat32, /* UMTX_OP_WAIT */
2670 __umtx_op_wake, /* UMTX_OP_WAKE */
2671 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */
2672 __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_LOCK */
2673 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */
2674 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */
2675 __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT */
2676 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */
2677 __umtx_op_cv_broadcast /* UMTX_OP_CV_BROADCAST */
/* freebsd32 _umtx_op syscall: bounds-check and dispatch via op_table_compat32. */
2681 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
2683 if ((unsigned)uap->op < UMTX_OP_MAX)
2684 return (*op_table_compat32[uap->op])(td,
2685 (struct _umtx_op_args *)uap);
/* Thread-ctor hook: allocate td's umtx queue record and link it back. */
2691 umtx_thread_init(struct thread *td)
2693 td->td_umtxq = umtxq_alloc();
2694 td->td_umtxq->uq_thread = td;
/* Thread-dtor hook: free td's umtx queue record. */
2698 umtx_thread_fini(struct thread *td)
2700 umtxq_free(td->td_umtxq)
2704 * It will be called when new thread is created, e.g fork().
/*
 * umtx_thread_alloc: reset the recycled umtx state for a new thread
 * and assert it carries no stale PI linkage from its previous life.
 */
2707 umtx_thread_alloc(struct thread *td)
2712 uq->uq_inherited_pri = PRI_MAX;
2714 KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
2715 KASSERT(uq->uq_thread == td, ("uq_thread != td"));
2716 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
2717 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
/* exec() eventhandler: drop any umtx PI state the execing thread holds. */
2724 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
2725 struct image_params *imgp __unused)
2727 umtx_thread_cleanup(curthread);
2731 * thread_exit() hook.
/* Release td's umtx PI state when the thread exits. */
2734 umtx_thread_exit(struct thread *td)
2736 umtx_thread_cleanup(td)
2740 * clean up umtx data.
/*
 * umtx_thread_cleanup: disown every PI mutex still held by td, reset
 * the inherited priority to PRI_MAX and clear the priority-borrowing
 * flag, all under sched_lock.  Safe to call on a thread with no umtx
 * state (NULL td_umtxq).
 */
2743 umtx_thread_cleanup(struct thread *td)
2748 if ((uq = td->td_umtxq) == NULL)
2751 mtx_lock_spin(&sched_lock);
2752 uq->uq_inherited_pri = PRI_MAX;
/* Orphan each contested PI record; waiters will re-resolve ownership. */
2753 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
2754 pi->pi_owner = NULL;
2755 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
2757 td->td_flags &= ~TDF_UBORROWING;
2758 mtx_unlock_spin(&sched_lock);