2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2009-2021 Dmitry Chagin <dchagin@FreeBSD.org>
5 * Copyright (c) 2008 Roman Divacky
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include "opt_compat.h"
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_elf.h>
39 #include <sys/mutex.h>
42 #include <sys/sched.h>
43 #include <sys/umtxvar.h>
46 #include <machine/../linux32/linux.h>
47 #include <machine/../linux32/linux32_proto.h>
49 #include <machine/../linux/linux.h>
50 #include <machine/../linux/linux_proto.h>
52 #include <compat/linux/linux_emul.h>
53 #include <compat/linux/linux_futex.h>
54 #include <compat/linux/linux_misc.h>
55 #include <compat/linux/linux_timer.h>
56 #include <compat/linux/linux_util.h>
/*
 * FUTEX_SHARED is kept in linux_futex_args.flags and selects a
 * process-shared umtx key (AUTO_SHARE) instead of a private one.
 */
58 #define FUTEX_SHARED 0x8 /* shared futex */
/*
 * NOTE(review): the expansion is not wrapped in parentheses; it is safe
 * at the visible call sites (always a lone function argument), but the
 * macro is fragile if ever used inside a larger expression.
 */
60 #define GET_SHARED(a) (a->flags & FUTEX_SHARED) ? AUTO_SHARE : THREAD_SHARE
62 static int futex_atomic_op(struct thread *, int, uint32_t *);
63 static int handle_futex_death(struct thread *td, struct linux_emuldata *,
64 uint32_t *, unsigned int, bool);
65 static int fetch_robust_entry(struct linux_robust_list **,
66 struct linux_robust_list **, unsigned int *);
/* Decoded argument bundle passed between the futex entry points below. */
68 struct linux_futex_args {
81 static inline int futex_key_get(const void *, int, int, struct umtx_key *);
82 static void linux_umtx_abs_timeout_init(struct umtx_abs_timeout *,
83 struct linux_futex_args *);
84 static int linux_futex(struct thread *, struct linux_futex_args *);
85 static int linux_futex_wait(struct thread *, struct linux_futex_args *);
86 static int linux_futex_wake(struct thread *, struct linux_futex_args *);
87 static int linux_futex_requeue(struct thread *, struct linux_futex_args *);
88 static int linux_futex_wakeop(struct thread *, struct linux_futex_args *);
89 static int linux_futex_lock_pi(struct thread *, bool, struct linux_futex_args *);
90 static int linux_futex_unlock_pi(struct thread *, bool,
91 struct linux_futex_args *);
92 static int futex_wake_pi(struct thread *, uint32_t *, bool);
/*
 * Translate a user-space futex address into a umtx key.  The address
 * must be 32-bit aligned; misaligned addresses are rejected before the
 * umtx_key_get() lookup.
 */
95 futex_key_get(const void *uaddr, int type, int share, struct umtx_key *key)
98 	/* Check that futex address is a 32bit aligned. */
99 	if (!__is_aligned(uaddr, sizeof(uint32_t)))
101 	return (umtx_key_get(uaddr, type, share, key));
/*
 * Kernel-internal helper: wake up to 'val' waiters on a plain (non-PI)
 * futex at 'uaddr', matching any wait bitset.  'shared' selects a
 * process-shared key via the FUTEX_SHARED flag.
 */
105 futex_wake(struct thread *td, uint32_t *uaddr, int val, bool shared)
107 	struct linux_futex_args args;
109 	bzero(&args, sizeof(args));
110 	args.op = LINUX_FUTEX_WAKE;
112 	args.flags = shared == true ? FUTEX_SHARED : 0;
114 	args.val3 = FUTEX_BITSET_MATCH_ANY;
116 	return (linux_futex_wake(td, &args));
/*
 * Kernel-internal helper used by the robust-futex death handling:
 * release a PI futex on behalf of a dead owner.  The unlock is invoked
 * with rb == true so the normal owner-TID check is bypassed.
 */
120 futex_wake_pi(struct thread *td, uint32_t *uaddr, bool shared)
122 	struct linux_futex_args args;
124 	bzero(&args, sizeof(args));
125 	args.op = LINUX_FUTEX_UNLOCK_PI;
127 	args.flags = shared == true ? FUTEX_SHARED : 0;
129 	return (linux_futex_unlock_pi(td, true, &args));
/*
 * Execute the FUTEX_WAKE_OP encoded operation on the futex word at
 * 'uaddr': apply an atomic read-modify-write (set/add/or/andn/xor with
 * oparg), then compare the fetched old value against cmparg.
 * Field layout follows the Linux encoding: op in bits 28-31, cmp in
 * bits 24-27, oparg in bits 12-23, cmparg in bits 0-11.
 */
133 futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr)
135 	int op = (encoded_op >> 28) & 7;
136 	int cmp = (encoded_op >> 24) & 15;
	/*
	 * NOTE(review): the shift pairs sign-extend the 12-bit fields, but
	 * left-shifting a signed int with the sign bit set is formally UB;
	 * this relies on the usual two's-complement behavior — consider
	 * confirming against the compilers used for this kernel.
	 */
137 	int oparg = (encoded_op << 8) >> 20;
138 	int cmparg = (encoded_op << 20) >> 20;
	/* FUTEX_OP_OPARG_SHIFT: oparg is a shift count, i.e. use 1 << oparg. */
141 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
146 		ret = futex_xchgl(oparg, uaddr, &oldval);
149 		ret = futex_addl(oparg, uaddr, &oldval);
152 		ret = futex_orl(oparg, uaddr, &oldval);
155 		ret = futex_andl(~oparg, uaddr, &oldval);
158 		ret = futex_xorl(oparg, uaddr, &oldval);
	/* Compare the pre-operation value against cmparg per 'cmp'. */
169 	case FUTEX_OP_CMP_EQ:
170 		ret = (oldval == cmparg);
172 	case FUTEX_OP_CMP_NE:
173 		ret = (oldval != cmparg);
175 	case FUTEX_OP_CMP_LT:
176 		ret = (oldval < cmparg);
178 	case FUTEX_OP_CMP_GE:
179 		ret = (oldval >= cmparg);
181 	case FUTEX_OP_CMP_LE:
182 		ret = (oldval <= cmparg);
184 	case FUTEX_OP_CMP_GT:
185 		ret = (oldval > cmparg);
/*
 * Central dispatcher for the Linux futex(2) operations.  Strips the
 * PRIVATE and CLOCK_REALTIME modifier bits out of args->op, then
 * switches on the remaining command and forwards to the matching
 * linux_futex_*() implementation.
 */
195 linux_futex(struct thread *td, struct linux_futex_args *args)
197 	struct linux_pemuldata *pem;
	/*
	 * PRIVATE_FLAG means the futex is process-private; when it is NOT
	 * set the futex may be shared and FUTEX_SHARED is recorded in flags.
	 */
200 	if (args->op & LINUX_FUTEX_PRIVATE_FLAG) {
202 		args->op &= ~LINUX_FUTEX_PRIVATE_FLAG;
204 		args->flags = FUTEX_SHARED;
	/*
	 * CLOCK_REALTIME is only honored for the ops listed below; for the
	 * rest its presence is rejected (checked just after this).
	 */
206 	args->clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME;
207 	args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME;
210 	    args->op != LINUX_FUTEX_WAIT_BITSET &&
211 	    args->op != LINUX_FUTEX_WAIT_REQUEUE_PI &&
212 	    args->op != LINUX_FUTEX_LOCK_PI2)
	/* Plain WAIT is WAIT_BITSET with a match-any bitset. */
216 	case LINUX_FUTEX_WAIT:
217 		args->val3 = FUTEX_BITSET_MATCH_ANY;
220 	case LINUX_FUTEX_WAIT_BITSET:
221 		LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x",
222 		    args->uaddr, args->val, args->val3);
224 		return (linux_futex_wait(td, args));
	/* Plain WAKE is WAKE_BITSET with a match-any bitset. */
226 	case LINUX_FUTEX_WAKE:
227 		args->val3 = FUTEX_BITSET_MATCH_ANY;
230 	case LINUX_FUTEX_WAKE_BITSET:
231 		LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x",
232 		    args->uaddr, args->val, args->val3);
234 		return (linux_futex_wake(td, args));
236 	case LINUX_FUTEX_REQUEUE:
238 	 * Glibc does not use this operation since version 2.3.3,
239 	 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
240 	 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
241 	 * FUTEX_REQUEUE returned EINVAL.
	/* Log the deprecated op once per process. */
243 		pem = pem_find(td->td_proc);
244 		if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
245 			linux_msg(td, "unsupported FUTEX_REQUEUE");
246 			pem->flags |= LINUX_XDEPR_REQUEUEOP;
250 	 * The above is true, however musl libc does make use of the
251 	 * futex requeue operation, allow operation for brands which
252 	 * set LINUX_BI_FUTEX_REQUEUE bit of Brandinfo flags.
255 		Elf_Brandinfo *bi = p->p_elf_brandinfo;
256 		if (bi == NULL || ((bi->flags & LINUX_BI_FUTEX_REQUEUE)) == 0)
	/* REQUEUE (unlike CMP_REQUEUE) skips the val3 comparison. */
258 		args->val3_compare = false;
261 	case LINUX_FUTEX_CMP_REQUEUE:
262 		LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p "
263 		    "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x",
264 		    args->uaddr, args->val, args->val3, args->uaddr2,
267 		return (linux_futex_requeue(td, args));
269 	case LINUX_FUTEX_WAKE_OP:
270 		LINUX_CTR5(sys_futex, "WAKE_OP "
271 		    "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x",
272 		    args->uaddr, args->val, args->uaddr2, args->val3,
275 		return (linux_futex_wakeop(td, args));
	/* LOCK_PI always measures its timeout against CLOCK_REALTIME. */
277 	case LINUX_FUTEX_LOCK_PI:
278 		args->clockrt = true;
281 	case LINUX_FUTEX_LOCK_PI2:
282 		LINUX_CTR2(sys_futex, "LOCKPI uaddr %p val 0x%x",
283 		    args->uaddr, args->val);
285 		return (linux_futex_lock_pi(td, false, args));
287 	case LINUX_FUTEX_UNLOCK_PI:
288 		LINUX_CTR1(sys_futex, "UNLOCKPI uaddr %p",
291 		return (linux_futex_unlock_pi(td, false, args));
	/* TRYLOCK_PI is LOCK_PI with try == true (no sleeping). */
293 	case LINUX_FUTEX_TRYLOCK_PI:
294 		LINUX_CTR1(sys_futex, "TRYLOCKPI uaddr %p",
297 		return (linux_futex_lock_pi(td, true, args));
300 	 * Current implementation of FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI
301 	 * can't be used anymore to implement conditional variables.
302 	 * A detailed explanation can be found here:
304 	 * https://sourceware.org/bugzilla/show_bug.cgi?id=13165
305 	 * and here http://austingroupbugs.net/view.php?id=609
308 	 * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=ed19993b5b0d05d62cc883571519a67dae481a14
309 	 * glibc does not use them.
311 	case LINUX_FUTEX_WAIT_REQUEUE_PI:
312 		/* not yet implemented */
313 		pem = pem_find(td->td_proc);
314 		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
315 			linux_msg(td, "unsupported FUTEX_WAIT_REQUEUE_PI");
316 			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
320 	case LINUX_FUTEX_CMP_REQUEUE_PI:
321 		/* not yet implemented */
322 		pem = pem_find(td->td_proc);
323 		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
324 			linux_msg(td, "unsupported FUTEX_CMP_REQUEUE_PI");
325 			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
330 		linux_msg(td, "unsupported futex op %d", args->op);
337  * - 0 futex word value means unlocked.
338  * - TID futex word value means locked.
339  * Userspace uses atomic ops to lock/unlock these futexes without entering the
340  * kernel. If the lock-acquire fastpath fails, (transition from 0 to TID fails),
341  * then FUTEX_LOCK_PI is called.
342  * The kernel atomically set FUTEX_WAITERS bit in the futex word value, if no
343  * other waiters exists looks up the thread that owns the futex (it has put its
344  * own TID into the futex value) and made this thread the owner of the internal
345  * pi-aware lock object (mutex). Then the kernel tries to lock the internal lock
346  * object, on which it blocks. Once it returns, it has the mutex acquired, and it
347  * sets the futex value to its own TID and returns (futex value contains
348  * FUTEX_WAITERS|TID).
349  * The unlock fastpath would fail (because the FUTEX_WAITERS bit is set) and
350  * FUTEX_UNLOCK_PI will be called.
351  * If a futex is found to be held at exit time, the kernel sets the OWNER_DIED
352  * bit of the futex word and wakes up the next futex waiter (if any), WAITERS
353  * bit is preserved (if any).
354  * If OWNER_DIED bit is set the kernel sanity checks the futex word value against
355  * the internal futex state and if correct, acquire futex.
/*
 * Implement FUTEX_LOCK_PI / FUTEX_LOCK_PI2 / FUTEX_TRYLOCK_PI ('try'
 * distinguishes the trylock variant) on top of the native umtx PI
 * machinery; see the protocol description above.
 */
358 linux_futex_lock_pi(struct thread *td, bool try, struct linux_futex_args *args)
360 	struct umtx_abs_timeout timo;
361 	struct linux_emuldata *em;
362 	struct umtx_pi *pi, *new_pi;
366 	uint32_t owner, old_owner;
370 	error = futex_key_get(args->uaddr, TYPE_PI_FUTEX, GET_SHARED(args),
374 	if (args->ts != NULL)
375 		linux_umtx_abs_timeout_init(&timo, args);
	/*
	 * Find or create the in-kernel PI state object for this key.  The
	 * M_NOWAIT-then-M_WAITOK dance avoids sleeping while the queue
	 * lock is held, and rechecks for a racing insert after re-locking.
	 */
377 	umtxq_lock(&uq->uq_key);
378 	pi = umtx_pi_lookup(&uq->uq_key);
380 		new_pi = umtx_pi_alloc(M_NOWAIT);
381 		if (new_pi == NULL) {
382 			umtxq_unlock(&uq->uq_key);
383 			new_pi = umtx_pi_alloc(M_WAITOK);
384 			umtxq_lock(&uq->uq_key);
385 			pi = umtx_pi_lookup(&uq->uq_key);
387 				umtx_pi_free(new_pi);
391 		if (new_pi != NULL) {
392 			new_pi->pi_key = uq->uq_key;
393 			umtx_pi_insert(new_pi);
398 	umtxq_unlock(&uq->uq_key);
400 	/* Try uncontested case first. */
401 	rv = casueword32(args->uaddr, 0, &owner, em->em_tid);
402 	/* The acquire succeeded. */
413 	 * Avoid overwriting a possible error from sleep due
414 	 * to the pending signal with suspension check result.
417 		error = thread_check_susp(td, true);
422 	/* The futex word at *uaddr is already locked by the caller. */
423 	if ((owner & FUTEX_TID_MASK) == em->em_tid) {
429 	 * Futex owner died, handle_futex_death() set the OWNER_DIED bit
430 	 * and clear tid. Try to acquire it.
432 	if ((owner & FUTEX_TID_MASK) == 0) {
	/* Keep only the flag bits; the TID field is known to be zero. */
434 		owner = owner & (FUTEX_WAITERS | FUTEX_OWNER_DIED);
436 		rv = casueword32(args->uaddr, old_owner, &owner, owner);
443 			error = thread_check_susp(td, true);
449 		 * If this failed the lock could
455 		umtxq_lock(&uq->uq_key);
456 		umtxq_busy(&uq->uq_key);
457 		error = umtx_pi_claim(pi, td);
458 		umtxq_unbusy(&uq->uq_key);
459 		umtxq_unlock(&uq->uq_key);
462 			 * Since we're going to return an
463 			 * error, restore the futex to its
464 			 * previous, unowned state to avoid
465 			 * compounding the problem.
467 			(void)casuword32(args->uaddr, owner, old_owner);
473 	 * Inconsistent state: OWNER_DIED is set and tid is not 0.
474 	 * Linux does some checks of futex state, we return EINVAL,
475 	 * as the user space can take care of this.
477 	if ((owner & FUTEX_OWNER_DIED) != 0) {
488 	 * If we caught a signal, we have retried and now
494 	umtxq_lock(&uq->uq_key);
495 	umtxq_busy(&uq->uq_key);
496 	umtxq_unlock(&uq->uq_key);
499 	 * Set the contested bit so that a release in user space knows
500 	 * to use the system call for unlock. If this fails either some
501 	 * one else has acquired the lock or it has been released.
503 	rv = casueword32(args->uaddr, owner, &owner,
504 	    owner | FUTEX_WAITERS);
506 		umtxq_unbusy_unlocked(&uq->uq_key);
511 		umtxq_unbusy_unlocked(&uq->uq_key);
512 		error = thread_check_susp(td, true);
517 	 * The lock changed and we need to retry or we
518 	 * lost a race to the thread unlocking the umtx.
524 	 * Substitute Linux thread id by native thread id to
525 	 * avoid refactoring code of umtxq_sleep_pi().
527 	td1 = linux_tdfind(td, owner & FUTEX_TID_MASK, -1);
530 		PROC_UNLOCK(td1->td_proc);
532 		umtxq_unbusy_unlocked(&uq->uq_key);
537 	umtxq_lock(&uq->uq_key);
539 	/* We set the contested bit, sleep. */
540 	error = umtxq_sleep_pi(uq, pi, owner, "futexp",
541 	    args->ts == NULL ? NULL : &timo,
542 	    (args->flags & FUTEX_SHARED) != 0);
546 	error = thread_check_susp(td, false);
551 	umtxq_lock(&uq->uq_key);
553 	umtxq_unlock(&uq->uq_key);
554 	umtx_key_release(&uq->uq_key);
/*
 * Implement FUTEX_UNLOCK_PI.  'rb' (robust) is set when called from the
 * futex-death cleanup path, in which case the caller-owns check is
 * skipped and the futex word is handed off without selecting a new
 * owner TID.
 */
559 linux_futex_unlock_pi(struct thread *td, bool rb, struct linux_futex_args *args)
561 	struct linux_emuldata *em;
563 	uint32_t old, owner, new_owner;
569 	 * Make sure we own this mtx.
571 	error = fueword32(args->uaddr, &owner);
	/* Only the owner (matching Linux TID) may unlock, unless robust. */
574 	if (!rb && (owner & FUTEX_TID_MASK) != em->em_tid)
577 	error = futex_key_get(args->uaddr, TYPE_PI_FUTEX, GET_SHARED(args), &key);
	/* Drop the kernel-side PI state; 'count' reports remaining waiters. */
582 	error = umtx_pi_drop(td, &key, rb, &count);
583 	if (error != 0 || rb) {
586 		umtx_key_release(&key);
592 	 * When unlocking the futex, it must be marked as unowned if
593 	 * there is zero or one thread only waiting for it.
594 	 * Otherwise, it must be marked as contested.
597 		new_owner = FUTEX_WAITERS;
602 	error = casueword32(args->uaddr, owner, &old, new_owner);
604 		error = thread_check_susp(td, false);
608 	umtxq_unbusy_unlocked(&key);
609 	umtx_key_release(&key);
	/* The futex word changed under us: the handoff raced with userspace. */
612 	if (error == 0 && old != owner)
/*
 * Implement FUTEX_WAKE_OP: atomically run the encoded operation on
 * uaddr2, wake up to 'val' waiters on uaddr, and if the encoded compare
 * succeeded also wake up to 'val2' waiters on uaddr2.  Per the Linux
 * ABI, val2 arrives in the timeout argument slot (args->ts).
 */
618 linux_futex_wakeop(struct thread *td, struct linux_futex_args *args)
620 	struct umtx_key key, key2;
621 	int nrwake, op_ret, ret;
	/* The two futex words must be distinct. */
624 	if (args->uaddr == args->uaddr2)
627 	error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args), &key);
630 	error = futex_key_get(args->uaddr2, TYPE_FUTEX, GET_SHARED(args), &key2);
632 		umtx_key_release(&key);
638 	op_ret = futex_atomic_op(td, args->val3, args->uaddr2);
640 	if (op_ret == -ENOSYS)
649 	ret = umtxq_signal_mask(&key, args->val, args->val3);
	/* val2 was smuggled through the timeout pointer slot. */
651 	nrwake = (int)(unsigned long)args->ts;
653 	count = umtxq_count(&key2);
655 		ret += umtxq_signal_mask(&key2, nrwake, args->val3);
657 		ret += umtxq_signal_mask(&key, nrwake, args->val3);
	/* Return the total number of threads woken. */
660 	td->td_retval[0] = ret;
663 	umtx_key_release(&key2);
664 	umtx_key_release(&key);
/*
 * Implement FUTEX_REQUEUE / FUTEX_CMP_REQUEUE: wake up to 'val' waiters
 * on uaddr and move up to 'nrrequeue' more onto uaddr2.  For the CMP
 * variant (val3_compare == true) the current futex word must still
 * equal val3.  Per the Linux ABI, nrrequeue arrives in the timeout slot.
 */
669 linux_futex_requeue(struct thread *td, struct linux_futex_args *args)
671 	int nrwake, nrrequeue;
672 	struct umtx_key key, key2;
677 	 * Linux allows this, we would not, it is an incorrect
678 	 * usage of declared ABI, so return EINVAL.
680 	if (args->uaddr == args->uaddr2)
683 	nrrequeue = (int)(unsigned long)args->ts;
686 	 * Sanity check to prevent signed integer overflow,
687 	 * see Linux CVE-2018-6927
689 	if (nrwake < 0 || nrrequeue < 0)
692 	error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args), &key);
695 	error = futex_key_get(args->uaddr2, TYPE_FUTEX, GET_SHARED(args), &key2);
697 		umtx_key_release(&key);
	/* CMP_REQUEUE: bail out if the futex word no longer matches val3. */
703 	error = fueword32(args->uaddr, &uval);
706 	else if (args->val3_compare == true && uval != args->val3)
712 	td->td_retval[0] = umtxq_requeue(&key, nrwake, &key2, nrrequeue);
716 	umtx_key_release(&key2);
717 	umtx_key_release(&key);
/*
 * Implement FUTEX_WAKE / FUTEX_WAKE_BITSET: signal up to args->val
 * waiters whose wait bitset intersects args->val3; the number of woken
 * threads is returned in td_retval[0].
 */
722 linux_futex_wake(struct thread *td, struct linux_futex_args *args)
730 	error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args), &key);
734 	td->td_retval[0] = umtxq_signal_mask(&key, args->val, args->val3);
736 	umtx_key_release(&key);
/*
 * Implement FUTEX_WAIT / FUTEX_WAIT_BITSET: sleep on the futex queue if
 * the futex word still equals args->val, with optional timeout.  The
 * word is re-checked under the busy queue to close the race with a
 * concurrent wake.
 */
741 linux_futex_wait(struct thread *td, struct linux_futex_args *args)
743 	struct umtx_abs_timeout timo;
752 	error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args),
756 	if (args->ts != NULL)
757 		linux_umtx_abs_timeout_init(&timo, args);
758 	umtxq_lock(&uq->uq_key);
759 	umtxq_busy(&uq->uq_key);
	/* Record the bitset this waiter matches for WAKE_BITSET. */
760 	uq->uq_bitset = args->val3;
762 	umtxq_unlock(&uq->uq_key);
	/* Re-read the futex word; only sleep if it still holds args->val. */
763 	error = fueword32(args->uaddr, &uval);
766 	else if (uval != args->val)
768 	umtxq_lock(&uq->uq_key);
769 	umtxq_unbusy(&uq->uq_key);
771 	error = umtxq_sleep(uq, "futex",
772 	    args->ts == NULL ? NULL : &timo);
	/* UQF_UMTXQ cleared means we were woken rather than timed out. */
773 	if ((uq->uq_flags & UQF_UMTXQ) == 0)
777 	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
780 	umtxq_unlock(&uq->uq_key);
781 	umtx_key_release(&uq->uq_key);
	/* Linux reports interrupted futex waits as EINTR, not restart. */
782 	if (error == ERESTART)
/*
 * Convert the Linux timeout semantics for the current futex op into a
 * umtx absolute timeout: clock selection per args->clockrt, and
 * relative-vs-absolute interpretation per the op (see comment below).
 */
788 linux_umtx_abs_timeout_init(struct umtx_abs_timeout *timo,
789     struct linux_futex_args *args)
791 	int clockid, absolute;
794 	 * The FUTEX_CLOCK_REALTIME option bit can be employed only with the
795 	 * FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI, FUTEX_LOCK_PI2.
796 	 * For FUTEX_WAIT, timeout is interpreted as a relative value, for other
797 	 * futex operations timeout is interpreted as an absolute value.
798 	 * If FUTEX_CLOCK_REALTIME option bit is set, the Linux kernel measures
799 	 * the timeout against the CLOCK_REALTIME clock, otherwise the kernel
800 	 * measures the timeout against the CLOCK_MONOTONIC clock.
802 	clockid = args->clockrt ? CLOCK_REALTIME : CLOCK_MONOTONIC;
803 	absolute = args->op == LINUX_FUTEX_WAIT ? false : true;
804 	umtx_abs_timeout_init(timo, clockid, absolute, args->ts);
/*
 * futex(2) syscall entry (32-bit timespec ABI).  For the waiting/locking
 * ops the timeout is copied in and converted; for all other ops the
 * timeout slot carries an integer (val2) and is passed through raw.
 */
808 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
810 	struct linux_futex_args fargs = {
811 		.uaddr = args->uaddr,
815 		.uaddr2 = args->uaddr2,
817 		.val3_compare = true,
819 	struct l_timespec lts;
822 	switch (args->op & LINUX_FUTEX_CMD_MASK) {
823 	case LINUX_FUTEX_WAIT:
824 	case LINUX_FUTEX_WAIT_BITSET:
825 	case LINUX_FUTEX_LOCK_PI:
826 	case LINUX_FUTEX_LOCK_PI2:
827 		if (args->timeout != NULL) {
828 			error = copyin(args->timeout, &lts, sizeof(lts));
831 			error = linux_to_native_timespec(&fargs.kts, &lts);
834 			fargs.ts = &fargs.kts;
	/* Non-wait ops: the "timeout" slot actually holds val2. */
838 		fargs.ts = PTRIN(args->timeout);
840 	return (linux_futex(td, &fargs));
843 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * futex_time64(2) syscall entry: identical to linux_sys_futex() but the
 * timeout is a 64-bit struct l_timespec64 (time64 ABI on 32-bit
 * platforms).
 */
845 linux_sys_futex_time64(struct thread *td,
846     struct linux_sys_futex_time64_args *args)
848 	struct linux_futex_args fargs = {
849 		.uaddr = args->uaddr,
853 		.uaddr2 = args->uaddr2,
855 		.val3_compare = true,
857 	struct l_timespec64 lts;
860 	switch (args->op & LINUX_FUTEX_CMD_MASK) {
861 	case LINUX_FUTEX_WAIT:
862 	case LINUX_FUTEX_WAIT_BITSET:
863 	case LINUX_FUTEX_LOCK_PI:
864 	case LINUX_FUTEX_LOCK_PI2:
865 		if (args->timeout != NULL) {
866 			error = copyin(args->timeout, &lts, sizeof(lts));
869 			error = linux_to_native_timespec64(&fargs.kts, &lts);
872 			fargs.ts = &fargs.kts;
	/* Non-wait ops: the "timeout" slot actually holds val2. */
876 		fargs.ts = PTRIN(args->timeout);
878 	return (linux_futex(td, &fargs));
/*
 * set_robust_list(2): record the user-space robust futex list head in
 * the per-thread emuldata after validating the declared size.
 */
883 linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args)
885 	struct linux_emuldata *em;
887 	if (args->len != sizeof(struct linux_robust_list_head))
891 	em->robust_futexes = args->head;
/*
 * get_robust_list(2): return the robust list head of the current thread
 * or, given a pid, of another Linux-ABI thread subject to credential
 * and debug-privilege checks.
 */
897 linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args)
899 	struct linux_emuldata *em;
900 	struct linux_robust_list_head *head;
907 		KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
908 		head = em->robust_futexes;
910 		td2 = linux_tdfind(td, args->pid, -1);
	/* Only threads running under the Linux ABI have robust lists. */
913 		if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) {
914 			PROC_UNLOCK(td2->td_proc);
919 		KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
	/* Inspecting another thread requires debug-capable credentials. */
921 		if (priv_check(td, PRIV_CRED_SETUID) ||
922 		    priv_check(td, PRIV_CRED_SETEUID) ||
923 		    p_candebug(td, td2->td_proc)) {
924 			PROC_UNLOCK(td2->td_proc);
927 		head = em->robust_futexes;
929 		PROC_UNLOCK(td2->td_proc);
932 	len = sizeof(struct linux_robust_list_head);
933 	error = copyout(&len, args->len, sizeof(l_size_t));
937 	return (copyout(&head, args->head, sizeof(head)));
/*
 * Release one robust futex on behalf of an exiting thread: if the word
 * is owned by this thread's TID, mark it OWNER_DIED (preserving the
 * WAITERS bit) and wake the next waiter.  'pi' selects PI vs plain
 * wakeup; 'pending_op' flags the in-progress (pending) lock entry.
 */
941 handle_futex_death(struct thread *td, struct linux_emuldata *em, uint32_t *uaddr,
942     unsigned int pi, bool pending_op)
944 	uint32_t uval, nval, mval;
948 	error = fueword32(uaddr, &uval);
953 	 * Special case for regular (non PI) futexes. The unlock path in
954 	 * user space has two race scenarios:
956 	 * 1. The unlock path releases the user space futex value and
957 	 *    before it can execute the futex() syscall to wake up
958 	 *    waiters it is killed.
960 	 * 2. A woken up waiter is killed before it can acquire the
961 	 *    futex in user space.
963 	 * In both cases the TID validation below prevents a wakeup of
964 	 * potential waiters which can cause these waiters to block
967 	 * In both cases it is safe to attempt waking up a potential
968 	 * waiter without touching the user space futex value and trying
969 	 * to set the OWNER_DIED bit.
971 	if (pending_op && !pi && !uval) {
972 		(void)futex_wake(td, uaddr, 1, true);
	/* Only mutate the word if this dead thread actually owns it. */
976 	if ((uval & FUTEX_TID_MASK) == em->em_tid) {
977 		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
978 		error = casueword32(uaddr, uval, &nval, mval);
982 			error = thread_check_susp(td, false);
988 		if (!pi && (uval & FUTEX_WAITERS)) {
989 			error = futex_wake(td, uaddr, 1, true);
992 		} else if (pi && (uval & FUTEX_WAITERS)) {
993 			error = futex_wake_pi(td, uaddr, true);
/*
 * Copy in one robust-list link from user space.  The low bit of the
 * stored pointer is masked off before returning the entry; presumably
 * it encodes the PI flag returned via *pi, per the Linux robust-futex
 * ABI — TODO(review): confirm against the lines not visible here.
 */
1003 fetch_robust_entry(struct linux_robust_list **entry,
1004     struct linux_robust_list **head, unsigned int *pi)
1009 	error = copyin((const void *)head, &uentry, sizeof(uentry));
1013 	*entry = (void *)(uentry & ~1UL);
1019 #define LINUX_HANDLE_DEATH_PENDING true
1020 #define LINUX_HANDLE_DEATH_LIST false
1022 /* This walks the list of robust futexes releasing them. */
/*
 * Called at thread exit: traverse the user-space robust futex list
 * registered via set_robust_list(2), invoking handle_futex_death() for
 * each entry and finally for the pending entry, if any.  The traversal
 * is bounded ('limit') to defend against malformed/circular user lists.
 */
1024 release_futexes(struct thread *td, struct linux_emuldata *em)
1026 	struct linux_robust_list_head *head;
1027 	struct linux_robust_list *entry, *next_entry, *pending;
1028 	unsigned int limit = 2048, pi, next_pi, pip;
1030 	l_long futex_offset;
1033 	head = em->robust_futexes;
1037 	if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi))
	/* Offset from each list entry to its futex word, set by userspace. */
1040 	error = copyin(&head->futex_offset, &futex_offset,
1041 	    sizeof(futex_offset));
1045 	if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip))
1048 	while (entry != &head->list) {
1049 		error = fetch_robust_entry(&next_entry, PTRIN(&entry->next),
1053 		 * A pending lock might already be on the list, so
1054 		 * don't process it twice.
1056 		if (entry != pending) {
1057 			uaddr = (uint32_t *)((caddr_t)entry + futex_offset);
1058 			if (handle_futex_death(td, em, uaddr, pi,
1059 			    LINUX_HANDLE_DEATH_LIST))
	/* Yield periodically so a long list cannot hog the CPU. */
1071 		sched_relinquish(curthread);
1075 		uaddr = (uint32_t *)((caddr_t)pending + futex_offset);
1076 		(void)handle_futex_death(td, em, uaddr, pip,
1077 		    LINUX_HANDLE_DEATH_PENDING);