2 * Copyright (c) 2015 Nuxi, https://nuxi.nl/
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 #include <sys/cdefs.h>
27 __FBSDID("$FreeBSD$");
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/limits.h>
33 #include <sys/malloc.h>
34 #include <sys/mutex.h>
37 #include <sys/systm.h>
40 #include <contrib/cloudabi/cloudabi_types_common.h>
42 #include <compat/cloudabi/cloudabi_proto.h>
43 #include <compat/cloudabi/cloudabi_util.h>
46 * Futexes for CloudABI.
48 * On most systems, futexes are implemented as objects of a single type
49 * on which a set of operations can be performed. CloudABI makes a clear
50 * distinction between locks and condition variables. A lock may have
51 * zero or more associated condition variables. A condition variable is
52 * always associated with exactly one lock. There is a strict topology.
53 * This approach has two advantages:
55 * - This topology is guaranteed to be acyclic. Requeueing of threads
56 * only happens in one direction (from condition variables to locks).
58 * - It means that a futex object for a lock exists when it is unlocked,
59 * but has threads waiting on associated condition variables. Threads
60 * can be requeued to a lock even if the thread performing the wakeup
61 * does not have the lock mapped in its address space.
63 * This futex implementation only implements a single lock type, namely
64 * a read-write lock. A regular mutex type would not be necessary, as
65 * the read-write lock is as efficient as a mutex if used as such.
66 * Userspace futex locks are 32 bits in size:
68 * - 1 bit: has threads waiting in kernel-space.
69 * - 1 bit: is write-locked.
71 * - if write-locked: thread ID of owner.
72 * - if not write-locked: number of read locks held.
74 * Condition variables are also 32 bits in size. Their value is modified
75 * by kernel-space exclusively. Zero indicates that it has no waiting
76 * threads. Non-zero indicates the opposite.
78 * This implementation is optimal, in the sense that it only wakes up
79 * threads if they can actually continue execution. It does not suffer
80 * from the thundering herd problem. If multiple threads waiting on a
81 * condition variable need to be woken up, only a single thread is
82 * scheduled. All other threads are 'donated' to this thread. After the
83 * thread manages to reacquire the lock, it requeues its donated threads
86 * TODO(ed): Integrate this functionality into kern_umtx.c instead.
87 * TODO(ed): Store futex objects in a hash table.
88 * TODO(ed): Add actual priority inheritance.
89 * TODO(ed): Let futex_queue also take priorities into account.
90 * TODO(ed): Make locking fine-grained.
91 * TODO(ed): Perform sleeps until an actual absolute point in time,
92 * instead of converting the timestamp to a relative value.
101 /* Identifier of a location in memory. */
102 struct futex_address {
103 struct umtx_key fa_key;
106 /* A set of waiting threads. */
108 STAILQ_HEAD(, futex_waiter) fq_list;
109 unsigned int fq_count;
112 /* Condition variables. */
113 struct futex_condvar {
114 /* Address of the condition variable. */
115 struct futex_address fc_address;
117 /* The lock the waiters should be moved to when signalled. */
118 struct futex_lock * fc_lock;
120 /* Threads waiting on the condition variable. */
121 struct futex_queue fc_waiters;
123 * Number of threads blocked on this condition variable, or
124 * being blocked on the lock after being requeued.
126 unsigned int fc_waitcount;
128 /* Global list pointers. */
129 LIST_ENTRY(futex_condvar) fc_next;
132 /* Read-write locks. */
134 /* Address of the lock. */
135 struct futex_address fl_address;
138 * Current owner of the lock. LOCK_UNMANAGED if the lock is
139 * currently not owned by the kernel. LOCK_OWNER_UNKNOWN in case
140 * the owner is not known (e.g., when the lock is read-locked).
142 cloudabi_tid_t fl_owner;
143 #define LOCK_UNMANAGED 0x0
144 #define LOCK_OWNER_UNKNOWN 0x1
146 /* Writers blocked on the lock. */
147 struct futex_queue fl_writers;
148 /* Readers blocked on the lock. */
149 struct futex_queue fl_readers;
150 /* Number of threads blocked on this lock + condition variables. */
151 unsigned int fl_waitcount;
153 /* Global list pointers. */
154 LIST_ENTRY(futex_lock) fl_next;
157 /* Information associated with a thread blocked on an object. */
158 struct futex_waiter {
160 cloudabi_tid_t fw_tid;
161 /* Condition variable used for waiting. */
164 /* Queue this waiter is currently placed in. */
165 struct futex_queue * fw_queue;
166 /* List pointers of fw_queue. */
167 STAILQ_ENTRY(futex_waiter) fw_next;
169 /* Lock has been acquired. */
171 /* If not locked, threads that should block after acquiring. */
172 struct futex_queue fw_donated;
175 /* Global data structures. */
176 static MALLOC_DEFINE(M_FUTEX, "futex", "CloudABI futex");
178 static struct sx futex_global_lock;
179 SX_SYSINIT(futex_global_lock, &futex_global_lock, "CloudABI futex global lock");
181 static LIST_HEAD(, futex_lock) futex_lock_list =
182 LIST_HEAD_INITIALIZER(&futex_lock_list);
183 static LIST_HEAD(, futex_condvar) futex_condvar_list =
184 LIST_HEAD_INITIALIZER(&futex_condvar_list);
186 /* Utility functions. */
187 static void futex_lock_assert(const struct futex_lock *);
188 static struct futex_lock *futex_lock_lookup_locked(struct futex_address *);
189 static void futex_lock_release(struct futex_lock *);
190 static int futex_lock_tryrdlock(struct futex_lock *, cloudabi_lock_t *);
191 static int futex_lock_unmanage(struct futex_lock *, cloudabi_lock_t *);
192 static int futex_lock_update_owner(struct futex_lock *, cloudabi_lock_t *);
193 static int futex_lock_wake_up_next(struct futex_lock *, cloudabi_lock_t *);
194 static unsigned int futex_queue_count(const struct futex_queue *);
195 static void futex_queue_init(struct futex_queue *);
196 static void futex_queue_requeue(struct futex_queue *, struct futex_queue *,
198 static int futex_queue_sleep(struct futex_queue *, struct futex_lock *,
199 struct futex_waiter *, struct thread *, cloudabi_clockid_t,
200 cloudabi_timestamp_t, cloudabi_timestamp_t, bool);
201 static cloudabi_tid_t futex_queue_tid_best(const struct futex_queue *);
202 static void futex_queue_wake_up_all(struct futex_queue *);
203 static void futex_queue_wake_up_best(struct futex_queue *);
204 static void futex_queue_wake_up_donate(struct futex_queue *, unsigned int);
205 static int futex_user_load(uint32_t *, uint32_t *);
206 static int futex_user_store(uint32_t *, uint32_t);
207 static int futex_user_cmpxchg(uint32_t *, uint32_t, uint32_t *, uint32_t);
210 * futex_address operations.
214 futex_address_create(struct futex_address *fa, struct thread *td,
215 const void *object, cloudabi_scope_t scope)
218 KASSERT(td == curthread,
219 ("Can only create umtx keys for the current thread"));
221 case CLOUDABI_SCOPE_PRIVATE:
222 return (umtx_key_get(object, TYPE_FUTEX, THREAD_SHARE,
224 case CLOUDABI_SCOPE_SHARED:
225 return (umtx_key_get(object, TYPE_FUTEX, AUTO_SHARE,
233 futex_address_free(struct futex_address *fa)
236 umtx_key_release(&fa->fa_key);
240 futex_address_match(const struct futex_address *fa1,
241 const struct futex_address *fa2)
244 return (umtx_key_match(&fa1->fa_key, &fa2->fa_key));
248 * futex_condvar operations.
252 futex_condvar_assert(const struct futex_condvar *fc)
255 KASSERT(fc->fc_waitcount >= futex_queue_count(&fc->fc_waiters),
256 ("Total number of waiters cannot be smaller than the wait queue"));
257 futex_lock_assert(fc->fc_lock);
261 futex_condvar_lookup(struct thread *td, const cloudabi_condvar_t *address,
262 cloudabi_scope_t scope, struct futex_condvar **fcret)
264 struct futex_address fa_condvar;
265 struct futex_condvar *fc;
268 error = futex_address_create(&fa_condvar, td, address, scope);
272 sx_xlock(&futex_global_lock);
273 LIST_FOREACH(fc, &futex_condvar_list, fc_next) {
274 if (futex_address_match(&fc->fc_address, &fa_condvar)) {
275 /* Found matching lock object. */
276 futex_address_free(&fa_condvar);
277 futex_condvar_assert(fc);
282 sx_xunlock(&futex_global_lock);
283 futex_address_free(&fa_condvar);
288 futex_condvar_lookup_or_create(struct thread *td,
289 const cloudabi_condvar_t *condvar, cloudabi_scope_t condvar_scope,
290 const cloudabi_lock_t *lock, cloudabi_scope_t lock_scope,
291 struct futex_condvar **fcret)
293 struct futex_address fa_condvar, fa_lock;
294 struct futex_condvar *fc;
295 struct futex_lock *fl;
298 error = futex_address_create(&fa_condvar, td, condvar, condvar_scope);
301 error = futex_address_create(&fa_lock, td, lock, lock_scope);
303 futex_address_free(&fa_condvar);
307 sx_xlock(&futex_global_lock);
308 LIST_FOREACH(fc, &futex_condvar_list, fc_next) {
309 if (!futex_address_match(&fc->fc_address, &fa_condvar))
312 if (!futex_address_match(&fl->fl_address, &fa_lock)) {
313 /* Condition variable is owned by a different lock. */
314 futex_address_free(&fa_condvar);
315 futex_address_free(&fa_lock);
316 sx_xunlock(&futex_global_lock);
320 /* Found fully matching condition variable. */
321 futex_address_free(&fa_condvar);
322 futex_address_free(&fa_lock);
323 futex_condvar_assert(fc);
328 /* None found. Create new condition variable object. */
329 fc = malloc(sizeof(*fc), M_FUTEX, M_WAITOK);
330 fc->fc_address = fa_condvar;
331 fc->fc_lock = futex_lock_lookup_locked(&fa_lock);
332 futex_queue_init(&fc->fc_waiters);
333 fc->fc_waitcount = 0;
334 LIST_INSERT_HEAD(&futex_condvar_list, fc, fc_next);
340 futex_condvar_release(struct futex_condvar *fc)
342 struct futex_lock *fl;
344 futex_condvar_assert(fc);
346 if (fc->fc_waitcount == 0) {
347 /* Condition variable has no waiters. Deallocate it. */
348 futex_address_free(&fc->fc_address);
349 LIST_REMOVE(fc, fc_next);
352 futex_lock_release(fl);
356 futex_condvar_unmanage(struct futex_condvar *fc,
357 cloudabi_condvar_t *condvar)
360 if (futex_queue_count(&fc->fc_waiters) != 0)
362 return (futex_user_store(condvar, CLOUDABI_CONDVAR_HAS_NO_WAITERS));
366 * futex_lock operations.
370 futex_lock_assert(const struct futex_lock *fl)
374 * A futex lock can only be kernel-managed if it has waiters.
375 * Vice versa: if a futex lock has waiters, it must be
378 KASSERT((fl->fl_owner == LOCK_UNMANAGED) ==
379 (futex_queue_count(&fl->fl_readers) == 0 &&
380 futex_queue_count(&fl->fl_writers) == 0),
381 ("Managed locks must have waiting threads"));
382 KASSERT(fl->fl_waitcount != 0 || fl->fl_owner == LOCK_UNMANAGED,
383 ("Lock with no waiters must be unmanaged"));
387 futex_lock_lookup(struct thread *td, const cloudabi_lock_t *address,
388 cloudabi_scope_t scope, struct futex_lock **flret)
390 struct futex_address fa;
393 error = futex_address_create(&fa, td, address, scope);
397 sx_xlock(&futex_global_lock);
398 *flret = futex_lock_lookup_locked(&fa);
402 static struct futex_lock *
403 futex_lock_lookup_locked(struct futex_address *fa)
405 struct futex_lock *fl;
407 LIST_FOREACH(fl, &futex_lock_list, fl_next) {
408 if (futex_address_match(&fl->fl_address, fa)) {
409 /* Found matching lock object. */
410 futex_address_free(fa);
411 futex_lock_assert(fl);
416 /* None found. Create new lock object. */
417 fl = malloc(sizeof(*fl), M_FUTEX, M_WAITOK);
418 fl->fl_address = *fa;
419 fl->fl_owner = LOCK_UNMANAGED;
420 futex_queue_init(&fl->fl_readers);
421 futex_queue_init(&fl->fl_writers);
422 fl->fl_waitcount = 0;
423 LIST_INSERT_HEAD(&futex_lock_list, fl, fl_next);
428 futex_lock_rdlock(struct futex_lock *fl, struct thread *td,
429 cloudabi_lock_t *lock, cloudabi_clockid_t clock_id,
430 cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
432 struct futex_waiter fw;
435 error = futex_lock_tryrdlock(fl, lock);
436 if (error == EBUSY) {
437 /* Suspend execution. */
438 KASSERT(fl->fl_owner != LOCK_UNMANAGED,
439 ("Attempted to sleep on an unmanaged lock"));
440 error = futex_queue_sleep(&fl->fl_readers, fl, &fw, td,
441 clock_id, timeout, precision, abstime);
442 KASSERT((error == 0) == fw.fw_locked,
443 ("Should have locked write lock on success"));
444 KASSERT(futex_queue_count(&fw.fw_donated) == 0,
445 ("Lock functions cannot receive threads"));
448 futex_lock_unmanage(fl, lock);
453 futex_lock_release(struct futex_lock *fl)
456 futex_lock_assert(fl);
457 if (fl->fl_waitcount == 0) {
458 /* Lock object is unreferenced. Deallocate it. */
459 KASSERT(fl->fl_owner == LOCK_UNMANAGED,
460 ("Attempted to free a managed lock"));
461 futex_address_free(&fl->fl_address);
462 LIST_REMOVE(fl, fl_next);
465 sx_xunlock(&futex_global_lock);
469 futex_lock_unmanage(struct futex_lock *fl, cloudabi_lock_t *lock)
471 cloudabi_lock_t cmp, old;
474 if (futex_queue_count(&fl->fl_readers) == 0 &&
475 futex_queue_count(&fl->fl_writers) == 0) {
476 /* Lock should be unmanaged. */
477 fl->fl_owner = LOCK_UNMANAGED;
479 /* Clear kernel-managed bit. */
480 error = futex_user_load(lock, &old);
485 error = futex_user_cmpxchg(lock, cmp, &old,
486 cmp & ~CLOUDABI_LOCK_KERNEL_MANAGED);
496 /* Sets an owner of a lock, based on a userspace lock value. */
498 futex_lock_set_owner(struct futex_lock *fl, cloudabi_lock_t lock)
501 /* Lock has no explicit owner. */
502 if ((lock & ~CLOUDABI_LOCK_WRLOCKED) == 0) {
503 fl->fl_owner = LOCK_OWNER_UNKNOWN;
506 lock &= ~(CLOUDABI_LOCK_WRLOCKED | CLOUDABI_LOCK_KERNEL_MANAGED);
508 /* Don't allow userspace to silently unlock. */
509 if (lock == LOCK_UNMANAGED) {
510 fl->fl_owner = LOCK_OWNER_UNKNOWN;
517 futex_lock_unlock(struct futex_lock *fl, struct thread *td,
518 cloudabi_lock_t *lock)
522 /* Validate that this thread is allowed to unlock. */
523 error = futex_lock_update_owner(fl, lock);
526 if (fl->fl_owner != LOCK_UNMANAGED && fl->fl_owner != td->td_tid)
528 return (futex_lock_wake_up_next(fl, lock));
531 /* Syncs in the owner of the lock from userspace if needed. */
533 futex_lock_update_owner(struct futex_lock *fl, cloudabi_lock_t *address)
535 cloudabi_lock_t lock;
538 if (fl->fl_owner == LOCK_OWNER_UNKNOWN) {
539 error = futex_user_load(address, &lock);
542 futex_lock_set_owner(fl, lock);
548 futex_lock_tryrdlock(struct futex_lock *fl, cloudabi_lock_t *address)
550 cloudabi_lock_t old, cmp;
553 if (fl->fl_owner != LOCK_UNMANAGED) {
554 /* Lock is already acquired. */
558 old = CLOUDABI_LOCK_UNLOCKED;
560 if ((old & CLOUDABI_LOCK_KERNEL_MANAGED) != 0) {
562 * Userspace lock is kernel-managed, even though
563 * the kernel disagrees.
568 if ((old & CLOUDABI_LOCK_WRLOCKED) == 0) {
570 * Lock is not write-locked. Attempt to acquire
571 * it by increasing the read count.
574 error = futex_user_cmpxchg(address, cmp, &old, cmp + 1);
582 /* Lock is write-locked. Make it kernel-managed. */
584 error = futex_user_cmpxchg(address, cmp, &old,
585 cmp | CLOUDABI_LOCK_KERNEL_MANAGED);
590 futex_lock_set_owner(fl, cmp);
598 futex_lock_trywrlock(struct futex_lock *fl, cloudabi_lock_t *address,
599 cloudabi_tid_t tid, bool force_kernel_managed)
601 cloudabi_lock_t old, new, cmp;
604 if (fl->fl_owner == tid) {
605 /* Attempted to acquire lock recursively. */
608 if (fl->fl_owner != LOCK_UNMANAGED) {
609 /* Lock is already acquired. */
613 old = CLOUDABI_LOCK_UNLOCKED;
615 if ((old & CLOUDABI_LOCK_KERNEL_MANAGED) != 0) {
617 * Userspace lock is kernel-managed, even though
618 * the kernel disagrees.
622 if (old == (tid | CLOUDABI_LOCK_WRLOCKED)) {
623 /* Attempted to acquire lock recursively. */
627 if (old == CLOUDABI_LOCK_UNLOCKED) {
628 /* Lock is unlocked. Attempt to acquire it. */
629 new = tid | CLOUDABI_LOCK_WRLOCKED;
630 if (force_kernel_managed)
631 new |= CLOUDABI_LOCK_KERNEL_MANAGED;
632 error = futex_user_cmpxchg(address,
633 CLOUDABI_LOCK_UNLOCKED, &old, new);
636 if (old == CLOUDABI_LOCK_UNLOCKED) {
638 if (force_kernel_managed)
643 /* Lock is still locked. Make it kernel-managed. */
645 error = futex_user_cmpxchg(address, cmp, &old,
646 cmp | CLOUDABI_LOCK_KERNEL_MANAGED);
651 futex_lock_set_owner(fl, cmp);
659 futex_lock_wake_up_next(struct futex_lock *fl, cloudabi_lock_t *lock)
665 * Determine which thread(s) to wake up. Prefer waking up
666 * writers over readers to prevent write starvation.
668 if (futex_queue_count(&fl->fl_writers) > 0) {
669 /* Transfer ownership to a single write-locker. */
670 if (futex_queue_count(&fl->fl_writers) > 1 ||
671 futex_queue_count(&fl->fl_readers) > 0) {
672 /* Lock should remain managed afterwards. */
673 tid = futex_queue_tid_best(&fl->fl_writers);
674 error = futex_user_store(lock,
675 tid | CLOUDABI_LOCK_WRLOCKED |
676 CLOUDABI_LOCK_KERNEL_MANAGED);
680 futex_queue_wake_up_best(&fl->fl_writers);
683 /* Lock can become unmanaged afterwards. */
684 error = futex_user_store(lock,
685 futex_queue_tid_best(&fl->fl_writers) |
686 CLOUDABI_LOCK_WRLOCKED);
690 futex_queue_wake_up_best(&fl->fl_writers);
691 fl->fl_owner = LOCK_UNMANAGED;
694 /* Transfer ownership to all read-lockers (if any). */
695 error = futex_user_store(lock,
696 futex_queue_count(&fl->fl_readers));
700 /* Wake up all threads. */
701 futex_queue_wake_up_all(&fl->fl_readers);
702 fl->fl_owner = LOCK_UNMANAGED;
708 futex_lock_wrlock(struct futex_lock *fl, struct thread *td,
709 cloudabi_lock_t *lock, cloudabi_clockid_t clock_id,
710 cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime,
711 struct futex_queue *donated)
713 struct futex_waiter fw;
716 error = futex_lock_trywrlock(fl, lock, td->td_tid,
717 futex_queue_count(donated) > 0);
719 if (error == 0 || error == EBUSY) {
720 /* Put donated threads in queue before suspending. */
721 KASSERT(futex_queue_count(donated) == 0 ||
722 fl->fl_owner != LOCK_UNMANAGED,
723 ("Lock should be managed if we are going to donate"));
724 futex_queue_requeue(donated, &fl->fl_writers, UINT_MAX);
727 * This thread cannot deal with the donated threads.
728 * Wake up the next thread and let it try it by itself.
730 futex_queue_wake_up_donate(donated, UINT_MAX);
733 if (error == EBUSY) {
734 /* Suspend execution if the lock was busy. */
735 KASSERT(fl->fl_owner != LOCK_UNMANAGED,
736 ("Attempted to sleep on an unmanaged lock"));
737 error = futex_queue_sleep(&fl->fl_writers, fl, &fw, td,
738 clock_id, timeout, precision, abstime);
739 KASSERT((error == 0) == fw.fw_locked,
740 ("Should have locked write lock on success"));
741 KASSERT(futex_queue_count(&fw.fw_donated) == 0,
742 ("Lock functions cannot receive threads"));
745 futex_lock_unmanage(fl, lock);
750 * futex_queue operations.
753 static cloudabi_tid_t
754 futex_queue_tid_best(const struct futex_queue *fq)
757 return (STAILQ_FIRST(&fq->fq_list)->fw_tid);
761 futex_queue_count(const struct futex_queue *fq)
764 return (fq->fq_count);
768 futex_queue_init(struct futex_queue *fq)
771 STAILQ_INIT(&fq->fq_list);
775 /* Converts a relative timestamp to an sbintime. */
777 futex_queue_convert_timestamp_relative(cloudabi_timestamp_t ts)
779 cloudabi_timestamp_t s, ns;
782 ns = ts % 1000000000;
785 return ((s << 32) + (ns << 32) / 1000000000);
788 /* Converts an absolute timestamp and precision to a pair of sbintime values. */
790 futex_queue_convert_timestamp(struct thread *td, cloudabi_clockid_t clock_id,
791 cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision,
792 sbintime_t *sbttimeout, sbintime_t *sbtprecision, bool abstime)
794 cloudabi_timestamp_t now;
798 /* Make the time relative. */
799 error = cloudabi_clock_time_get(td, clock_id, &now);
802 timeout = timeout < now ? 0 : timeout - now;
805 *sbttimeout = futex_queue_convert_timestamp_relative(timeout);
806 *sbtprecision = futex_queue_convert_timestamp_relative(precision);
811 futex_queue_sleep(struct futex_queue *fq, struct futex_lock *fl,
812 struct futex_waiter *fw, struct thread *td, cloudabi_clockid_t clock_id,
813 cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
815 sbintime_t sbttimeout, sbtprecision;
818 /* Initialize futex_waiter object. */
819 fw->fw_tid = td->td_tid;
820 fw->fw_locked = false;
821 futex_queue_init(&fw->fw_donated);
823 if (timeout != UINT64_MAX) {
824 /* Convert timeout duration. */
825 error = futex_queue_convert_timestamp(td, clock_id, timeout,
826 precision, &sbttimeout, &sbtprecision, abstime);
831 /* Place object in the queue. */
833 STAILQ_INSERT_TAIL(&fq->fq_list, fw, fw_next);
836 cv_init(&fw->fw_wait, "futex");
839 futex_lock_assert(fl);
840 if (timeout == UINT64_MAX) {
841 /* Wait without a timeout. */
842 error = cv_wait_sig(&fw->fw_wait, &futex_global_lock);
844 /* Wait respecting the timeout. */
845 error = cv_timedwait_sig_sbt(&fw->fw_wait, &futex_global_lock,
846 sbttimeout, sbtprecision, 0);
847 futex_lock_assert(fl);
848 if (error == EWOULDBLOCK &&
849 fw->fw_queue != NULL && fw->fw_queue != fq) {
851 * We got signalled on a condition variable, but
852 * observed a timeout while waiting to reacquire
853 * the lock. In other words, we didn't actually
854 * time out. Go back to sleep and wait for the
855 * lock to be reacquired.
857 error = cv_wait_sig(&fw->fw_wait, &futex_global_lock);
860 futex_lock_assert(fl);
863 cv_destroy(&fw->fw_wait);
867 /* Thread got dequeued, so we've slept successfully. */
871 /* Thread is still enqueued. Remove it. */
872 KASSERT(error != 0, ("Woken up thread is still enqueued"));
873 STAILQ_REMOVE(&fq->fq_list, fw, futex_waiter, fw_next);
875 return (error == EWOULDBLOCK ? ETIMEDOUT : error);
878 /* Moves up to nwaiters waiters from one queue to another. */
880 futex_queue_requeue(struct futex_queue *fqfrom, struct futex_queue *fqto,
881 unsigned int nwaiters)
883 struct futex_waiter *fw;
885 /* Move waiters to the target queue. */
886 while (nwaiters-- > 0 && !STAILQ_EMPTY(&fqfrom->fq_list)) {
887 fw = STAILQ_FIRST(&fqfrom->fq_list);
888 STAILQ_REMOVE_HEAD(&fqfrom->fq_list, fw_next);
892 STAILQ_INSERT_TAIL(&fqto->fq_list, fw, fw_next);
897 /* Wakes up all waiters in a queue. */
899 futex_queue_wake_up_all(struct futex_queue *fq)
901 struct futex_waiter *fw;
903 STAILQ_FOREACH(fw, &fq->fq_list, fw_next) {
904 fw->fw_locked = true;
906 cv_signal(&fw->fw_wait);
909 STAILQ_INIT(&fq->fq_list);
914 * Wakes up the best waiter (i.e., the waiter having the highest
915 * priority) in a queue.
918 futex_queue_wake_up_best(struct futex_queue *fq)
920 struct futex_waiter *fw;
922 fw = STAILQ_FIRST(&fq->fq_list);
923 fw->fw_locked = true;
925 cv_signal(&fw->fw_wait);
927 STAILQ_REMOVE_HEAD(&fq->fq_list, fw_next);
932 futex_queue_wake_up_donate(struct futex_queue *fq, unsigned int nwaiters)
934 struct futex_waiter *fw;
936 fw = STAILQ_FIRST(&fq->fq_list);
939 fw->fw_locked = false;
941 cv_signal(&fw->fw_wait);
943 STAILQ_REMOVE_HEAD(&fq->fq_list, fw_next);
945 futex_queue_requeue(fq, &fw->fw_donated, nwaiters);
949 * futex_user operations. Used to adjust values in userspace.
953 futex_user_load(uint32_t *obj, uint32_t *val)
956 return (fueword32(obj, val) != 0 ? EFAULT : 0);
960 futex_user_store(uint32_t *obj, uint32_t val)
963 return (suword32(obj, val) != 0 ? EFAULT : 0);
967 futex_user_cmpxchg(uint32_t *obj, uint32_t cmp, uint32_t *old, uint32_t new)
970 return (casueword32(obj, cmp, old, new) != 0 ? EFAULT : 0);
974 * Blocking calls: acquiring locks, waiting on condition variables.
978 cloudabi_futex_condvar_wait(struct thread *td, cloudabi_condvar_t *condvar,
979 cloudabi_scope_t condvar_scope, cloudabi_lock_t *lock,
980 cloudabi_scope_t lock_scope, cloudabi_clockid_t clock_id,
981 cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
983 struct futex_condvar *fc;
984 struct futex_lock *fl;
985 struct futex_waiter fw;
988 /* Lookup condition variable object. */
989 error = futex_condvar_lookup_or_create(td, condvar, condvar_scope, lock,
996 * Set the condition variable to something other than
997 * CLOUDABI_CONDVAR_HAS_NO_WAITERS to make userspace threads
998 * call into the kernel to perform wakeups.
1000 error = futex_user_store(condvar, ~CLOUDABI_CONDVAR_HAS_NO_WAITERS);
1002 futex_condvar_release(fc);
1006 /* Drop the lock. */
1007 error = futex_lock_unlock(fl, td, lock);
1009 futex_condvar_unmanage(fc, condvar);
1010 futex_condvar_release(fc);
1016 error = futex_queue_sleep(&fc->fc_waiters, fc->fc_lock, &fw, td,
1017 clock_id, timeout, precision, abstime);
1019 /* Waited and got the lock assigned to us. */
1020 KASSERT(futex_queue_count(&fw.fw_donated) == 0,
1021 ("Received threads while being locked"));
1022 } else if (error == 0 || error == ETIMEDOUT) {
1024 futex_condvar_unmanage(fc, condvar);
1026 * Got woken up without having the lock assigned to us.
1027 * This can happen in two cases:
1029 * 1. We observed a timeout on a condition variable.
1030 * 2. We got signalled on a condition variable while the
1031 * associated lock is unlocked. We are the first
1032 * thread that gets woken up. This thread is
1033 * responsible for reacquiring the userspace lock.
1035 error2 = futex_lock_wrlock(fl, td, lock,
1036 CLOUDABI_CLOCK_MONOTONIC, UINT64_MAX, 0, abstime,
1041 KASSERT(futex_queue_count(&fw.fw_donated) == 0,
1042 ("Received threads on error"));
1043 futex_condvar_unmanage(fc, condvar);
1044 futex_lock_unmanage(fl, lock);
1047 futex_condvar_release(fc);
1052 cloudabi_futex_lock_rdlock(struct thread *td, cloudabi_lock_t *lock,
1053 cloudabi_scope_t scope, cloudabi_clockid_t clock_id,
1054 cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
1056 struct futex_lock *fl;
1059 /* Look up lock object. */
1060 error = futex_lock_lookup(td, lock, scope, &fl);
1064 error = futex_lock_rdlock(fl, td, lock, clock_id, timeout,
1065 precision, abstime);
1066 futex_lock_release(fl);
1071 cloudabi_futex_lock_wrlock(struct thread *td, cloudabi_lock_t *lock,
1072 cloudabi_scope_t scope, cloudabi_clockid_t clock_id,
1073 cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
1075 struct futex_lock *fl;
1076 struct futex_queue fq;
1079 /* Look up lock object. */
1080 error = futex_lock_lookup(td, lock, scope, &fl);
1084 futex_queue_init(&fq);
1085 error = futex_lock_wrlock(fl, td, lock, clock_id, timeout,
1086 precision, abstime, &fq);
1087 futex_lock_release(fl);
1092 * Non-blocking calls: releasing locks, signalling condition variables.
1096 cloudabi_sys_condvar_signal(struct thread *td,
1097 struct cloudabi_sys_condvar_signal_args *uap)
1099 struct futex_condvar *fc;
1100 struct futex_lock *fl;
1101 cloudabi_nthreads_t nwaiters;
1104 nwaiters = uap->nwaiters;
1105 if (nwaiters == 0) {
1106 /* No threads to wake up. */
1110 /* Look up futex object. */
1111 error = futex_condvar_lookup(td, uap->condvar, uap->scope, &fc);
1113 /* Race condition: condition variable with no waiters. */
1114 return (error == ENOENT ? 0 : error);
1118 if (fl->fl_owner == LOCK_UNMANAGED) {
1120 * The lock is currently not managed by the kernel,
1121 * meaning we must attempt to acquire the userspace lock
1122 * first. We cannot requeue threads to an unmanaged lock,
1123 * as these threads will then never be scheduled.
1125 * Unfortunately, the memory address of the lock is
1126 * unknown from this context, meaning that we cannot
1127 * acquire the lock on behalf of the first thread to be
1128 * scheduled. The lock may even not be mapped within the
1129 * address space of the current thread.
1131 * To solve this, wake up a single waiter that will
1132 * attempt to acquire the lock. Donate all of the other
1133 * waiters that need to be woken up to this waiter, so
1134 * it can requeue them after acquiring the lock.
1136 futex_queue_wake_up_donate(&fc->fc_waiters, nwaiters - 1);
1139 * Lock is already managed by the kernel. This makes it
1140 * easy, as we can requeue the threads from the
1141 * condition variable directly to the associated lock.
1143 futex_queue_requeue(&fc->fc_waiters, &fl->fl_writers, nwaiters);
1146 /* Clear userspace condition variable if all waiters are gone. */
1147 error = futex_condvar_unmanage(fc, uap->condvar);
1148 futex_condvar_release(fc);
1153 cloudabi_sys_lock_unlock(struct thread *td,
1154 struct cloudabi_sys_lock_unlock_args *uap)
1156 struct futex_lock *fl;
1159 error = futex_lock_lookup(td, uap->lock, uap->scope, &fl);
1162 error = futex_lock_unlock(fl, td, uap->lock);
1163 futex_lock_release(fl);