]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - lib/libthr/thread/thr_cond.c
MFV r338866: 9700 ZFS resilvered mirror does not balance reads
[FreeBSD/FreeBSD.git] / lib / libthr / thread / thr_cond.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2005 David Xu <davidxu@freebsd.org>
5  * Copyright (c) 2015 The FreeBSD Foundation
6  * All rights reserved.
7  *
8  * Portions of this software were developed by Konstantin Belousov
9  * under sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice unmodified, this list of conditions, and the following
16  *    disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #include "namespace.h"
37 #include <stdlib.h>
38 #include <errno.h>
39 #include <string.h>
40 #include <pthread.h>
41 #include <limits.h>
42 #include "un-namespace.h"
43
44 #include "thr_private.h"
45
/*
 * Compile-time guard: struct pthread_cond must not exceed PAGE_SIZE.
 * NOTE(review): presumably required because process-shared condvars are
 * placed in the off-page slot returned by __thr_pshared_offpage() -- confirm.
 */
46 _Static_assert(sizeof(struct pthread_cond) <= PAGE_SIZE,
47     "pthread_cond too large");
48
49 /*
50  * Prototypes
51  */
/* Cancellation-point entry points (non-static; defined later in this file). */
52 int     __pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex);
53 int     __pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
54                        const struct timespec * abstime);
/* File-local helpers shared by the init/wait/signal/broadcast entry points. */
55 static int cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr);
56 static int cond_wait_common(pthread_cond_t *cond, pthread_mutex_t *mutex,
57                     const struct timespec *abstime, int cancel);
58 static int cond_signal_common(pthread_cond_t *cond);
59 static int cond_broadcast_common(pthread_cond_t *cond);
60
61 /*
62  * Double underscore versions are cancellation points.  Single underscore
63  * versions are not and are provided for libc internal usage (which
64  * shouldn't introduce cancellation points).
65  */
/* The public wait names map to the double-underscore (cancelable) versions. */
66 __weak_reference(__pthread_cond_wait, pthread_cond_wait);
67 __weak_reference(__pthread_cond_timedwait, pthread_cond_timedwait);
68
/* The remaining public names map to the single-underscore implementations. */
69 __weak_reference(_pthread_cond_init, pthread_cond_init);
70 __weak_reference(_pthread_cond_destroy, pthread_cond_destroy);
71 __weak_reference(_pthread_cond_signal, pthread_cond_signal);
72 __weak_reference(_pthread_cond_broadcast, pthread_cond_broadcast);
73
/* True when the condvar's kcond flags have USYNC_PROCESS_SHARED set. */
74 #define CV_PSHARED(cvp) (((cvp)->kcond.c_flags & USYNC_PROCESS_SHARED) != 0)
75
76 static void
77 cond_init_body(struct pthread_cond *cvp, const struct pthread_cond_attr *cattr)
78 {
79
80         if (cattr == NULL) {
81                 cvp->kcond.c_clockid = CLOCK_REALTIME;
82         } else {
83                 if (cattr->c_pshared)
84                         cvp->kcond.c_flags |= USYNC_PROCESS_SHARED;
85                 cvp->kcond.c_clockid = cattr->c_clockid;
86         }
87 }
88
89 static int
90 cond_init(pthread_cond_t *cond, const pthread_condattr_t *cond_attr)
91 {
92         struct pthread_cond *cvp;
93         const struct pthread_cond_attr *cattr;
94         int pshared;
95
96         cattr = cond_attr != NULL ? *cond_attr : NULL;
97         if (cattr == NULL || cattr->c_pshared == PTHREAD_PROCESS_PRIVATE) {
98                 pshared = 0;
99                 cvp = calloc(1, sizeof(struct pthread_cond));
100                 if (cvp == NULL)
101                         return (ENOMEM);
102         } else {
103                 pshared = 1;
104                 cvp = __thr_pshared_offpage(cond, 1);
105                 if (cvp == NULL)
106                         return (EFAULT);
107         }
108
109         /*
110          * Initialise the condition variable structure:
111          */
112         cond_init_body(cvp, cattr);
113         *cond = pshared ? THR_PSHARED_PTR : cvp;
114         return (0);
115 }
116
117 static int
118 init_static(struct pthread *thread, pthread_cond_t *cond)
119 {
120         int ret;
121
122         THR_LOCK_ACQUIRE(thread, &_cond_static_lock);
123
124         if (*cond == NULL)
125                 ret = cond_init(cond, NULL);
126         else
127                 ret = 0;
128
129         THR_LOCK_RELEASE(thread, &_cond_static_lock);
130
131         return (ret);
132 }
133
/*
 * Resolve *cond into cvp for the enclosing function.  The expansion refers
 * to the names `cond` and `cvp` directly and contains `return` statements,
 * so it can only be used inside an int-returning function that declares
 * both.  It is written as a bare if/else (no do { } while (0)) and is used
 * without a trailing semicolon.
 *
 *  - *cond == THR_PSHARED_PTR: cvp is looked up with
 *    __thr_pshared_offpage(cond, 0); the caller returns EINVAL if that
 *    yields NULL.
 *  - *cond == THR_COND_INITIALIZER: the condvar is initialized on first use
 *    through init_static(); a nonzero result is returned to the caller.
 *  - *cond == THR_COND_DESTROYED: the caller returns EINVAL.
 *  - otherwise cvp is simply *cond.
 *
 * After the special-value branch cvp is reloaded from *cond, which picks up
 * the pointer stored by init_static().
 */
134 #define CHECK_AND_INIT_COND                                                     \
135         if (*cond == THR_PSHARED_PTR) {                                         \
136                 cvp = __thr_pshared_offpage(cond, 0);                           \
137                 if (cvp == NULL)                                                \
138                         return (EINVAL);                                        \
139         } else if (__predict_false((cvp = (*cond)) <= THR_COND_DESTROYED)) {    \
140                 if (cvp == THR_COND_INITIALIZER) {                              \
141                         int ret;                                                \
142                         ret = init_static(_get_curthread(), cond);              \
143                         if (ret)                                                \
144                                 return (ret);                                   \
145                 } else if (cvp == THR_COND_DESTROYED) {                         \
146                         return (EINVAL);                                        \
147                 }                                                               \
148                 cvp = *cond;                                                    \
149         }
150
151 int
152 _pthread_cond_init(pthread_cond_t * __restrict cond,
153     const pthread_condattr_t * __restrict cond_attr)
154 {
155
156         *cond = NULL;
157         return (cond_init(cond, cond_attr));
158 }
159
160 int
161 _pthread_cond_destroy(pthread_cond_t *cond)
162 {
163         struct pthread_cond *cvp;
164         int error;
165
166         error = 0;
167         if (*cond == THR_PSHARED_PTR) {
168                 cvp = __thr_pshared_offpage(cond, 0);
169                 if (cvp != NULL)
170                         __thr_pshared_destroy(cond);
171                 *cond = THR_COND_DESTROYED;
172         } else if ((cvp = *cond) == THR_COND_INITIALIZER) {
173                 /* nothing */
174         } else if (cvp == THR_COND_DESTROYED) {
175                 error = EINVAL;
176         } else {
177                 cvp = *cond;
178                 *cond = THR_COND_DESTROYED;
179                 free(cvp);
180         }
181         return (error);
182 }
183
184 /*
185  * Cancellation behavior:
186  *   A thread may be canceled at the start of the wait.  If it is canceled,
187  *   it did not consume a wakeup from pthread_cond_signal(); if it did
188  *   consume a wakeup, it is not canceled.
189  *   Thread cancellation never causes a wakeup from pthread_cond_signal()
190  *   to be lost.
191  */
/*
 * Wait on the kernel condition variable cvp->kcond, releasing and later
 * re-acquiring mp around the sleep.
 *
 * abstime is handed to _thr_ucond_wait() together with
 * CVWAIT_ABSTIME | CVWAIT_CLOCKID.  A nonzero cancel makes the wait a
 * cancellation point.
 *
 * Returns the error from re-locking the mutex (error2) when that is
 * nonzero; otherwise the result of the wait, with EINTR reported as 0.  If
 * _mutex_cv_detach() fails, its error is returned without waiting.
 */
192 static int
193 cond_wait_kernel(struct pthread_cond *cvp, struct pthread_mutex *mp,
194     const struct timespec *abstime, int cancel)
195 {
196         struct pthread *curthread;
197         int error, error2, recurse, robust;
198
199         curthread = _get_curthread();
            /* Nonzero robust means _mutex_leave_robust() is owed on exit. */
200         robust = _mutex_enter_robust(curthread, mp);
201
            /*
             * Detach the mutex for the wait; recurse is handed back to
             * _mutex_cv_lock() or _mutex_cv_attach() below.
             */
202         error = _mutex_cv_detach(mp, &recurse);
203         if (error != 0) {
204                 if (robust)
205                         _mutex_leave_robust(curthread, mp);
206                 return (error);
207         }
208
            /* Only the cancelable variants bracket the sleep with enter/leave. */
209         if (cancel)
210                 _thr_cancel_enter2(curthread, 0);
211         error = _thr_ucond_wait(&cvp->kcond, &mp->m_lock, abstime,
212             CVWAIT_ABSTIME | CVWAIT_CLOCKID);
213         if (cancel)
214                 _thr_cancel_leave(curthread, 0);
215
216         /*
217          * Note that PP mutex and ROBUST mutex may return
218          * interesting error codes.
219          */
220         if (error == 0) {
221                 error2 = _mutex_cv_lock(mp, recurse, true);
222         } else if (error == EINTR || error == ETIMEDOUT) {
223                 error2 = _mutex_cv_lock(mp, recurse, true);
224                 /*
225                  * Do not do cancellation on EOWNERDEAD there.  The
226                  * cancellation cleanup handler will use the protected
227                  * state and unlock the mutex without making the state
228                  * consistent and the state will be unrecoverable.
229                  */
230                 if (error2 == 0 && cancel) {
231                         if (robust) {
232                                 _mutex_leave_robust(curthread, mp);
233                                 robust = false;
234                         }
235                         _thr_testcancel(curthread);
236                 }
237
                    /* An interrupted wait is reported as a normal return. */
238                 if (error == EINTR)
239                         error = 0;
240         } else {
241                 /* We know that it didn't unlock the mutex. */
242                 _mutex_cv_attach(mp, recurse);
243                 if (cancel) {
244                         if (robust) {
245                                 _mutex_leave_robust(curthread, mp);
246                                 robust = false;
247                         }
248                         _thr_testcancel(curthread);
249                 }
250                 error2 = 0;
251         }
            /* Balance _mutex_enter_robust() unless a branch above already did. */
252         if (robust)
253                 _mutex_leave_robust(curthread, mp);
254         return (error2 != 0 ? error2 : error);
255 }
256
257 /*
258  * A thread waits in the userland queue whenever possible.  When the thread
259  * is signaled or broadcast to, it is removed from the queue; if the
260  * signaling thread owns the mutex, the waiter is saved in that thread's
261  * defer_waiters[] buffer and is not woken up until the mutex is unlocked.
262  */
263
/*
 * Wait on cvp using the userland sleep queue, releasing mp for the duration
 * of the sleep and re-acquiring it before returning.
 *
 * abstime and the condvar's clock id are passed to _thr_sleep().  A nonzero
 * cancel makes the wait a cancellation point: cancellation is tested once
 * before queueing and again after every wakeup that left the thread on the
 * queue.
 *
 * Returns the wait result (0 or ETIMEDOUT); when that is 0, the result of
 * re-locking the mutex is returned instead.  Panics if the calling thread is
 * already on a sleep queue.
 */
264 static int
265 cond_wait_user(struct pthread_cond *cvp, struct pthread_mutex *mp,
266     const struct timespec *abstime, int cancel)
267 {
268         struct pthread *curthread;
269         struct sleepqueue *sq;
270         int deferred, error, error2, recurse;
271
272         curthread = _get_curthread();
273         if (curthread->wchan != NULL)
274                 PANIC("thread %p was already on queue.", curthread);
275
276         if (cancel)
277                 _thr_testcancel(curthread);
278
279         _sleepq_lock(cvp);
280         /*
281          * set __has_user_waiters before unlocking mutex, this allows
282          * us to check it without locking in pthread_cond_signal().
283          */
284         cvp->__has_user_waiters = 1; 
285         deferred = 0;
            /* The unlock result is deliberately ignored. */
286         (void)_mutex_cv_unlock(mp, &recurse, &deferred);
            /* Signalers read mutex_obj to decide whether to defer the wakeup. */
287         curthread->mutex_obj = mp;
288         _sleepq_add(cvp, curthread);
289         for(;;) {
290                 _thr_clear_wake(curthread);
291                 _sleepq_unlock(cvp);
                    /*
                     * First iteration only: if the unlock above reported a
                     * deferred wake and the mutex word is not marked
                     * contested, issue UMTX_OP_MUTEX_WAKE2 for it.
                     */
292                 if (deferred) {
293                         deferred = 0;
294                         if ((mp->m_lock.m_owner & UMUTEX_CONTESTED) == 0)
295                                 (void)_umtx_op_err(&mp->m_lock,
296                                     UMTX_OP_MUTEX_WAKE2, mp->m_lock.m_flags,
297                                     0, 0);
298                 }
                    /* Flush wakeups this thread had deferred for other waiters. */
299                 if (curthread->nwaiter_defer > 0) {
300                         _thr_wake_all(curthread->defer_waiters,
301                             curthread->nwaiter_defer);
302                         curthread->nwaiter_defer = 0;
303                 }
304
305                 if (cancel)
306                         _thr_cancel_enter2(curthread, 0);
307                 error = _thr_sleep(curthread, cvp->kcond.c_clockid, abstime);
308                 if (cancel)
309                         _thr_cancel_leave(curthread, 0);
310
311                 _sleepq_lock(cvp);
                    /*
                     * wchan == NULL: a signal or broadcast already took this
                     * thread off the queue, so the wait succeeded whatever
                     * _thr_sleep() returned.
                     */
312                 if (curthread->wchan == NULL) {
313                         error = 0;
314                         break;
315                 } else if (cancel && SHOULD_CANCEL(curthread)) {
                            /*
                             * Still queued and cancellation is pending:
                             * dequeue, re-take the mutex, then exit the thread.
                             */
316                         sq = _sleepq_lookup(cvp);
317                         cvp->__has_user_waiters = _sleepq_remove(sq, curthread);
318                         _sleepq_unlock(cvp);
319                         curthread->mutex_obj = NULL;
320                         error2 = _mutex_cv_lock(mp, recurse, false);
321                         if (!THR_IN_CRITICAL(curthread))
322                                 _pthread_exit(PTHREAD_CANCELED);
323                         else /* this should not happen */
324                                 return (error2);
325                 } else if (error == ETIMEDOUT) {
                            /* Timed out while still queued: dequeue ourselves. */
326                         sq = _sleepq_lookup(cvp);
327                         cvp->__has_user_waiters =
328                             _sleepq_remove(sq, curthread);
329                         break;
330                 }
                    /* Otherwise: still queued, no timeout -- sleep again. */
331         }
332         _sleepq_unlock(cvp);
333         curthread->mutex_obj = NULL;
334         error2 = _mutex_cv_lock(mp, recurse, false);
            /* A wait error (ETIMEDOUT) takes precedence over the re-lock result. */
335         if (error == 0)
336                 error = error2;
337         return (error);
338 }
339
340 static int
341 cond_wait_common(pthread_cond_t *cond, pthread_mutex_t *mutex,
342         const struct timespec *abstime, int cancel)
343 {
344         struct pthread  *curthread = _get_curthread();
345         struct pthread_cond *cvp;
346         struct pthread_mutex *mp;
347         int     error;
348
349         CHECK_AND_INIT_COND
350
351         if (*mutex == THR_PSHARED_PTR) {
352                 mp = __thr_pshared_offpage(mutex, 0);
353                 if (mp == NULL)
354                         return (EINVAL);
355         } else {
356                 mp = *mutex;
357         }
358
359         if ((error = _mutex_owned(curthread, mp)) != 0)
360                 return (error);
361
362         if (curthread->attr.sched_policy != SCHED_OTHER ||
363             (mp->m_lock.m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT |
364             USYNC_PROCESS_SHARED)) != 0 || CV_PSHARED(cvp))
365                 return (cond_wait_kernel(cvp, mp, abstime, cancel));
366         else
367                 return (cond_wait_user(cvp, mp, abstime, cancel));
368 }
369
/*
 * Untimed wait that is NOT a cancellation point: forwards to
 * cond_wait_common() with no timeout and cancel disabled.
 */
int
_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
{
        int rv;

        rv = cond_wait_common(cond, mutex, NULL, 0);
        return (rv);
}
376
/*
 * Untimed wait that IS a cancellation point: forwards to
 * cond_wait_common() with no timeout and cancel enabled.
 */
int
__pthread_cond_wait(pthread_cond_t * __restrict cond,
    pthread_mutex_t * __restrict mutex)
{
        int rv;

        rv = cond_wait_common(cond, mutex, NULL, 1);
        return (rv);
}
384
/*
 * Timed wait that is NOT a cancellation point.
 *
 * Returns EINVAL when abstime is NULL, has a negative tv_sec, or has a
 * tv_nsec outside [0, 1000000000); otherwise the result of
 * cond_wait_common() with cancel disabled.
 */
int
_pthread_cond_timedwait(pthread_cond_t * __restrict cond,
    pthread_mutex_t * __restrict mutex,
    const struct timespec * __restrict abstime)
{

        if (abstime == NULL)
                return (EINVAL);
        if (abstime->tv_sec < 0)
                return (EINVAL);
        if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
                return (EINVAL);

        return (cond_wait_common(cond, mutex, abstime, 0));
}
397
/*
 * Timed wait that IS a cancellation point.
 *
 * Returns EINVAL when abstime is NULL, has a negative tv_sec, or has a
 * tv_nsec outside [0, 1000000000); otherwise the result of
 * cond_wait_common() with cancel enabled.
 */
int
__pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
                       const struct timespec *abstime)
{

        if (abstime == NULL)
                return (EINVAL);
        if (abstime->tv_sec < 0)
                return (EINVAL);
        if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
                return (EINVAL);

        return (cond_wait_common(cond, mutex, abstime, 1));
}
409
410 static int
411 cond_signal_common(pthread_cond_t *cond)
412 {
413         struct pthread  *curthread = _get_curthread();
414         struct pthread *td;
415         struct pthread_cond *cvp;
416         struct pthread_mutex *mp;
417         struct sleepqueue *sq;
418         int     *waddr;
419         int     pshared;
420
421         /*
422          * If the condition variable is statically initialized, perform dynamic
423          * initialization.
424          */
425         CHECK_AND_INIT_COND
426
427         pshared = CV_PSHARED(cvp);
428
429         _thr_ucond_signal(&cvp->kcond);
430
431         if (pshared || cvp->__has_user_waiters == 0)
432                 return (0);
433
434         curthread = _get_curthread();
435         waddr = NULL;
436         _sleepq_lock(cvp);
437         sq = _sleepq_lookup(cvp);
438         if (sq == NULL) {
439                 _sleepq_unlock(cvp);
440                 return (0);
441         }
442
443         td = _sleepq_first(sq);
444         mp = td->mutex_obj;
445         cvp->__has_user_waiters = _sleepq_remove(sq, td);
446         if (PMUTEX_OWNER_ID(mp) == TID(curthread)) {
447                 if (curthread->nwaiter_defer >= MAX_DEFER_WAITERS) {
448                         _thr_wake_all(curthread->defer_waiters,
449                             curthread->nwaiter_defer);
450                         curthread->nwaiter_defer = 0;
451                 }
452                 curthread->defer_waiters[curthread->nwaiter_defer++] =
453                     &td->wake_addr->value;
454                 mp->m_flags |= PMUTEX_FLAG_DEFERRED;
455         } else {
456                 waddr = &td->wake_addr->value;
457         }
458         _sleepq_unlock(cvp);
459         if (waddr != NULL)
460                 _thr_set_wake(waddr);
461         return (0);
462 }
463
/*
 * State shared between cond_broadcast_common() and drop_cb(): the
 * broadcasting thread plus a batch of wake addresses for waiters that are to
 * be woken directly rather than deferred.
 */
464 struct broadcast_arg {
465         struct pthread *curthread;      /* thread performing the broadcast */
466         unsigned int *waddrs[MAX_DEFER_WAITERS]; /* pending direct wakeups */
467         int count;                      /* number of valid waddrs[] entries */
468 };
469
470 static void
471 drop_cb(struct pthread *td, void *arg)
472 {
473         struct broadcast_arg *ba = arg;
474         struct pthread_mutex *mp;
475         struct pthread *curthread = ba->curthread;
476
477         mp = td->mutex_obj;
478         if (PMUTEX_OWNER_ID(mp) == TID(curthread)) {
479                 if (curthread->nwaiter_defer >= MAX_DEFER_WAITERS) {
480                         _thr_wake_all(curthread->defer_waiters,
481                             curthread->nwaiter_defer);
482                         curthread->nwaiter_defer = 0;
483                 }
484                 curthread->defer_waiters[curthread->nwaiter_defer++] =
485                     &td->wake_addr->value;
486                 mp->m_flags |= PMUTEX_FLAG_DEFERRED;
487         } else {
488                 if (ba->count >= MAX_DEFER_WAITERS) {
489                         _thr_wake_all(ba->waddrs, ba->count);
490                         ba->count = 0;
491                 }
492                 ba->waddrs[ba->count++] = &td->wake_addr->value;
493         }
494 }
495
496 static int
497 cond_broadcast_common(pthread_cond_t *cond)
498 {
499         int    pshared;
500         struct pthread_cond *cvp;
501         struct sleepqueue *sq;
502         struct broadcast_arg ba;
503
504         /*
505          * If the condition variable is statically initialized, perform dynamic
506          * initialization.
507          */
508         CHECK_AND_INIT_COND
509
510         pshared = CV_PSHARED(cvp);
511
512         _thr_ucond_broadcast(&cvp->kcond);
513
514         if (pshared || cvp->__has_user_waiters == 0)
515                 return (0);
516
517         ba.curthread = _get_curthread();
518         ba.count = 0;
519         
520         _sleepq_lock(cvp);
521         sq = _sleepq_lookup(cvp);
522         if (sq == NULL) {
523                 _sleepq_unlock(cvp);
524                 return (0);
525         }
526         _sleepq_drop(sq, drop_cb, &ba);
527         cvp->__has_user_waiters = 0;
528         _sleepq_unlock(cvp);
529         if (ba.count > 0)
530                 _thr_wake_all(ba.waddrs, ba.count);
531         return (0);
532 }
533
/*
 * pthread_cond_signal() implementation; all work is done by
 * cond_signal_common(), whose result is returned unchanged.
 */
int
_pthread_cond_signal(pthread_cond_t * cond)
{
        int rv;

        rv = cond_signal_common(cond);
        return (rv);
}
540
/*
 * pthread_cond_broadcast() implementation; all work is done by
 * cond_broadcast_common(), whose result is returned unchanged.
 */
int
_pthread_cond_broadcast(pthread_cond_t * cond)
{
        int rv;

        rv = cond_broadcast_common(cond);
        return (rv);
}