sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c

   1 /*
   2  *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
   3  *  Copyright (C) 2007 The Regents of the University of California.
   4  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
   5  *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
   6  *  UCRL-CODE-235197
   7  *
   8  *  This file is part of the SPL, Solaris Porting Layer.
   9  *  For details, see <http://zfsonlinux.org/>.
  10  *
  11  *  The SPL is free software; you can redistribute it and/or modify it
  12  *  under the terms of the GNU General Public License as published by the
  13  *  Free Software Foundation; either version 2 of the License, or (at your
  14  *  option) any later version.
  15  *
  16  *  The SPL is distributed in the hope that it will be useful, but WITHOUT
  17  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  18  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  19  *  for more details.
  20  *
  21  *  You should have received a copy of the GNU General Public License along
  22  *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
  23  *
  24  *  Solaris Porting Layer (SPL) Condition Variables Implementation.
  25  */
  26
  27 #include <sys/condvar.h>
  28 #include <sys/time.h>
  29 #include <sys/sysmacros.h>
  30 #include <linux/hrtimer.h>
  31 #include <linux/compiler_compat.h>
  32 #include <linux/mod_compat.h>
  33
  34 #include <linux/sched.h>
  35
  36 #ifdef HAVE_SCHED_SIGNAL_HEADER
  37 #include <linux/sched/signal.h>
  38 #endif
  39
  40 #define MAX_HRTIMEOUT_SLACK_US  1000
  41 unsigned int spl_schedule_hrtimeout_slack_us = 0;
  42
  43 static int
  44 param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
  45 {
  46         unsigned long val;
  47         int error;
  48
  49         error = kstrtoul(buf, 0, &val);
  50         if (error)
  51                 return (error);
  52
  53         if (val > MAX_HRTIMEOUT_SLACK_US)
  54                 return (-EINVAL);
  55
  56         error = param_set_uint(buf, kp);
  57         if (error < 0)
  58                 return (error);
  59
  60         return (0);
  61 }
  62
  63 module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
  64         param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
  65 MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
  66         "schedule_hrtimeout_range() delta/slack value in us, default(0)");
  67
  68 void
  69 __cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
  70 {
  71         ASSERT(cvp);
  72         ASSERT(name == NULL);
  73         ASSERT(type == CV_DEFAULT);
  74         ASSERT(arg == NULL);
  75
  76         cvp->cv_magic = CV_MAGIC;
  77         init_waitqueue_head(&cvp->cv_event);
  78         init_waitqueue_head(&cvp->cv_destroy);
  79         atomic_set(&cvp->cv_waiters, 0);
  80         atomic_set(&cvp->cv_refs, 1);
  81         cvp->cv_mutex = NULL;
  82 }
  83 EXPORT_SYMBOL(__cv_init);
  84
  85 static int
  86 cv_destroy_wakeup(kcondvar_t *cvp)
  87 {
  88         if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
  89                 ASSERT(cvp->cv_mutex == NULL);
  90                 ASSERT(!waitqueue_active(&cvp->cv_event));
  91                 return (1);
  92         }
  93
  94         return (0);
  95 }
  96
  97 void
  98 __cv_destroy(kcondvar_t *cvp)
  99 {
 100         ASSERT(cvp);
 101         ASSERT(cvp->cv_magic == CV_MAGIC);
 102
 103         cvp->cv_magic = CV_DESTROY;
 104         atomic_dec(&cvp->cv_refs);
 105
 106         /* Block until all waiters are woken and references dropped. */
 107         while (cv_destroy_wakeup(cvp) == 0)
 108                 wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);
 109
 110         ASSERT3P(cvp->cv_mutex, ==, NULL);
 111         ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
 112         ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
 113         ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
 114 }
 115 EXPORT_SYMBOL(__cv_destroy);
 116
 117 static void
 118 cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
 119 {
 120         DEFINE_WAIT(wait);
 121         kmutex_t *m;
 122
 123         ASSERT(cvp);
 124         ASSERT(mp);
 125         ASSERT(cvp->cv_magic == CV_MAGIC);
 126         ASSERT(mutex_owned(mp));
 127         atomic_inc(&cvp->cv_refs);
 128
 129         m = READ_ONCE(cvp->cv_mutex);
 130         if (!m)
 131                 m = xchg(&cvp->cv_mutex, mp);
 132         /* Ensure the same mutex is used by all callers */
 133         ASSERT(m == NULL || m == mp);
 134
 135         prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
 136         atomic_inc(&cvp->cv_waiters);
 137
 138         /*
 139          * Mutex should be dropped after prepare_to_wait() this
 140          * ensures we're linked in to the waiters list and avoids the
 141          * race where 'cvp->cv_waiters > 0' but the list is empty.
 142          */
 143         mutex_exit(mp);
 144         if (io)
 145                 io_schedule();
 146         else
 147                 schedule();
 148
 149         /* No more waiters a different mutex could be used */
 150         if (atomic_dec_and_test(&cvp->cv_waiters)) {
 151                 /*
 152                  * This is set without any lock, so it's racy. But this is
 153                  * just for debug anyway, so make it best-effort
 154                  */
 155                 cvp->cv_mutex = NULL;
 156                 wake_up(&cvp->cv_destroy);
 157         }
 158
 159         finish_wait(&cvp->cv_event, &wait);
 160         atomic_dec(&cvp->cv_refs);
 161
 162         /*
 163          * Hold mutex after we release the cvp, otherwise we could dead lock
 164          * with a thread holding the mutex and call cv_destroy.
 165          */
 166         mutex_enter(mp);
 167 }
 168
 169 void
 170 __cv_wait(kcondvar_t *cvp, kmutex_t *mp)
 171 {
 172         cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
 173 }
 174 EXPORT_SYMBOL(__cv_wait);
 175
 176 void
 177 __cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
 178 {
 179         cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
 180 }
 181 EXPORT_SYMBOL(__cv_wait_io);
 182
 183 int
 184 __cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
 185 {
 186         cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);
 187
 188         return (signal_pending(current) ? 0 : 1);
 189 }
 190 EXPORT_SYMBOL(__cv_wait_io_sig);
 191
 192 int
 193 __cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
 194 {
 195         cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
 196
 197         return (signal_pending(current) ? 0 : 1);
 198 }
 199 EXPORT_SYMBOL(__cv_wait_sig);
 200
 201 void
 202 __cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
 203 {
 204         sigset_t blocked, saved;
 205
 206         sigfillset(&blocked);
 207         (void) sigprocmask(SIG_BLOCK, &blocked, &saved);
 208         cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
 209         (void) sigprocmask(SIG_SETMASK, &saved, NULL);
 210 }
 211 EXPORT_SYMBOL(__cv_wait_idle);
 212
 213 #if defined(HAVE_IO_SCHEDULE_TIMEOUT)
 214 #define spl_io_schedule_timeout(t)      io_schedule_timeout(t)
 215 #else
 216
 217 struct spl_task_timer {
 218         struct timer_list timer;
 219         struct task_struct *task;
 220 };
 221
 222 static void
 223 __cv_wakeup(spl_timer_list_t t)
 224 {
 225         struct timer_list *tmr = (struct timer_list *)t;
 226         struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);
 227
 228         wake_up_process(task_timer->task);
 229 }
 230
 231 static long
 232 spl_io_schedule_timeout(long time_left)
 233 {
 234         long expire_time = jiffies + time_left;
 235         struct spl_task_timer task_timer;
 236         struct timer_list *timer = &task_timer.timer;
 237
 238         task_timer.task = current;
 239
 240         timer_setup(timer, __cv_wakeup, 0);
 241
 242         timer->expires = expire_time;
 243         add_timer(timer);
 244
 245         io_schedule();
 246
 247         del_timer_sync(timer);
 248
 249         time_left = expire_time - jiffies;
 250
 251         return (time_left < 0 ? 0 : time_left);
 252 }
 253 #endif
 254
 255 /*
 256  * 'expire_time' argument is an absolute wall clock time in jiffies.
 257  * Return value is time left (expire_time - now) or -1 if timeout occurred.
 258  */
 259 static clock_t
 260 __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
 261     int state, int io)
 262 {
 263         DEFINE_WAIT(wait);
 264         kmutex_t *m;
 265         clock_t time_left;
 266
 267         ASSERT(cvp);
 268         ASSERT(mp);
 269         ASSERT(cvp->cv_magic == CV_MAGIC);
 270         ASSERT(mutex_owned(mp));
 271
 272         /* XXX - Does not handle jiffie wrap properly */
 273         time_left = expire_time - jiffies;
 274         if (time_left <= 0)
 275                 return (-1);
 276
 277         atomic_inc(&cvp->cv_refs);
 278         m = READ_ONCE(cvp->cv_mutex);
 279         if (!m)
 280                 m = xchg(&cvp->cv_mutex, mp);
 281         /* Ensure the same mutex is used by all callers */
 282         ASSERT(m == NULL || m == mp);
 283
 284         prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
 285         atomic_inc(&cvp->cv_waiters);
 286
 287         /*
 288          * Mutex should be dropped after prepare_to_wait() this
 289          * ensures we're linked in to the waiters list and avoids the
 290          * race where 'cvp->cv_waiters > 0' but the list is empty.
 291          */
 292         mutex_exit(mp);
 293         if (io)
 294                 time_left = spl_io_schedule_timeout(time_left);
 295         else
 296                 time_left = schedule_timeout(time_left);
 297
 298         /* No more waiters a different mutex could be used */
 299         if (atomic_dec_and_test(&cvp->cv_waiters)) {
 300                 /*
 301                  * This is set without any lock, so it's racy. But this is
 302                  * just for debug anyway, so make it best-effort
 303                  */
 304                 cvp->cv_mutex = NULL;
 305                 wake_up(&cvp->cv_destroy);
 306         }
 307
 308         finish_wait(&cvp->cv_event, &wait);
 309         atomic_dec(&cvp->cv_refs);
 310
 311         /*
 312          * Hold mutex after we release the cvp, otherwise we could dead lock
 313          * with a thread holding the mutex and call cv_destroy.
 314          */
 315         mutex_enter(mp);
 316         return (time_left > 0 ? 1 : -1);
 317 }
 318
 319 int
 320 __cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
 321 {
 322         return (__cv_timedwait_common(cvp, mp, exp_time,
 323             TASK_UNINTERRUPTIBLE, 0));
 324 }
 325 EXPORT_SYMBOL(__cv_timedwait);
 326
 327 int
 328 __cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
 329 {
 330         return (__cv_timedwait_common(cvp, mp, exp_time,
 331             TASK_UNINTERRUPTIBLE, 1));
 332 }
 333 EXPORT_SYMBOL(__cv_timedwait_io);
 334
 335 int
 336 __cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
 337 {
 338         int rc;
 339
 340         rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
 341         return (signal_pending(current) ? 0 : rc);
 342 }
 343 EXPORT_SYMBOL(__cv_timedwait_sig);
 344
 345 int
 346 __cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
 347 {
 348         sigset_t blocked, saved;
 349         int rc;
 350
 351         sigfillset(&blocked);
 352         (void) sigprocmask(SIG_BLOCK, &blocked, &saved);
 353         rc = __cv_timedwait_common(cvp, mp, exp_time,
 354             TASK_INTERRUPTIBLE, 0);
 355         (void) sigprocmask(SIG_SETMASK, &saved, NULL);
 356
 357         return (rc);
 358 }
 359 EXPORT_SYMBOL(__cv_timedwait_idle);
 360 /*
 361  * 'expire_time' argument is an absolute clock time in nanoseconds.
 362  * Return value is time left (expire_time - now) or -1 if timeout occurred.
 363  */
 364 static clock_t
 365 __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
 366     hrtime_t res, int state)
 367 {
 368         DEFINE_WAIT(wait);
 369         kmutex_t *m;
 370         hrtime_t time_left;
 371         ktime_t ktime_left;
 372         u64 slack = 0;
 373         int rc;
 374
 375         ASSERT(cvp);
 376         ASSERT(mp);
 377         ASSERT(cvp->cv_magic == CV_MAGIC);
 378         ASSERT(mutex_owned(mp));
 379
 380         time_left = expire_time - gethrtime();
 381         if (time_left <= 0)
 382                 return (-1);
 383
 384         atomic_inc(&cvp->cv_refs);
 385         m = READ_ONCE(cvp->cv_mutex);
 386         if (!m)
 387                 m = xchg(&cvp->cv_mutex, mp);
 388         /* Ensure the same mutex is used by all callers */
 389         ASSERT(m == NULL || m == mp);
 390
 391         prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
 392         atomic_inc(&cvp->cv_waiters);
 393
 394         /*
 395          * Mutex should be dropped after prepare_to_wait() this
 396          * ensures we're linked in to the waiters list and avoids the
 397          * race where 'cvp->cv_waiters > 0' but the list is empty.
 398          */
 399         mutex_exit(mp);
 400
 401         ktime_left = ktime_set(0, time_left);
 402         slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
 403             MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
 404         rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);
 405
 406         /* No more waiters a different mutex could be used */
 407         if (atomic_dec_and_test(&cvp->cv_waiters)) {
 408                 /*
 409                  * This is set without any lock, so it's racy. But this is
 410                  * just for debug anyway, so make it best-effort
 411                  */
 412                 cvp->cv_mutex = NULL;
 413                 wake_up(&cvp->cv_destroy);
 414         }
 415
 416         finish_wait(&cvp->cv_event, &wait);
 417         atomic_dec(&cvp->cv_refs);
 418
 419         mutex_enter(mp);
 420         return (rc == -EINTR ? 1 : -1);
 421 }
 422
 423 /*
 424  * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
 425  */
 426 static int
 427 cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
 428     hrtime_t res, int flag, int state)
 429 {
 430         if (!(flag & CALLOUT_FLAG_ABSOLUTE))
 431                 tim += gethrtime();
 432
 433         return (__cv_timedwait_hires(cvp, mp, tim, res, state));
 434 }
 435
 436 int
 437 cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
 438     int flag)
 439 {
 440         return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
 441             TASK_UNINTERRUPTIBLE));
 442 }
 443 EXPORT_SYMBOL(cv_timedwait_hires);
 444
 445 int
 446 cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
 447     hrtime_t res, int flag)
 448 {
 449         int rc;
 450
 451         rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
 452             TASK_INTERRUPTIBLE);
 453         return (signal_pending(current) ? 0 : rc);
 454 }
 455 EXPORT_SYMBOL(cv_timedwait_sig_hires);
 456
 457 int
 458 cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
 459     hrtime_t res, int flag)
 460 {
 461         sigset_t blocked, saved;
 462         int rc;
 463
 464         sigfillset(&blocked);
 465         (void) sigprocmask(SIG_BLOCK, &blocked, &saved);
 466         rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
 467             TASK_INTERRUPTIBLE);
 468         (void) sigprocmask(SIG_SETMASK, &saved, NULL);
 469
 470         return (rc);
 471 }
 472 EXPORT_SYMBOL(cv_timedwait_idle_hires);
 473
 474 void
 475 __cv_signal(kcondvar_t *cvp)
 476 {
 477         ASSERT(cvp);
 478         ASSERT(cvp->cv_magic == CV_MAGIC);
 479         atomic_inc(&cvp->cv_refs);
 480
 481         /*
 482          * All waiters are added with WQ_FLAG_EXCLUSIVE so only one
 483          * waiter will be set runnable with each call to wake_up().
 484          * Additionally wake_up() holds a spin_lock associated with
 485          * the wait queue to ensure we don't race waking up processes.
 486          */
 487         if (atomic_read(&cvp->cv_waiters) > 0)
 488                 wake_up(&cvp->cv_event);
 489
 490         atomic_dec(&cvp->cv_refs);
 491 }
 492 EXPORT_SYMBOL(__cv_signal);
 493
 494 void
 495 __cv_broadcast(kcondvar_t *cvp)
 496 {
 497         ASSERT(cvp);
 498         ASSERT(cvp->cv_magic == CV_MAGIC);
 499         atomic_inc(&cvp->cv_refs);
 500
 501         /*
 502          * Wake_up_all() will wake up all waiters even those which
 503          * have the WQ_FLAG_EXCLUSIVE flag set.
 504          */
 505         if (atomic_read(&cvp->cv_waiters) > 0)
 506                 wake_up_all(&cvp->cv_event);
 507
 508         atomic_dec(&cvp->cv_refs);
 509 }
 510 EXPORT_SYMBOL(__cv_broadcast);