/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2009-2010 Fabio Checconi
 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * A round-robin (RR) anticipatory scheduler, with per-client queues.
 *
 * The goal of this implementation is to improve throughput compared
 * to the pure elevator algorithm, and to ensure some fairness among
 * clients.
 *
 * Requests coming from the same client are put in the same queue.
 * We use anticipation to help reduce seeks, and each queue
 * is never served continuously for more than a given amount of
 * time or data.  Queues are then served in a round-robin fashion.
 *
 * Each queue can be in any of the following states:
 *     READY	immediately serve the first pending request;
 *     BUSY	one request is under service, wait for completion;
 *     IDLING	do not serve incoming requests immediately, unless
 *		they are "eligible" as defined later.
 *
 * Scheduling decisions are made by looking at the status of all
 * queues; the first queue in round-robin order is privileged.
 */
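/*
 * As a summary of the logic implemented below, the per-queue state
 * machine works roughly as follows (a sketch, not additional behavior):
 *
 *   READY  --(last pending request dispatched, anticipation on)--> BUSY
 *   BUSY   --(the in-flight request completes)-------------------> IDLING
 *   IDLING --(new request from the same client arrives)----------> READY
 *   IDLING --(wait_ms timer fires)--> queue released, RR moves on
 */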
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/callout.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include "gs_scheduler.h"
/* possible states of the scheduler */
enum g_rr_state {
	G_QUEUE_READY = 0,	/* Ready to dispatch. */
	G_QUEUE_BUSY,		/* Waiting for a completion. */
	G_QUEUE_IDLING		/* Waiting for a new request. */
};
/* possible queue flags */
enum g_rr_flags {
	/* G_FLAG_COMPLETED means that the field q_slice_end is valid. */
	G_FLAG_COMPLETED = 1,	/* Completed a req. in the current budget. */
};
/*
 * Queue descriptor, containing reference count, scheduling
 * state, a queue of pending requests, configuration parameters.
 * Queues with pending request(s) and not under service are also
 * stored in a Round Robin (RR) list.
 */
struct g_rr_queue {
	struct g_rr_softc *q_sc;	/* link to the parent */

	enum g_rr_state	q_status;
	unsigned int	q_service;	/* service received so far */
	int		q_slice_end;	/* actual slice end time, in ticks */
	enum g_rr_flags	q_flags;	/* queue flags */
	struct bio_queue_head q_bioq;

	/* Scheduling parameters */
	unsigned int	q_budget;	/* slice size in bytes */
	unsigned int	q_slice_duration; /* slice size in ticks */
	unsigned int	q_wait_ticks;	/* wait time for anticipation */

	/* Stats to drive the various heuristics. */
	struct g_savg	q_thinktime;	/* Thinktime average. */
	struct g_savg	q_seekdist;	/* Seek distance average. */

	int		q_bionum;	/* Number of requests. */

	off_t		q_lastoff;	/* Last submitted req. offset. */
	int		q_lastsub;	/* Last submitted req. time. */

	/* Expiration deadline for an empty queue. */
	int		q_expire;

	TAILQ_ENTRY(g_rr_queue) q_tailq; /* RR list link field */
};
TAILQ_HEAD(g_rr_tailq, g_rr_queue);

/* list of scheduler instances */
LIST_HEAD(g_scheds, g_rr_softc);

/* Default quantum for RR between queues. */
#define	G_RR_DEFAULT_BUDGET	0x00800000
/*
 * Per device descriptor, holding the Round Robin list of queues
 * accessing the disk, a reference to the geom, and the timer.
 */
struct g_rr_softc {
	struct g_geom	*sc_geom;

	/*
	 * sc_active is the queue we are anticipating for.
	 * It is set only in g_rr_next(), and possibly cleared
	 * only in g_rr_next() or on a timeout.
	 * The active queue is never in the Round Robin list
	 * even if it has requests queued.
	 */
	struct g_rr_queue *sc_active;
	struct callout	sc_wait;	/* timer for sc_active */

	struct g_rr_tailq sc_rr_tailq;	/* the round-robin list */
	int		sc_nqueues;	/* number of queues */

	/* Statistics */
	int		sc_in_flight;	/* requests in the driver */

	LIST_ENTRY(g_rr_softc) sc_next;
};
/* Descriptor for bounded values, min and max are constant. */
struct x_bound {
	const int	x_min;
	int		x_cur;
	const int	x_max;
};
/*
 * parameters, config and stats
 */
struct g_rr_params {
	int	queues;			/* total number of queues */
	int	w_anticipate;		/* anticipate writes */
	int	bypass;			/* bypass the scheduler entirely */

	int	units;			/* how many instances */
	/* sc_head is used for debugging */
	struct g_scheds	sc_head;	/* first scheduler instance */

	struct x_bound queue_depth;	/* max parallel requests */
	struct x_bound wait_ms;		/* wait time, milliseconds */
	struct x_bound quantum_ms;	/* quantum size, milliseconds */
	struct x_bound quantum_kb;	/* quantum size, KiB (1024 bytes) */

	/* statistics */
	int	wait_hit;		/* success in anticipation */
	int	wait_miss;		/* failure in anticipation */
};
/*
 * Default parameters for the scheduler.  The quantum sizes target
 * an 80 MB/s disk; if the hardware is faster or slower, whichever
 * of the two budgets (time or service) runs out first takes effect:
 * the clients will still be isolated, but the fairness may be
 * limited.  A complete solution would involve the on-line
 * measurement of the actual disk throughput to derive these
 * parameters.  Or we may just choose to ignore service domain
 * fairness and accept what can be achieved with time-only budgets.
 */
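/*
 * Worked example (informal): the default service quantum is 8192 KiB,
 * i.e. 8 MiB per slice, which an 80 MB/s disk drains in roughly
 * 8388608 / 80e6 ~= 0.1 s.  That matches the default 100 ms time
 * quantum, so at the target speed both budgets expire together; on
 * faster disks the byte budget dominates, on slower ones the time
 * budget does.
 */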
static struct g_rr_params me = {
	.sc_head = LIST_HEAD_INITIALIZER(&me.sc_head),
	.w_anticipate =	1,
	.queue_depth =	{ 1,	1,	50 },
	.wait_ms =	{ 1,	10,	30 },
	.quantum_ms =	{ 1,	100,	500 },
	.quantum_kb =	{ 16,	8192,	65536 },
};

struct g_rr_params *gs_rr_me = &me;
SYSCTL_DECL(_kern_geom_sched);
static SYSCTL_NODE(_kern_geom_sched, OID_AUTO, rr, CTLFLAG_RW, 0,
    "GEOM_SCHED ROUND ROBIN stuff");
SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, units, CTLFLAG_RD,
    &me.units, 0, "Scheduler instances");
SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, queues, CTLFLAG_RD,
    &me.queues, 0, "Total rr queues");
SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_ms, CTLFLAG_RW,
    &me.wait_ms.x_cur, 0, "Wait time milliseconds");
SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, quantum_ms, CTLFLAG_RW,
    &me.quantum_ms.x_cur, 0, "Quantum size milliseconds");
SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, bypass, CTLFLAG_RW,
    &me.bypass, 0, "Bypass scheduler");
SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, w_anticipate, CTLFLAG_RW,
    &me.w_anticipate, 0, "Do anticipation on writes");
SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, quantum_kb, CTLFLAG_RW,
    &me.quantum_kb.x_cur, 0, "Quantum size Kbytes");
SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, queue_depth, CTLFLAG_RW,
    &me.queue_depth.x_cur, 0, "Maximum simultaneous requests");
SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_hit, CTLFLAG_RW,
    &me.wait_hit, 0, "Hits in anticipation");
SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_miss, CTLFLAG_RW,
    &me.wait_miss, 0, "Misses in anticipation");
#ifdef DEBUG_QUEUES
/* print the status of a queue */
static void
gs_rr_dump_q(struct g_rr_queue *qp, int index)
{
	struct bio *bp;
	int l = 0;

	TAILQ_FOREACH(bp, &(qp->q_bioq.queue), bio_queue) {
		l++;
	}
	printf("--- rr queue %d %p status %d len %d ---\n",
	    index, qp, qp->q_status, l);
}

/*
 * Dump the scheduler status when writing to this sysctl variable.
 * XXX right now we only dump the status of the last instance created.
 * Not a severe issue because this is only for debugging.
 */
static int
gs_rr_sysctl_status(SYSCTL_HANDLER_ARGS)
{
	struct g_rr_softc *sc;
	int error, val = 0;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);

	printf("called %s\n", __FUNCTION__);

	LIST_FOREACH(sc, &me.sc_head, sc_next) {
		int i, tot = 0;

		printf("--- sc %p active %p nqueues %d "
		    "callout %d in_flight %d ---\n",
		    sc, sc->sc_active, sc->sc_nqueues,
		    callout_active(&sc->sc_wait),
		    sc->sc_in_flight);
		for (i = 0; i < G_RR_HASH_SIZE; i++) {
			struct g_rr_queue *qp;

			LIST_FOREACH(qp, &sc->sc_hash[i], q_hash) {
				gs_rr_dump_q(qp, tot++);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_kern_geom_sched_rr, OID_AUTO, status,
    CTLTYPE_UINT | CTLFLAG_RW,
    0, sizeof(int), gs_rr_sysctl_status, "I", "status");

#endif	/* DEBUG_QUEUES */
/*
 * Get a bounded value, optionally convert to a min of t_min ticks.
 */
static int
get_bounded(struct x_bound *v, int t_min)
{
	int x;

	x = v->x_cur;
	if (x < v->x_min)
		x = v->x_min;
	else if (x > v->x_max)
		x = v->x_max;
	if (t_min) {
		x = x * hz / 1000;	/* convert to ticks */
		if (x < t_min)
			x = t_min;
	}
	return (x);
}
304 * Get a reference to the queue for bp, using the generic
305 * classification mechanism.
307 static struct g_rr_queue *
308 g_rr_queue_get(struct g_rr_softc *sc, struct bio *bp)
311 return (g_sched_get_class(sc->sc_geom, bp));
static int
g_rr_init_class(void *data, void *priv)
{
	struct g_rr_softc *sc = data;
	struct g_rr_queue *qp = priv;

	bioq_init(&qp->q_bioq);

	/*
	 * Set the initial parameters for the client:
	 * slice size in bytes and ticks, and wait ticks.
	 * Right now these are constant, but we could have
	 * autoconfiguration code to adjust the values based on
	 * the actual workload.
	 */
	qp->q_budget = 1024 * get_bounded(&me.quantum_kb, 0);
	qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
	qp->q_wait_ticks = get_bounded(&me.wait_ms, 2);

	qp->q_sc = sc;		/* link to the parent */
	qp->q_sc->sc_nqueues++;
	me.queues++;

	return (0);
}
/*
 * Release a reference to the queue.
 */
static void
g_rr_queue_put(struct g_rr_queue *qp)
{

	g_sched_put_class(qp->q_sc->sc_geom, qp);
}
static void
g_rr_fini_class(void *data, void *priv)
{
	struct g_rr_queue *qp = priv;

	KASSERT(bioq_first(&qp->q_bioq) == NULL,
	    ("released nonempty queue"));
	qp->q_sc->sc_nqueues--;
	me.queues--;
}
/*
 * The queue is expired when it has consumed its byte budget, or when
 * it has completed a request in this slice and the slice end time is
 * past.
 */
static inline int
g_rr_queue_expired(struct g_rr_queue *qp)
{

	if (qp->q_service >= qp->q_budget)
		return (1);

	if ((qp->q_flags & G_FLAG_COMPLETED) &&
	    ticks - qp->q_slice_end >= 0)
		return (1);

	return (0);
}
/*
 * Decide whether it is worth idling (anticipating) on this queue
 * after dispatching its last pending request, bp.
 */
static inline int
g_rr_should_anticipate(struct g_rr_queue *qp, struct bio *bp)
{
	int wait = get_bounded(&me.wait_ms, 2);

	if (!me.w_anticipate && (bp->bio_cmd == BIO_WRITE))
		return (0);

	if (g_savg_valid(&qp->q_thinktime) &&
	    g_savg_read(&qp->q_thinktime) > wait)
		return (0);

	if (g_savg_valid(&qp->q_seekdist) &&
	    g_savg_read(&qp->q_seekdist) > 8192)
		return (0);

	return (1);
}
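/*
 * Informal example of the heuristic above: a client that issues its
 * next read within wait_ms of the previous completion and stays
 * within ~8 KiB of its last request looks sequential and synchronous,
 * so idling for it is likely to pay off; a client with a large
 * measured thinktime or seek distance fails one of the checks and is
 * served without anticipation.
 */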
/*
 * Called on a request arrival, timeout or completion.
 * Try to serve a request among those queued.
 */
static struct bio *
g_rr_next(void *data, int force)
{
	struct g_rr_softc *sc = data;
	struct g_rr_queue *qp;
	struct bio *bp, *next;
	int expired;

	qp = sc->sc_active;
	if (me.bypass == 0 && !force) {
		if (sc->sc_in_flight >= get_bounded(&me.queue_depth, 0))
			return (NULL);

		/* Try with the queue under service first. */
		if (qp != NULL && qp->q_status != G_QUEUE_READY) {
			/*
			 * Queue is anticipating, ignore request.
			 * We should check that we are not past
			 * the timeout, but in that case the timeout
			 * will fire immediately afterwards so we
			 * don't bother.
			 */
			return (NULL);
		}
	} else if (qp != NULL && qp->q_status != G_QUEUE_READY) {
		g_rr_queue_put(qp);
		sc->sc_active = qp = NULL;
	}

	/*
	 * No queue under service, look for the first in RR order.
	 * If we find it, select it as sc_active, clear its service
	 * and reset the completion flag for the new slice.
	 */
	if (qp == NULL) {
		qp = TAILQ_FIRST(&sc->sc_rr_tailq);
		if (qp == NULL)
			return (NULL);	/* no queues at all, return */
		/* otherwise select the new queue for service. */
		TAILQ_REMOVE(&sc->sc_rr_tailq, qp, q_tailq);
		sc->sc_active = qp;
		qp->q_service = 0;
		qp->q_flags &= ~G_FLAG_COMPLETED;
	}

	bp = bioq_takefirst(&qp->q_bioq);	/* surely not NULL */
	qp->q_service += bp->bio_length;	/* charge the service */

	/*
	 * The request at the head of the active queue is always
	 * dispatched, and g_rr_next() will be called again
	 * immediately.
	 * We need to prepare for what to do next:
	 *
	 * 1. have we reached the end of the (time or service) slice ?
	 *    If so, clear sc_active and possibly requeue the previous
	 *    active queue if it has more requests pending;
	 * 2. do we have more requests in sc_active ?
	 *    If yes, do not anticipate, as g_rr_next() will run again;
	 *    if no, decide whether or not to anticipate depending
	 *    on reads or writes (e.g., anticipate only on reads).
	 */
	expired = g_rr_queue_expired(qp);	/* are we expired ? */
	next = bioq_first(&qp->q_bioq);		/* do we have one more ? */
	if (expired) {
		sc->sc_active = NULL;
		/* Either requeue or release reference. */
		if (next != NULL)
			TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
		else
			g_rr_queue_put(qp);
	} else if (next != NULL) {
		qp->q_status = G_QUEUE_READY;
	} else {
		if (!force && g_rr_should_anticipate(qp, bp)) {
			/* anticipate */
			qp->q_status = G_QUEUE_BUSY;
		} else {
			/* do not anticipate, release reference */
			g_rr_queue_put(qp);
			sc->sc_active = NULL;
		}
	}
	/* If sc_active != NULL, its q_status is always correct. */

	sc->sc_in_flight++;

	return (bp);
}
/* Update the average thinktime for the queue. */
static void
g_rr_update_thinktime(struct g_rr_queue *qp)
{
	int delta = ticks - qp->q_lastsub, wait = get_bounded(&me.wait_ms, 2);

	if (qp->q_sc->sc_active != qp)
		return;

	qp->q_lastsub = ticks;
	delta = (delta > 2 * wait) ? 2 * wait : delta;
	if (qp->q_bionum > 7)
		g_savg_add_sample(&qp->q_thinktime, delta);
}
/* Update the average seek distance for the queue. */
static void
g_rr_update_seekdist(struct g_rr_queue *qp, struct bio *bp)
{
	off_t dist;

	if (qp->q_lastoff > bp->bio_offset)
		dist = qp->q_lastoff - bp->bio_offset;
	else
		dist = bp->bio_offset - qp->q_lastoff;

	if (dist > (8192 * 8))
		dist = 8192 * 8;	/* cap the sample at 64 KiB */

	qp->q_lastoff = bp->bio_offset + bp->bio_length;

	if (qp->q_bionum > 7)
		g_savg_add_sample(&qp->q_seekdist, dist);
}
/*
 * Called when a real request for disk I/O arrives.
 * Locate the queue associated with the client.
 * If the queue is the one we are anticipating for, reset its timeout;
 * if the queue is not in the round robin list, insert it in the list.
 * On any error, do not queue the request and return -1, the caller
 * will take care of this request.
 */
static int
g_rr_start(void *data, struct bio *bp)
{
	struct g_rr_softc *sc = data;
	struct g_rr_queue *qp;

	if (me.bypass)
		return (-1);	/* bypass the scheduler */

	/* Get the queue for the request. */
	qp = g_rr_queue_get(sc, bp);
	if (qp == NULL)
		return (-1);	/* allocation failed, tell upstream */

	if (bioq_first(&qp->q_bioq) == NULL) {
		/*
		 * We are inserting into an empty queue.
		 * Reset its state if it is sc_active,
		 * otherwise insert it in the RR list.
		 */
		if (qp == sc->sc_active) {
			qp->q_status = G_QUEUE_READY;
			callout_stop(&sc->sc_wait);
		} else {
			g_sched_priv_ref(qp);
			TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
		}
	}

	/* Decaying request count; the heuristics kick in above 7. */
	qp->q_bionum = 1 + qp->q_bionum - (qp->q_bionum >> 3);

	g_rr_update_thinktime(qp);
	g_rr_update_seekdist(qp, bp);

	/* Inherit the reference returned by g_rr_queue_get(). */
	bp->bio_caller1 = qp;
	bioq_disksort(&qp->q_bioq, bp);

	return (0);
}
/*
 * Callout executed when a queue times out anticipating a new request.
 */
static void
g_rr_wait_timeout(void *data)
{
	struct g_rr_softc *sc = data;
	struct g_geom *geom = sc->sc_geom;

	g_sched_lock(geom);
	/*
	 * We can race with other events, so check if
	 * sc_active is still valid.
	 */
	if (sc->sc_active != NULL) {
		/* Release the reference to the queue. */
		g_rr_queue_put(sc->sc_active);
		sc->sc_active = NULL;
		me.wait_hit--;
		me.wait_miss++;	/* record the miss */
	}
	g_sched_dispatch(geom);
	g_sched_unlock(geom);
}
/*
 * Module glue: allocate descriptor, initialize its fields.
 */
static void *
g_rr_init(struct g_geom *geom)
{
	struct g_rr_softc *sc;

	/* XXX check whether we can sleep */
	sc = malloc(sizeof *sc, M_GEOM_SCHED, M_NOWAIT | M_ZERO);
	if (sc == NULL)
		return (NULL);	/* let the caller handle the failure */
	sc->sc_geom = geom;
	TAILQ_INIT(&sc->sc_rr_tailq);
	callout_init(&sc->sc_wait, 1);
	LIST_INSERT_HEAD(&me.sc_head, sc, sc_next);
	me.units++;

	return (sc);
}
/*
 * Module glue -- drain the callout structure and free the
 * per-device descriptor.
 */
static void
g_rr_fini(void *data)
{
	struct g_rr_softc *sc = data;

	callout_drain(&sc->sc_wait);
	KASSERT(sc->sc_active == NULL, ("still a queue under service"));
	KASSERT(TAILQ_EMPTY(&sc->sc_rr_tailq), ("still scheduled queues"));

	LIST_REMOVE(sc, sc_next);
	me.units--;
	free(sc, M_GEOM_SCHED);
}
/*
 * Called when the request under service terminates.
 * Start the anticipation timer if needed.
 */
static void
g_rr_done(void *data, struct bio *bp)
{
	struct g_rr_softc *sc = data;
	struct g_rr_queue *qp;

	sc->sc_in_flight--;

	qp = bp->bio_caller1;

	/*
	 * When the first request for this queue completes, update the
	 * duration and end of the slice.  We do not do it when the
	 * slice starts to avoid charging to the queue the time of the
	 * first seek.
	 */
	if (!(qp->q_flags & G_FLAG_COMPLETED)) {
		qp->q_flags |= G_FLAG_COMPLETED;
		/*
		 * Recompute the slice duration, in case we want
		 * to make it adaptive.  This is not used right now.
		 * XXX should we do the same for q_budget and q_wait_ticks ?
		 */
		qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
		qp->q_slice_end = ticks + qp->q_slice_duration;
	}

	if (qp == sc->sc_active && qp->q_status == G_QUEUE_BUSY) {
		/* The queue is trying anticipation, start the timer. */
		qp->q_status = G_QUEUE_IDLING;
		/* may make this adaptive */
		qp->q_wait_ticks = get_bounded(&me.wait_ms, 2);
		me.wait_hit++;	/* count a hit; the timeout undoes it */
		callout_reset(&sc->sc_wait, qp->q_wait_ticks,
		    g_rr_wait_timeout, sc);
	} else
		g_sched_dispatch(sc->sc_geom);

	/* Release a reference to the queue. */
	g_rr_queue_put(qp);
}
static void
g_rr_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{

	if (indent == NULL) {	/* plaintext */
		sbuf_printf(sb, " units %d queues %d",
		    me.units, me.queues);
	}
}
static struct g_gsched g_rr = {
	.gs_name = "rr",
	.gs_priv_size = sizeof(struct g_rr_queue),
	.gs_init = g_rr_init,
	.gs_fini = g_rr_fini,
	.gs_start = g_rr_start,
	.gs_done = g_rr_done,
	.gs_next = g_rr_next,
	.gs_dumpconf = g_rr_dumpconf,
	.gs_init_class = g_rr_init_class,
	.gs_fini_class = g_rr_fini_class,
};

DECLARE_GSCHED_MODULE(rr, &g_rr);
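/*
 * Usage sketch: once the module is loaded, the algorithm registers
 * under the name "rr" and its tunables appear under the
 * kern.geom.sched.rr sysctl tree declared above, e.g.:
 *
 *	# sysctl kern.geom.sched.rr.wait_ms=10
 *	# sysctl kern.geom.sched.rr.quantum_kb=8192
 *
 * See gsched(8) for how to attach a scheduler to a disk provider.
 */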