sys/geom/sched/gs_rr.c

   1 /*-
   2  * Copyright (c) 2009-2010 Fabio Checconi
   3  * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  *
  15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25  * SUCH DAMAGE.
  26  */
  27
  28 /*
  29  * $Id$
  30  * $FreeBSD$
  31  *
  32  * A round-robin (RR) anticipatory scheduler, with per-client queues.
  33  *
  34  * The goal of this implementation is to improve throughput compared
  35  * to the pure elevator algorithm, and insure some fairness among
  36  * clients.
  37  *
  38  * Requests coming from the same client are put in the same queue.
  39  * We use anticipation to help reducing seeks, and each queue
  40  * is never served continuously for more than a given amount of
  41  * time or data. Queues are then served in a round-robin fashion.
  42  *
  43  * Each queue can be in any of the following states:
  44  *     READY    immediately serve the first pending request;
  45  *     BUSY     one request is under service, wait for completion;
  46  *     IDLING   do not serve incoming requests immediately, unless
  47  *              they are "eligible" as defined later.
  48  *
  49  * Scheduling is made looking at the status of all queues,
  50  * and the first one in round-robin order is privileged.
  51  */
  52
  53 #include <sys/param.h>
  54 #include <sys/systm.h>
  55 #include <sys/kernel.h>
  56 #include <sys/bio.h>
  57 #include <sys/callout.h>
  58 #include <sys/malloc.h>
  59 #include <sys/module.h>
  60 #include <sys/proc.h>
  61 #include <sys/queue.h>
  62 #include <sys/sbuf.h>
  63 #include <sys/sysctl.h>
  64 #include "gs_scheduler.h"
  65
  66 /* possible states of the scheduler */
  67 enum g_rr_state {
  68         G_QUEUE_READY = 0,      /* Ready to dispatch. */
  69         G_QUEUE_BUSY,           /* Waiting for a completion. */
  70         G_QUEUE_IDLING          /* Waiting for a new request. */
  71 };
  72
  73 /* possible queue flags */
  74 enum g_rr_flags {
  75         /* G_FLAG_COMPLETED means that the field q_slice_end is valid. */
  76         G_FLAG_COMPLETED = 1,   /* Completed a req. in the current budget. */
  77 };
  78
  79 struct g_rr_softc;
  80
  81 /*
  82  * Queue descriptor, containing reference count, scheduling
  83  * state, a queue of pending requests, configuration parameters.
  84  * Queues with pending request(s) and not under service are also
  85  * stored in a Round Robin (RR) list.
  86  */
  87 struct g_rr_queue {
  88         struct g_rr_softc *q_sc;        /* link to the parent */
  89
  90         enum g_rr_state q_status;
  91         unsigned int    q_service;      /* service received so far */
  92         int             q_slice_end;    /* actual slice end time, in ticks */
  93         enum g_rr_flags q_flags;        /* queue flags */
  94         struct bio_queue_head q_bioq;
  95
  96         /* Scheduling parameters */
  97         unsigned int    q_budget;       /* slice size in bytes */
  98         unsigned int    q_slice_duration; /* slice size in ticks */
  99         unsigned int    q_wait_ticks;   /* wait time for anticipation */
 100
 101         /* Stats to drive the various heuristics. */
 102         struct g_savg   q_thinktime;    /* Thinktime average. */
 103         struct g_savg   q_seekdist;     /* Seek distance average. */
 104
 105         int             q_bionum;       /* Number of requests. */
 106
 107         off_t           q_lastoff;      /* Last submitted req. offset. */
 108         int             q_lastsub;      /* Last submitted req. time. */
 109
 110         /* Expiration deadline for an empty queue. */
 111         int             q_expire;
 112
 113         TAILQ_ENTRY(g_rr_queue) q_tailq; /* RR list link field */
 114 };
 115
 116 /* List types. */
 117 TAILQ_HEAD(g_rr_tailq, g_rr_queue);
 118
 119 /* list of scheduler instances */
 120 LIST_HEAD(g_scheds, g_rr_softc);
 121
 122 /* Default quantum for RR between queues. */
 123 #define G_RR_DEFAULT_BUDGET     0x00800000
 124
 125 /*
 126  * Per device descriptor, holding the Round Robin list of queues
 127  * accessing the disk, a reference to the geom, and the timer.
 128  */
 129 struct g_rr_softc {
 130         struct g_geom   *sc_geom;
 131
 132         /*
 133          * sc_active is the queue we are anticipating for.
 134          * It is set only in gs_rr_next(), and possibly cleared
 135          * only in gs_rr_next() or on a timeout.
 136          * The active queue is never in the Round Robin list
 137          * even if it has requests queued.
 138          */
 139         struct g_rr_queue *sc_active;
 140         struct callout  sc_wait;        /* timer for sc_active */
 141
 142         struct g_rr_tailq sc_rr_tailq;  /* the round-robin list */
 143         int             sc_nqueues;     /* number of queues */
 144
 145         /* Statistics */
 146         int             sc_in_flight;   /* requests in the driver */
 147
 148         LIST_ENTRY(g_rr_softc)  sc_next;
 149 };
 150
 151 /* Descriptor for bounded values, min and max are constant. */
 152 struct x_bound {
 153         const int       x_min;
 154         int             x_cur;
 155         const int       x_max;
 156 };
 157
 158 /*
 159  * parameters, config and stats
 160  */
 161 struct g_rr_params {
 162         int     queues;                 /* total number of queues */
 163         int     w_anticipate;           /* anticipate writes */
 164         int     bypass;                 /* bypass scheduling writes */
 165
 166         int     units;                  /* how many instances */
 167         /* sc_head is used for debugging */
 168         struct g_scheds sc_head;        /* first scheduler instance */
 169
 170         struct x_bound queue_depth;     /* max parallel requests */
 171         struct x_bound wait_ms;         /* wait time, milliseconds */
 172         struct x_bound quantum_ms;      /* quantum size, milliseconds */
 173         struct x_bound quantum_kb;      /* quantum size, Kb (1024 bytes) */
 174
 175         /* statistics */
 176         int     wait_hit;               /* success in anticipation */
 177         int     wait_miss;              /* failure in anticipation */
 178 };
 179
 180 /*
 181  * Default parameters for the scheduler.  The quantum sizes target
 182  * a 80MB/s disk; if the hw is faster or slower the minimum of the
 183  * two will have effect: the clients will still be isolated but
 184  * the fairness may be limited.  A complete solution would involve
 185  * the on-line measurement of the actual disk throughput to derive
 186  * these parameters.  Or we may just choose to ignore service domain
 187  * fairness and accept what can be achieved with time-only budgets.
 188  */
 189 static struct g_rr_params me = {
 190         .sc_head = LIST_HEAD_INITIALIZER(&me.sc_head),
 191         .w_anticipate = 1,
 192         .queue_depth =  { 1,    1,      50 },
 193         .wait_ms =      { 1,    10,     30 },
 194         .quantum_ms =   { 1,    100,    500 },
 195         .quantum_kb =   { 16,   8192,   65536 },
 196 };
 197
 198 struct g_rr_params *gs_rr_me = &me;
 199
 200 SYSCTL_DECL(_kern_geom_sched);
 201 static SYSCTL_NODE(_kern_geom_sched, OID_AUTO, rr, CTLFLAG_RW, 0,
 202     "GEOM_SCHED ROUND ROBIN stuff");
 203 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, units, CTLFLAG_RD,
 204     &me.units, 0, "Scheduler instances");
 205 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, queues, CTLFLAG_RD,
 206     &me.queues, 0, "Total rr queues");
 207 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_ms, CTLFLAG_RW,
 208     &me.wait_ms.x_cur, 0, "Wait time milliseconds");
 209 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, quantum_ms, CTLFLAG_RW,
 210     &me.quantum_ms.x_cur, 0, "Quantum size milliseconds");
 211 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, bypass, CTLFLAG_RW,
 212     &me.bypass, 0, "Bypass scheduler");
 213 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, w_anticipate, CTLFLAG_RW,
 214     &me.w_anticipate, 0, "Do anticipation on writes");
 215 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, quantum_kb, CTLFLAG_RW,
 216     &me.quantum_kb.x_cur, 0, "Quantum size Kbytes");
 217 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, queue_depth, CTLFLAG_RW,
 218     &me.queue_depth.x_cur, 0, "Maximum simultaneous requests");
 219 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_hit, CTLFLAG_RW,
 220     &me.wait_hit, 0, "Hits in anticipation");
 221 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_miss, CTLFLAG_RW,
 222     &me.wait_miss, 0, "Misses in anticipation");
 223
 224 #ifdef DEBUG_QUEUES
 225 /* print the status of a queue */
 226 static void
 227 gs_rr_dump_q(struct g_rr_queue *qp, int index)
 228 {
 229         int l = 0;
 230         struct bio *bp;
 231
 232         TAILQ_FOREACH(bp, &(qp->q_bioq.queue), bio_queue) {
 233                 l++;
 234         }
 235         printf("--- rr queue %d %p status %d len %d ---\n",
 236             index, qp, qp->q_status, l);
 237 }
 238
 239 /*
 240  * Dump the scheduler status when writing to this sysctl variable.
 241  * XXX right now we only dump the status of the last instance created.
 242  * not a severe issue because this is only for debugging
 243  */
 244 static int
 245 gs_rr_sysctl_status(SYSCTL_HANDLER_ARGS)
 246 {
 247         int error, val = 0;
 248         struct g_rr_softc *sc;
 249
 250         error = sysctl_handle_int(oidp, &val, 0, req);
 251         if (error || !req->newptr )
 252                 return (error);
 253
 254         printf("called %s\n", __FUNCTION__);
 255
 256         LIST_FOREACH(sc, &me.sc_head, sc_next) {
 257                 int i, tot = 0;
 258                 printf("--- sc %p active %p nqueues %d "
 259                     "callout %d in_flight %d ---\n",
 260                     sc, sc->sc_active, sc->sc_nqueues,
 261                     callout_active(&sc->sc_wait),
 262                     sc->sc_in_flight);
 263                 for (i = 0; i < G_RR_HASH_SIZE; i++) {
 264                         struct g_rr_queue *qp;
 265                         LIST_FOREACH(qp, &sc->sc_hash[i], q_hash) {
 266                                 gs_rr_dump_q(qp, tot);
 267                                 tot++;
 268                         }
 269                 }
 270         }
 271         return (0);
 272 }
 273
 274 SYSCTL_PROC(_kern_geom_sched_rr, OID_AUTO, status,
 275         CTLTYPE_UINT | CTLFLAG_RW,
 276     0, sizeof(int), gs_rr_sysctl_status, "I", "status");
 277
 278 #endif  /* DEBUG_QUEUES */
 279
 280 /*
 281  * Get a bounded value, optionally convert to a min of t_min ticks.
 282  */
 283 static int
 284 get_bounded(struct x_bound *v, int t_min)
 285 {
 286         int x;
 287
 288         x = v->x_cur;
 289         if (x < v->x_min)
 290                 x = v->x_min;
 291         else if (x > v->x_max)
 292                 x = v->x_max;
 293         if (t_min) {
 294                 x = x * hz / 1000;      /* convert to ticks */
 295                 if (x < t_min)
 296                         x = t_min;
 297         }
 298         return x;
 299 }
 300
 301 /*
 302  * Get a reference to the queue for bp, using the generic
 303  * classification mechanism.
 304  */
 305 static struct g_rr_queue *
 306 g_rr_queue_get(struct g_rr_softc *sc, struct bio *bp)
 307 {
 308
 309         return (g_sched_get_class(sc->sc_geom, bp));
 310 }
 311
 312 static int
 313 g_rr_init_class(void *data, void *priv)
 314 {
 315         struct g_rr_softc *sc = data;
 316         struct g_rr_queue *qp = priv;
 317
 318         gs_bioq_init(&qp->q_bioq);
 319
 320         /*
 321          * Set the initial parameters for the client:
 322          * slice size in bytes and ticks, and wait ticks.
 323          * Right now these are constant, but we could have
 324          * autoconfiguration code to adjust the values based on
 325          * the actual workload.
 326          */
 327         qp->q_budget = 1024 * get_bounded(&me.quantum_kb, 0);
 328         qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
 329         qp->q_wait_ticks = get_bounded(&me.wait_ms, 2);
 330
 331         qp->q_sc = sc;          /* link to the parent */
 332         qp->q_sc->sc_nqueues++;
 333         me.queues++;
 334
 335         return (0);
 336 }
 337
 338 /*
 339  * Release a reference to the queue.
 340  */
 341 static void
 342 g_rr_queue_put(struct g_rr_queue *qp)
 343 {
 344
 345         g_sched_put_class(qp->q_sc->sc_geom, qp);
 346 }
 347
 348 static void
 349 g_rr_fini_class(void *data, void *priv)
 350 {
 351         struct g_rr_queue *qp = priv;
 352
 353         KASSERT(gs_bioq_first(&qp->q_bioq) == NULL,
 354                         ("released nonempty queue"));
 355         qp->q_sc->sc_nqueues--;
 356         me.queues--;
 357 }
 358
 359 static inline int
 360 g_rr_queue_expired(struct g_rr_queue *qp)
 361 {
 362
 363         if (qp->q_service >= qp->q_budget)
 364                 return (1);
 365
 366         if ((qp->q_flags & G_FLAG_COMPLETED) &&
 367             ticks - qp->q_slice_end >= 0)
 368                 return (1);
 369
 370         return (0);
 371 }
 372
 373 static inline int
 374 g_rr_should_anticipate(struct g_rr_queue *qp, struct bio *bp)
 375 {
 376         int wait = get_bounded(&me.wait_ms, 2);
 377
 378         if (!me.w_anticipate && (bp->bio_cmd & BIO_WRITE))
 379                 return (0);
 380
 381         if (g_savg_valid(&qp->q_thinktime) &&
 382             g_savg_read(&qp->q_thinktime) > wait)
 383                 return (0);
 384
 385         if (g_savg_valid(&qp->q_seekdist) &&
 386             g_savg_read(&qp->q_seekdist) > 8192)
 387                 return (0);
 388
 389         return (1);
 390 }
 391
 392 /*
 393  * Called on a request arrival, timeout or completion.
 394  * Try to serve a request among those queued.
 395  */
 396 static struct bio *
 397 g_rr_next(void *data, int force)
 398 {
 399         struct g_rr_softc *sc = data;
 400         struct g_rr_queue *qp;
 401         struct bio *bp, *next;
 402         int expired;
 403
 404         qp = sc->sc_active;
 405         if (me.bypass == 0 && !force) {
 406                 if (sc->sc_in_flight >= get_bounded(&me.queue_depth, 0))
 407                         return (NULL);
 408
 409                 /* Try with the queue under service first. */
 410                 if (qp != NULL && qp->q_status != G_QUEUE_READY) {
 411                         /*
 412                          * Queue is anticipating, ignore request.
 413                          * We should check that we are not past
 414                          * the timeout, but in that case the timeout
 415                          * will fire immediately afterwards so we
 416                          * don't bother.
 417                          */
 418                         return (NULL);
 419                 }
 420         } else if (qp != NULL && qp->q_status != G_QUEUE_READY) {
 421                 g_rr_queue_put(qp);
 422                 sc->sc_active = qp = NULL;
 423         }
 424
 425         /*
 426          * No queue under service, look for the first in RR order.
 427          * If we find it, select if as sc_active, clear service
 428          * and record the end time of the slice.
 429          */
 430         if (qp == NULL) {
 431                 qp = TAILQ_FIRST(&sc->sc_rr_tailq);
 432                 if (qp == NULL)
 433                         return (NULL); /* no queues at all, return */
 434                 /* otherwise select the new queue for service. */
 435                 TAILQ_REMOVE(&sc->sc_rr_tailq, qp, q_tailq);
 436                 sc->sc_active = qp;
 437                 qp->q_service = 0;
 438                 qp->q_flags &= ~G_FLAG_COMPLETED;
 439         }
 440
 441         bp = gs_bioq_takefirst(&qp->q_bioq);    /* surely not NULL */
 442         qp->q_service += bp->bio_length;        /* charge the service */
 443
 444         /*
 445          * The request at the head of the active queue is always
 446          * dispatched, and gs_rr_next() will be called again
 447          * immediately.
 448          * We need to prepare for what to do next:
 449          *
 450          * 1. have we reached the end of the (time or service) slice ?
 451          *    If so, clear sc_active and possibly requeue the previous
 452          *    active queue if it has more requests pending;
 453          * 2. do we have more requests in sc_active ?
 454          *    If yes, do not anticipate, as gs_rr_next() will run again;
 455          *    if no, decide whether or not to anticipate depending
 456          *    on read or writes (e.g., anticipate only on reads).
 457          */
 458         expired = g_rr_queue_expired(qp);       /* are we expired ? */
 459         next = gs_bioq_first(&qp->q_bioq);      /* do we have one more ? */
 460         if (expired) {
 461                 sc->sc_active = NULL;
 462                 /* Either requeue or release reference. */
 463                 if (next != NULL)
 464                         TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
 465                 else
 466                         g_rr_queue_put(qp);
 467         } else if (next != NULL) {
 468                 qp->q_status = G_QUEUE_READY;
 469         } else {
 470                 if (!force && g_rr_should_anticipate(qp, bp)) {
 471                         /* anticipate */
 472                         qp->q_status = G_QUEUE_BUSY;
 473                 } else {
 474                         /* do not anticipate, release reference */
 475                         g_rr_queue_put(qp);
 476                         sc->sc_active = NULL;
 477                 }
 478         }
 479         /* If sc_active != NULL, its q_status is always correct. */
 480
 481         sc->sc_in_flight++;
 482
 483         return (bp);
 484 }
 485
 486 static inline void
 487 g_rr_update_thinktime(struct g_rr_queue *qp)
 488 {
 489         int delta = ticks - qp->q_lastsub, wait = get_bounded(&me.wait_ms, 2);
 490
 491         if (qp->q_sc->sc_active != qp)
 492                 return;
 493
 494         qp->q_lastsub = ticks;
 495         delta = (delta > 2 * wait) ? 2 * wait : delta;
 496         if (qp->q_bionum > 7)
 497                 g_savg_add_sample(&qp->q_thinktime, delta);
 498 }
 499
 500 static inline void
 501 g_rr_update_seekdist(struct g_rr_queue *qp, struct bio *bp)
 502 {
 503         off_t dist;
 504
 505         if (qp->q_lastoff > bp->bio_offset)
 506                 dist = qp->q_lastoff - bp->bio_offset;
 507         else
 508                 dist = bp->bio_offset - qp->q_lastoff;
 509
 510         if (dist > (8192 * 8))
 511                 dist = 8192 * 8;
 512
 513         qp->q_lastoff = bp->bio_offset + bp->bio_length;
 514
 515         if (qp->q_bionum > 7)
 516                 g_savg_add_sample(&qp->q_seekdist, dist);
 517 }
 518
 519 /*
 520  * Called when a real request for disk I/O arrives.
 521  * Locate the queue associated with the client.
 522  * If the queue is the one we are anticipating for, reset its timeout;
 523  * if the queue is not in the round robin list, insert it in the list.
 524  * On any error, do not queue the request and return -1, the caller
 525  * will take care of this request.
 526  */
 527 static int
 528 g_rr_start(void *data, struct bio *bp)
 529 {
 530         struct g_rr_softc *sc = data;
 531         struct g_rr_queue *qp;
 532
 533         if (me.bypass)
 534                 return (-1);    /* bypass the scheduler */
 535
 536         /* Get the queue for the request. */
 537         qp = g_rr_queue_get(sc, bp);
 538         if (qp == NULL)
 539                 return (-1); /* allocation failed, tell upstream */
 540
 541         if (gs_bioq_first(&qp->q_bioq) == NULL) {
 542                 /*
 543                  * We are inserting into an empty queue.
 544                  * Reset its state if it is sc_active,
 545                  * otherwise insert it in the RR list.
 546                  */
 547                 if (qp == sc->sc_active) {
 548                         qp->q_status = G_QUEUE_READY;
 549                         callout_stop(&sc->sc_wait);
 550                 } else {
 551                         g_sched_priv_ref(qp);
 552                         TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
 553                 }
 554         }
 555
 556         qp->q_bionum = 1 + qp->q_bionum - (qp->q_bionum >> 3);
 557
 558         g_rr_update_thinktime(qp);
 559         g_rr_update_seekdist(qp, bp);
 560
 561         /* Inherit the reference returned by g_rr_queue_get(). */
 562         bp->bio_caller1 = qp;
 563         gs_bioq_disksort(&qp->q_bioq, bp);
 564
 565         return (0);
 566 }
 567
 568 /*
 569  * Callout executed when a queue times out anticipating a new request.
 570  */
 571 static void
 572 g_rr_wait_timeout(void *data)
 573 {
 574         struct g_rr_softc *sc = data;
 575         struct g_geom *geom = sc->sc_geom;
 576
 577         g_sched_lock(geom);
 578         /*
 579          * We can race with other events, so check if
 580          * sc_active is still valid.
 581          */
 582         if (sc->sc_active != NULL) {
 583                 /* Release the reference to the queue. */
 584                 g_rr_queue_put(sc->sc_active);
 585                 sc->sc_active = NULL;
 586                 me.wait_hit--;
 587                 me.wait_miss++; /* record the miss */
 588         }
 589         g_sched_dispatch(geom);
 590         g_sched_unlock(geom);
 591 }
 592
 593 /*
 594  * Module glue: allocate descriptor, initialize its fields.
 595  */
 596 static void *
 597 g_rr_init(struct g_geom *geom)
 598 {
 599         struct g_rr_softc *sc;
 600
 601         /* XXX check whether we can sleep */
 602         sc = malloc(sizeof *sc, M_GEOM_SCHED, M_NOWAIT | M_ZERO);
 603         sc->sc_geom = geom;
 604         TAILQ_INIT(&sc->sc_rr_tailq);
 605         callout_init(&sc->sc_wait, CALLOUT_MPSAFE);
 606         LIST_INSERT_HEAD(&me.sc_head, sc, sc_next);
 607         me.units++;
 608
 609         return (sc);
 610 }
 611
 612 /*
 613  * Module glue -- drain the callout structure, destroy the
 614  * hash table and its element, and free the descriptor.
 615  */
 616 static void
 617 g_rr_fini(void *data)
 618 {
 619         struct g_rr_softc *sc = data;
 620
 621         callout_drain(&sc->sc_wait);
 622         KASSERT(sc->sc_active == NULL, ("still a queue under service"));
 623         KASSERT(TAILQ_EMPTY(&sc->sc_rr_tailq), ("still scheduled queues"));
 624
 625         LIST_REMOVE(sc, sc_next);
 626         me.units--;
 627         free(sc, M_GEOM_SCHED);
 628 }
 629
 630 /*
 631  * Called when the request under service terminates.
 632  * Start the anticipation timer if needed.
 633  */
 634 static void
 635 g_rr_done(void *data, struct bio *bp)
 636 {
 637         struct g_rr_softc *sc = data;
 638         struct g_rr_queue *qp;
 639
 640         sc->sc_in_flight--;
 641
 642         qp = bp->bio_caller1;
 643
 644         /*
 645          * When the first request for this queue completes, update the
 646          * duration and end of the slice. We do not do it when the
 647          * slice starts to avoid charging to the queue the time for
 648          * the first seek.
 649          */
 650         if (!(qp->q_flags & G_FLAG_COMPLETED)) {
 651                 qp->q_flags |= G_FLAG_COMPLETED;
 652                 /*
 653                  * recompute the slice duration, in case we want
 654                  * to make it adaptive. This is not used right now.
 655                  * XXX should we do the same for q_quantum and q_wait_ticks ?
 656                  */
 657                 qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
 658                 qp->q_slice_end = ticks + qp->q_slice_duration;
 659         }
 660
 661         if (qp == sc->sc_active && qp->q_status == G_QUEUE_BUSY) {
 662                 /* The queue is trying anticipation, start the timer. */
 663                 qp->q_status = G_QUEUE_IDLING;
 664                 /* may make this adaptive */
 665                 qp->q_wait_ticks = get_bounded(&me.wait_ms, 2);
 666                 me.wait_hit++;
 667                 callout_reset(&sc->sc_wait, qp->q_wait_ticks,
 668                     g_rr_wait_timeout, sc);
 669         } else
 670                 g_sched_dispatch(sc->sc_geom);
 671
 672         /* Release a reference to the queue. */
 673         g_rr_queue_put(qp);
 674 }
 675
 676 static void
 677 g_rr_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
 678     struct g_consumer *cp, struct g_provider *pp)
 679 {
 680         if (indent == NULL) {   /* plaintext */
 681                 sbuf_printf(sb, " units %d queues %d",
 682                         me.units, me.queues);
 683         }
 684 }
 685
 686 static struct g_gsched g_rr = {
 687         .gs_name = "rr",
 688         .gs_priv_size = sizeof(struct g_rr_queue),
 689         .gs_init = g_rr_init,
 690         .gs_fini = g_rr_fini,
 691         .gs_start = g_rr_start,
 692         .gs_done = g_rr_done,
 693         .gs_next = g_rr_next,
 694         .gs_dumpconf = g_rr_dumpconf,
 695         .gs_init_class = g_rr_init_class,
 696         .gs_fini_class = g_rr_fini_class,
 697 };
 698
 699 DECLARE_GSCHED_MODULE(rr, &g_rr);