sys/geom/sched/gs_delay.c

   1 /*-
   2  * Copyright (c) 2015 Netflix, Inc.
   3  *
   4  * Derived from gs_rr.c:
   5  * Copyright (c) 2009-2010 Fabio Checconi
   6  * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
   7  * All rights reserved.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  28  * SUCH DAMAGE.
  29  */
  30
  31 /*
  32  * $Id$
  33  * $FreeBSD$
  34  *
  35  * A simple scheduler that just delays certain transactions by a certain
  36  * amount. We collect all the transactions that are 'done' and put them on
  37  * a queue. The queue is run through every so often and the transactions that
  38  * have taken longer than the threshold delay are completed.
  39  */
  40
  41 #include <sys/param.h>
  42 #include <sys/systm.h>
  43 #include <sys/kernel.h>
  44 #include <sys/bio.h>
  45 #include <sys/callout.h>
  46 #include <sys/malloc.h>
  47 #include <sys/module.h>
  48 #include <sys/proc.h>
  49 #include <sys/queue.h>
  50 #include <sys/sbuf.h>
  51 #include <sys/sysctl.h>
  52 #include "gs_scheduler.h"
  53
  54 /* Useful constants */
  55 #define BTFRAC_1US 18446744073709ULL    /* 2^64 / 1000000 */
  56
  57 /* list of scheduler instances */
  58 LIST_HEAD(g_scheds, g_delay_softc);
  59
  60 /*
  61  * Per device descriptor, holding the Round Robin list of queues
  62  * accessing the disk, a reference to the geom, and the timer.
  63  */
  64 struct g_delay_softc {
  65         struct g_geom   *sc_geom;
  66
  67         struct bio_queue_head sc_bioq;  /* queue of pending requests */
  68         struct callout  sc_wait;        /* timer for completing with delays */
  69
  70         /* Statistics */
  71         int             sc_in_flight;   /* requests in the driver */
  72 };
  73
  74 /*
  75  * parameters, config and stats
  76  */
  77 struct g_delay_params {
  78         uint64_t io;
  79         int     bypass;                 /* bypass scheduling */
  80         int     units;                  /* how many instances */
  81         int     latency;                /* How big a latncy are hoping for */
  82 };
  83
  84 static struct g_delay_params me = {
  85         .bypass = 0,
  86         .units = 0,
  87         .latency = 0,
  88         .io = 0,
  89 };
  90 struct g_delay_params *gs_delay_me = &me;
  91
  92 SYSCTL_DECL(_kern_geom_sched);
  93 static SYSCTL_NODE(_kern_geom_sched, OID_AUTO, delay, CTLFLAG_RW, 0,
  94     "GEOM_SCHED DELAY stuff");
  95 SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, bypass, CTLFLAG_RD,
  96     &me.bypass, 0, "Scheduler bypass");
  97 SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, units, CTLFLAG_RD,
  98     &me.units, 0, "Scheduler instances");
  99 SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, latency, CTLFLAG_RW,
 100     &me.latency, 0, "Minimum latency for requests, in microseconds (1/hz resolution)");
 101 SYSCTL_QUAD(_kern_geom_sched_delay, OID_AUTO, io, CTLFLAG_RW,
 102     &me.io, 0, "I/Os delayed\n");
 103
 104 static int
 105 g_delay_init_class(void *data, void *priv)
 106 {
 107         return (0);
 108 }
 109
 110 static void
 111 g_delay_fini_class(void *data, void *priv)
 112 {
 113 }
 114
 115 /*
 116  * Called on a request arrival, timeout or completion.
 117  * Try to serve a request among those queued.
 118  */
 119 static struct bio *
 120 g_delay_next(void *data, int force)
 121 {
 122         struct g_delay_softc *sc = data;
 123         struct bio *bp;
 124         struct bintime bt;
 125
 126         bp = bioq_first(&sc->sc_bioq);
 127         if (bp == NULL)
 128                 return (NULL);
 129
 130         /*
 131          * If the time isn't yet ripe for this bp to be let loose,
 132          * then the time isn't ripe for any of its friends either
 133          * since we insert in-order. Terminate if the bio hasn't
 134          * aged appropriately. Note that there's pathology here
 135          * such that we may be up to one tick early in releasing
 136          * this I/O. We could implement this up to a tick late too
 137          * but choose not to.
 138          */
 139         getbinuptime(&bt);      /* BIO's bio_t0 is uptime */
 140         if (bintime_cmp(&bp->bio_t0, &bt, >))
 141                 return (NULL);
 142         me.io++;
 143
 144         /*
 145          * The bp has mellowed enough, let it through and update stats.
 146          * If there's others, we'll catch them next time we get called.
 147          */
 148         sc->sc_in_flight++;
 149
 150         bp = bioq_takefirst(&sc->sc_bioq);
 151         return (bp);
 152 }
 153
 154 /*
 155  * Called when a real request for disk I/O arrives.
 156  * Locate the queue associated with the client.
 157  * If the queue is the one we are anticipating for, reset its timeout;
 158  * if the queue is not in the round robin list, insert it in the list.
 159  * On any error, do not queue the request and return -1, the caller
 160  * will take care of this request.
 161  */
 162 static int
 163 g_delay_start(void *data, struct bio *bp)
 164 {
 165         struct g_delay_softc *sc = data;
 166
 167         if (me.bypass)
 168                 return (-1);    /* bypass the scheduler */
 169
 170         bp->bio_caller1 = sc;
 171         getbinuptime(&bp->bio_t0);      /* BIO's bio_t0 is uptime */
 172         bintime_addx(&bp->bio_t0, BTFRAC_1US * me.latency);
 173
 174         /*
 175          * Keep the I/Os ordered. Lower layers will reorder as we release them down.
 176          * We rely on this in g_delay_next() so that we delay all things equally. Even
 177          * if we move to multiple queues to push stuff down the stack, we'll want to
 178          * insert in order and let the lower layers do whatever reordering they want.
 179          */
 180         bioq_insert_tail(&sc->sc_bioq, bp);
 181
 182         return (0);
 183 }
 184
 185 static void
 186 g_delay_timeout(void *data)
 187 {
 188         struct g_delay_softc *sc = data;
 189
 190         g_sched_lock(sc->sc_geom);
 191         g_sched_dispatch(sc->sc_geom);
 192         g_sched_unlock(sc->sc_geom);
 193         callout_reset(&sc->sc_wait, 1, g_delay_timeout, sc);
 194 }
 195
 196 /*
 197  * Module glue: allocate descriptor, initialize its fields.
 198  */
 199 static void *
 200 g_delay_init(struct g_geom *geom)
 201 {
 202         struct g_delay_softc *sc;
 203
 204         sc = malloc(sizeof *sc, M_GEOM_SCHED, M_WAITOK | M_ZERO);
 205         sc->sc_geom = geom;
 206         bioq_init(&sc->sc_bioq);
 207         callout_init(&sc->sc_wait, CALLOUT_MPSAFE);
 208         callout_reset(&sc->sc_wait, 1, g_delay_timeout, sc);
 209         me.units++;
 210
 211         return (sc);
 212 }
 213
 214 /*
 215  * Module glue -- drain the callout structure, destroy the
 216  * hash table and its element, and free the descriptor.
 217  */
 218 static void
 219 g_delay_fini(void *data)
 220 {
 221         struct g_delay_softc *sc = data;
 222
 223         /* We're force drained before getting here */
 224
 225         /* Kick out timers */
 226         callout_drain(&sc->sc_wait);
 227         me.units--;
 228         free(sc, M_GEOM_SCHED);
 229 }
 230
 231 /*
 232  * Called when the request under service terminates.
 233  * Start the anticipation timer if needed.
 234  */
 235 static void
 236 g_delay_done(void *data, struct bio *bp)
 237 {
 238         struct g_delay_softc *sc = data;
 239
 240         sc->sc_in_flight--;
 241
 242         g_sched_dispatch(sc->sc_geom);
 243 }
 244
 245 static void
 246 g_delay_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
 247     struct g_consumer *cp, struct g_provider *pp)
 248 {
 249 }
 250
 251 static struct g_gsched g_delay = {
 252         .gs_name = "delay",
 253         .gs_priv_size = 0,
 254         .gs_init = g_delay_init,
 255         .gs_fini = g_delay_fini,
 256         .gs_start = g_delay_start,
 257         .gs_done = g_delay_done,
 258         .gs_next = g_delay_next,
 259         .gs_dumpconf = g_delay_dumpconf,
 260         .gs_init_class = g_delay_init_class,
 261         .gs_fini_class = g_delay_fini_class,
 262 };
 263
 264 DECLARE_GSCHED_MODULE(delay, &g_delay);