2 * Copyright (c) 2009-2010 Fabio Checconi
3 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/*
 * Prototypes for GEOM-based disk scheduling algorithms.
 * See g_sched.c for generic documentation.
 *
 * This file is used by the kernel modules implementing the various
 * scheduling algorithms. They should provide all the methods
 * defined in struct g_gsched, and also invoke the macro
 *	DECLARE_GSCHED_MODULE
 * which registers the scheduling algorithm with the geom_sched module.
 *
 * The various scheduling algorithms do not need to know anything
 * about geom, they only need to handle the 'bio' requests they
 * receive, pass them down when needed, and use the locking interface
 * (g_sched_lock() / g_sched_unlock()).
 */
51 #include <sys/param.h>
52 #include <sys/kernel.h>
54 #include <sys/module.h>
55 #include <sys/queue.h>
56 #include <geom/geom.h>
/*
 * This is the interface exported to scheduling modules.
 *
 * gs_init() is called when our scheduling algorithm
 *    starts being used by a geom 'sched'.
 *
 * gs_fini() is called when the algorithm is released.
 *
 * gs_start() is called when a new request comes in. It should
 *    enqueue the request and return 0 on success, or return non-zero
 *    in case of failure (meaning the request is passed down).
 *    The scheduler can use bio->bio_caller1 to store a non-null
 *    pointer meaning the request is under its control.
 *
 * gs_next() is called in a loop by g_sched_dispatch(), right after
 *    gs_start(), or on timeouts or 'done' events. It should return
 *    immediately, either a pointer to the bio to be served or NULL
 *    if no bio should be served now. If force is specified, a
 *    work-conserving behavior is expected.
 *
 * gs_done() is called when a request under service completes.
 *    In turn the scheduler may decide to call the dispatch loop
 *    to serve other pending requests (or make sure there is a pending
 *    timeout to avoid stalls).
 *
 * gs_init_class() is called when a new client (as determined by
 *    the classifier) starts being used.
 *
 * gs_fini_class() is presumably the counterpart of gs_init_class(),
 *    called when a client stops being used -- TODO confirm against
 *    g_sched.c.
 *
 * gs_hash_unref() is called right before the class hashtable is
 *    destroyed; after this call, the scheduler is supposed to hold no
 *    more references to the elements in the table.
 */

/* Forward declarations for prototypes. */
struct g_geom;
struct bio;

typedef void		*gs_init_t(struct g_geom *geom);
typedef void		 gs_fini_t(void *data);
typedef int		 gs_start_t(void *data, struct bio *bio);
typedef void		 gs_done_t(void *data, struct bio *bio);
typedef struct bio	*gs_next_t(void *data, int force);
typedef int		 gs_init_class_t(void *data, void *priv);
typedef void		 gs_fini_class_t(void *data, void *priv);
typedef void		 gs_hash_unref_t(void *data);
112 gs_start_t *gs_start;
115 g_dumpconf_t *gs_dumpconf;
117 gs_init_class_t *gs_init_class;
118 gs_fini_class_t *gs_fini_class;
119 gs_hash_unref_t *gs_hash_unref;
121 LIST_ENTRY(g_gsched) glist;
/* Trace class for scheduler events; mapped onto the spare class KTR_SPARE4. */
#define	KTR_GSCHED	KTR_SPARE4

/* Make the M_GEOM_SCHED malloc type visible to every user of this header. */
MALLOC_DECLARE(M_GEOM_SCHED);
/*
 * Basic classification mechanism.  Each request is associated to
 * a g_sched_class, and each scheduler has the opportunity to set
 * its own private data for the given (class, geom) pair.  The
 * private data have a base type of g_sched_private, and are
 * extended at the end with the actual private fields of each
 * scheduler.
 */
136 struct g_sched_class {
140 LIST_ENTRY(g_sched_class) gsc_clist;
/*
 * Manipulate the classifier's data.  g_sched_get_class() gets a reference
 * to the class corresponding to bp in gp, allocating and initializing
 * it if necessary.  g_sched_put_class() releases the reference.
 * The returned value points to the private data for the class; the same
 * pointer is what g_sched_put_class() expects as priv.
 */
void	*g_sched_get_class(struct g_geom *gp, struct bio *bp);
void	 g_sched_put_class(struct g_geom *gp, void *priv);
154 static inline struct g_sched_class *
155 g_sched_priv2class(void *priv)
158 return ((struct g_sched_class *)((u_long)priv -
159 offsetof(struct g_sched_class, gsc_priv)));
163 g_sched_priv_ref(void *priv)
165 struct g_sched_class *gsc;
167 gsc = g_sched_priv2class(priv);
/*
 * Locking interface.  When each operation registered with the
 * scheduler is invoked, a per-instance lock is taken to protect
 * the data associated with it.  If the scheduler needs something
 * else to access the same data (e.g., a callout) it must use
 * g_sched_lock() / g_sched_unlock() around that access.
 */
void	g_sched_lock(struct g_geom *gp);
void	g_sched_unlock(struct g_geom *gp);
/*
 * Restart request dispatching.  Must be called with the per-instance
 * lock held.
 */
void	g_sched_dispatch(struct g_geom *geom);
/*
 * Simple gathering of statistical data, used by schedulers to collect
 * info on process history.  Just keep an exponential average of the
 * samples, with some extra bits of precision.
 *
 * gs_avg accumulates the weighted sum of the samples and gs_smpl the
 * matching weight; the average is their quotient (see g_savg_read()).
 */
struct g_savg {
	uint64_t	gs_avg;
	unsigned int	gs_smpl;
};

/*
 * Fold a new sample into the running average.
 *
 * Each accumulator loses 1/8 of its value (rounded down) before the new
 * contribution is added, so, starting from zero, gs_smpl grows by one per
 * call until it reaches 8 and then stays there.
 */
static inline void
g_savg_add_sample(struct g_savg *ss, uint64_t sample)
{

	/* EMA with alpha = 0.125, fixed point, 3 bits of precision. */
	ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
	ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
}

/*
 * Tell whether enough samples were collected for the average to be
 * meaningful.  Returns non-zero if so, 0 otherwise.
 */
static inline int
g_savg_valid(struct g_savg *ss)
{

	/* We want at least 8 samples to deem an average as valid. */
	return (ss->gs_smpl > 7);
}

/*
 * Return the current average, i.e. the accumulated sum divided by the
 * accumulated weight.  With no samples recorded yet the result is 0.
 */
static inline uint64_t
g_savg_read(struct g_savg *ss)
{

	/* No samples yet: report 0 instead of dividing by zero. */
	if (ss->gs_smpl == 0)
		return (0);
	return (ss->gs_avg / ss->gs_smpl);
}
222 * Declaration of a scheduler module.
224 int g_gsched_modevent(module_t mod, int cmd, void *arg);
226 #define DECLARE_GSCHED_MODULE(name, gsched) \
227 static moduledata_t name##_mod = { \
232 DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \
233 MODULE_DEPEND(name, geom_sched, 0, 0, 0);
237 #endif /* _G_GSCHED_H_ */