2 * Copyright (c) 2004 Lukas Ertl
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/kthread.h>
35 #include <sys/libkern.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
39 #include <sys/mutex.h>
40 #include <sys/systm.h>
42 #include <geom/geom.h>
43 #include <geom/vinum/geom_vinum_var.h>
44 #include <geom/vinum/geom_vinum.h>
/*
 * Forward declarations of the two per-bio handlers called from
 * gv_vol_worker().  The worker hands a bio to gv_vol_completed_request()
 * when its bio_cflags carry GV_BIO_DONE; gv_vol_normal_request() is the
 * other handler it calls.
 */
46 static void gv_vol_completed_request(struct gv_volume *, struct bio *);
47 static void gv_vol_normal_request(struct gv_volume *, struct bio *);
/*
 * Orphan method of the volume geom (gv_volume_taste() installs it as
 * gp->orphan).  From the statements visible here it: gives back any access
 * counts the consumer still holds, records the provider's error, destroys
 * the consumer, and -- if execution reaches that far -- calls
 * gv_kill_vol_thread() and withers the geom with the recorded error.
 *
 * NOTE(review): the embedded line numbers jump (50 -> 58, 61 -> 65,
 * 66 -> 70), so the return type, local declarations, braces and some
 * statements are not part of this listing; confirm against the full file.
 */
50 gv_volume_orphan(struct g_consumer *cp)
58 g_trace(G_T_TOPOLOGY, "gv_volume_orphan(%s)", gp->name);
/* Undo whatever read/write/exclusive counts are still held on cp. */
59 if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
60 g_access(cp, -cp->acr, -cp->acw, -cp->ace);
/* Saved now because cp is destroyed before the error is used below. */
61 error = cp->provider->error;
65 g_destroy_consumer(cp);
/*
 * Other consumers are still attached to the geom.  The statement guarded
 * by this test is not visible; presumably the function returns here so the
 * geom survives -- TODO confirm.
 */
66 if (!LIST_EMPTY(&gp->consumer))
70 gv_kill_vol_thread(v);
74 g_wither_geom(gp, error);
77 /* We end up here after the requests to our plexes are done. */
/*
 * Completion callback: gv_vol_normal_request() stores this function in
 * bio_done of every bio it clones.  It tags the finished bio with
 * GV_BIO_DONE and appends it to the volume's bio queue while holding
 * bqueue_mtx; gv_vol_worker() later takes it off the queue and, because of
 * the flag, passes it to gv_vol_completed_request().
 */
79 gv_volume_done(struct bio *bp)
/* The volume softc is reached through the geom that bio_from belongs to. */
83 v = bp->bio_from->geom->softc;
84 bp->bio_cflags |= GV_BIO_DONE;
85 mtx_lock(&v->bqueue_mtx);
/* Appended at the tail, unlike new requests, which are disksorted. */
86 bioq_insert_tail(v->bqueue, bp);
/*
 * NOTE(review): original line 87 is missing from this listing; presumably
 * it wakes the worker sleeping on v -- confirm.
 */
88 mtx_unlock(&v->bqueue_mtx);
/*
 * Start method of the volume geom (gv_volume_taste() installs it as
 * gp->start).  Visible behaviour:
 *   - some requests are completed immediately with EOPNOTSUPP; the test
 *     that selects them sits on lines missing from this listing;
 *   - a request for a volume whose state is not GV_VOL_UP is completed
 *     with ENXIO;
 *   - any request that reaches the end is disksorted into the volume's
 *     bio queue under bqueue_mtx, where gv_vol_worker() picks it up.
 */
92 gv_volume_start(struct bio *bp)
103 g_io_deliver(bp, EOPNOTSUPP);
/* Incoming bios address our provider, so the softc comes from bio_to. */
107 v = bp->bio_to->geom->softc;
108 if (v->state != GV_VOL_UP) {
109 g_io_deliver(bp, ENXIO);
113 mtx_lock(&v->bqueue_mtx);
114 bioq_disksort(v->bqueue, bp);
/*
 * NOTE(review): original line 115 is missing from this listing; presumably
 * a wakeup of the worker -- confirm.
 */
116 mtx_unlock(&v->bqueue_mtx);
/*
 * Body of the per-volume kernel thread; gv_volume_taste() starts it with
 * kthread_create() and passes the volume as the argument.
 *
 * Visible flow: with bqueue_mtx held, check GV_VOL_THREAD_DIE, take the
 * first bio off the volume queue, and sleep on v for at most hz/10 ticks
 * (a tenth of a second) on the msleep() path.  A bio is processed with the
 * mutex released and dispatched on GV_BIO_DONE; the mutex is re-taken
 * afterwards.  Once out of that sequence the mutex is dropped and
 * GV_VOL_THREAD_DEAD is set.
 *
 * NOTE(review): the loop header, the NULL test on bp, the else between the
 * two handlers and the thread exit are on lines missing from this listing.
 */
120 gv_vol_worker(void *arg)
126 KASSERT(v != NULL, ("NULL v"));
127 mtx_lock(&v->bqueue_mtx);
129 /* We were signaled to exit. */
130 if (v->flags & GV_VOL_THREAD_DIE)
133 /* Take the first BIO from our queue. */
134 bp = bioq_takefirst(v->bqueue);
/*
 * msleep() drops bqueue_mtx while asleep and re-acquires it on return; the
 * hz/10 timeout bounds how long a set GV_VOL_THREAD_DIE can go unnoticed.
 */
136 msleep(v, &v->bqueue_mtx, PRIBIO, "-", hz/10);
/* The handlers run unlocked; those visible here take bqueue_mtx themselves. */
139 mtx_unlock(&v->bqueue_mtx);
141 if (bp->bio_cflags & GV_BIO_DONE)
142 gv_vol_completed_request(v, bp);
144 gv_vol_normal_request(v, bp);
146 mtx_lock(&v->bqueue_mtx);
148 mtx_unlock(&v->bqueue_mtx);
/*
 * NOTE(review): the flag is written after bqueue_mtx has been released;
 * confirm that whoever waits for GV_VOL_THREAD_DEAD tolerates that.
 */
149 v->flags |= GV_VOL_THREAD_DEAD;
/*
 * Handle a cloned bio that has come back from a plex (the worker calls
 * this for bios flagged GV_BIO_DONE).
 *
 * v  - volume the request belongs to
 * bp - the finished clone; its parent (bio_parent) is the original request
 *
 * Visible behaviour: the first error seen is copied into the parent, then
 * the code branches on the parent's command.  One branch can put the parent
 * back on the volume queue (disksorted, under bqueue_mtx) and sets
 * GV_BIO_RETRY when cp2 has no successor in the consumer list.  Another
 * branch records GV_BIO_SUCCEED when this clone finished without error.
 * When bio_inbed equals bio_children the parent is delivered.
 *
 * NOTE(review): the case labels, the initialisation of cp, the loop around
 * the cp2 lookup and the statements between the visible ones are on lines
 * missing from this listing; which command each branch serves is therefore
 * not shown here.
 */
156 gv_vol_completed_request(struct gv_volume *v, struct bio *bp)
160 struct g_consumer *cp, *cp2;
162 pbp = bp->bio_parent;
/* Keep only the first error reported by any child. */
164 if (pbp->bio_error == 0)
165 pbp->bio_error = bp->bio_error;
167 switch (pbp->bio_cmd) {
169 if (bp->bio_error == 0)
172 if (pbp->bio_cflags & GV_BIO_RETRY)
175 /* Check if we have another plex left. */
178 cp2 = LIST_NEXT(cp, consumer);
/* cp2 is the last consumer in the list: mark the parent GV_BIO_RETRY. */
182 if (LIST_NEXT(cp2, consumer) == NULL)
183 pbp->bio_cflags |= GV_BIO_RETRY;
/* Hand the parent back to the worker through the volume queue. */
187 mtx_lock(&v->bqueue_mtx);
188 bioq_disksort(v->bqueue, pbp);
189 mtx_unlock(&v->bqueue_mtx);
194 /* Remember if this write request succeeded. */
195 if (bp->bio_error == 0)
196 pbp->bio_cflags |= GV_BIO_SUCCEED;
200 /* When the original request is finished, we deliver it. */
202 if (pbp->bio_inbed == pbp->bio_children) {
/*
 * The statement guarded by this test (original line 204) is missing;
 * presumably it clears bio_error once a child succeeded -- confirm.
 */
203 if (pbp->bio_cflags & GV_BIO_SUCCEED)
/* bio_completed is taken from the length of this clone (bp), not pbp. */
205 pbp->bio_completed = bp->bio_length;
206 g_io_deliver(pbp, pbp->bio_error);
/*
 * Dispatch a request taken from the volume queue to the plexes (the worker
 * calls this for bios not flagged GV_BIO_DONE).
 *
 * v  - volume the request is addressed to
 * bp - the request to clone and forward
 *
 * Two paths are visible after the switch on bio_cmd:
 *   - a single-clone path that sends one clone to one plex, chosen by
 *     walking the plex list starting after v->last_read_plex, and then
 *     remembers that plex in v->last_read_plex;
 *   - a fan-out path that creates one clone per plex, parks the clones on
 *     a local queue, and issues them only after all clones exist.
 * Every clone gets gv_volume_done() as its completion callback.
 *
 * NOTE(review): the case labels, the NULL checks after g_clone_bio(), the
 * loop around the plex search and the comment delimiters of the original
 * are on lines missing from this listing; confirm against the full file.
 */
213 gv_vol_normal_request(struct gv_volume *v, struct bio *bp)
215 struct bio_queue_head queue;
217 struct gv_plex *p, *lp;
222 switch (bp->bio_cmd) {
224 cbp = g_clone_bio(bp);
/* Presumably reached only when the clone could not be allocated. */
226 g_io_deliver(bp, ENOMEM);
229 cbp->bio_done = gv_volume_done;
/*
 * Plex search: start at the plex after the one used last, or at the first
 * plex (the conditions choosing between these assignments are not
 * visible).  A plex is accepted when its state is above GV_PLEX_DEGRADED,
 * or at least GV_PLEX_DEGRADED if it is a RAID5 plex.
 */
231 * Try to find a good plex where we can send the request to.
232 * The plex either has to be up, or it's a degraded RAID5 plex.
234 lp = v->last_read_plex;
236 lp = LIST_FIRST(&v->plexes);
237 p = LIST_NEXT(lp, in_volume);
240 p = LIST_FIRST(&v->plexes);
241 if ((p->state > GV_PLEX_DEGRADED) ||
242 (p->state >= GV_PLEX_DEGRADED &&
243 p->org == GV_PLEX_RAID5))
245 p = LIST_NEXT(p, in_volume);
/*
 * The request fails with ENXIO when the candidate is unusable: a RAID5
 * plex below GV_PLEX_DEGRADED, or any other plex at or below it.  The
 * first operand of this condition is on a missing line.
 */
249 (p->org == GV_PLEX_RAID5 && p->state < GV_PLEX_DEGRADED) ||
250 (p->org != GV_PLEX_RAID5 && p->state <= GV_PLEX_DEGRADED)) {
253 g_io_deliver(bp, ENXIO);
256 g_io_request(cbp, p->consumer);
257 v->last_read_plex = p;
/*
 * Fan-out path.  Plexes below GV_PLEX_DEGRADED are tested for (presumably
 * skipped; original line 266 is missing).  Clones are collected on the
 * local queue first so that a failed g_clone_bio() can drain the queue
 * before any sub-request has been issued.
 */
264 LIST_FOREACH(p, &v->plexes, in_volume) {
265 if (p->state < GV_PLEX_DEGRADED)
267 cbp = g_clone_bio(bp);
/* Failure path: empty the local queue, then fail the request. */
269 for (cbp = bioq_first(&queue); cbp != NULL;
270 cbp = bioq_first(&queue)) {
271 bioq_remove(&queue, cbp);
/* Deliver ENOMEM unless the request already carries another error. */
274 if (bp->bio_error == 0)
275 bp->bio_error = ENOMEM;
276 g_io_deliver(bp, bp->bio_error);
279 bioq_insert_tail(&queue, cbp);
280 cbp->bio_done = gv_volume_done;
/* bio_caller1 carries the target consumer until the clone is issued. */
281 cbp->bio_caller1 = p->consumer;
283 /* Fire off all sub-requests. */
284 for (cbp = bioq_first(&queue); cbp != NULL;
285 cbp = bioq_first(&queue)) {
286 bioq_remove(&queue, cbp);
287 g_io_request(cbp, cbp->bio_caller1);
/*
 * Access method of the volume geom (gv_volume_taste() installs it as
 * gp->access).  The requested read/write/exclusive deltas are applied to
 * every consumer of the geom in turn.  A second walk over the same list
 * calls g_access() with the negated deltas, i.e. it undoes the change.
 *
 * pp         - the volume's provider
 * dr, dw, de - access-count deltas handed to g_access() unchanged
 *
 * NOTE(review): the test on 'error', the condition that stops the undo
 * walk and the return statements are on lines missing from this listing;
 * presumably the undo runs only after a failure and stops at the consumer
 * that failed -- confirm.
 */
294 gv_volume_access(struct g_provider *pp, int dr, int dw, int de)
297 struct g_consumer *cp, *cp2;
303 LIST_FOREACH(cp, &gp->consumer, consumer) {
304 error = g_access(cp, dr, dw, de);
306 LIST_FOREACH(cp2, &gp->consumer, consumer) {
309 g_access(cp2, -dr, -dw, -de);
/*
 * Taste method of the VINUMVOLUME class.
 *
 * mp    - this class
 * pp    - provider being offered; only providers whose geom belongs to the
 *         "VINUMPLEX" class are of interest
 * flags - unused
 *
 * Visible steps: locate the vinum geom and its softc, check the class name
 * of the geom, look up the volume named by the plex (p->volume), create
 * the volume geom if the volume has none, make sure the volume has a bio
 * queue, a queue mutex and a worker thread, attach a new consumer whose
 * access counts match those of an already attached consumer, link the plex
 * into the volume, and create the "gvinum/<volume>" provider sized after
 * the tasted provider.
 *
 * NOTE(review): local declarations, NULL checks, return statements and the
 * g_attach() call are on lines missing from this listing; confirm the
 * exact control flow against the full file.
 */
317 static struct g_geom *
318 gv_volume_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
321 struct g_provider *pp2;
322 struct g_consumer *cp, *ocp;
328 g_trace(G_T_TOPOLOGY, "gv_volume_taste(%s, %s)", mp->name, pp->name);
331 /* First, find the VINUM class and its associated geom. */
332 gp = find_vinum_geom();
337 KASSERT(sc != NULL, ("gv_volume_taste: NULL sc"));
341 /* We only want to attach to plexes. */
/* strcmp() is non-zero on mismatch, so this is true for non-plex geoms. */
342 if (strcmp(gp->class->name, "VINUMPLEX"))
348 /* Let's see if the volume this plex wants is already configured. */
349 v = gv_find_vol(sc, p->volume);
/* First plex of this volume: create the geom and install its methods. */
352 if (v->geom == NULL) {
353 gp = g_new_geomf(mp, "%s", p->volume);
354 gp->start = gv_volume_start;
355 gp->orphan = gv_volume_orphan;
356 gp->access = gv_volume_access;
362 /* Create bio queue, queue mutex, and worker thread, if necessary. */
363 if (v->bqueue == NULL) {
364 v->bqueue = g_malloc(sizeof(struct bio_queue_head),
366 bioq_init(v->bqueue);
/*
 * NOTE(review): the mutex protects the volume's queue but is named
 * "gv_plex"; looks like a leftover from the plex code -- confirm.
 */
368 if (mtx_initialized(&v->bqueue_mtx) == 0)
369 mtx_init(&v->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
/*
 * NOTE(review): the result of kthread_create() is not stored on the
 * visible line and GV_VOL_THREAD_ACTIVE is set right after; confirm that a
 * failed thread creation is acceptable here.
 */
371 if (!(v->flags & GV_VOL_THREAD_ACTIVE)) {
372 kthread_create(gv_vol_worker, v, NULL, 0, 0, "gv_v %s",
374 v->flags |= GV_VOL_THREAD_ACTIVE;
378 * Create a new consumer and attach it to the plex geom. Since this
379 * volume might already have a plex attached, we need to adjust the
380 * access counts of the new consumer.
/* ocp is read before cp is created, so it is never the new consumer. */
382 ocp = LIST_FIRST(&gp->consumer);
383 cp = g_new_consumer(gp);
/* Mirror the counts of the existing consumer onto the new one. */
385 if ((ocp != NULL) && (ocp->acr > 0 || ocp->acw > 0 || ocp->ace > 0)) {
386 error = g_access(cp, ocp->acr, ocp->acw, ocp->ace);
388 printf("GEOM_VINUM: failed g_access %s -> %s; "
389 "errno %d\n", v->name, p->name, error);
391 g_destroy_consumer(cp);
/* Link the plex into the volume unless it already points at v. */
400 if (p->vol_sc != v) {
403 LIST_INSERT_HEAD(&v->plexes, p, in_volume);
406 /* We need to setup a new VINUMVOLUME geom. */
/* Size and sector size are copied from the tasted provider (pp). */
408 pp2 = g_new_providerf(gp, "gvinum/%s", v->name);
409 pp2->mediasize = pp->mediasize;
410 pp2->sectorsize = pp->sectorsize;
411 g_error_provider(pp2, 0);
412 v->size = pp2->mediasize;
/*
 * destroy_geom method of the VINUMVOLUME class (see g_vinum_volume_class).
 * From the visible statements it calls gv_kill_vol_thread() for the volume
 * and withers the geom with ENXIO.
 *
 * NOTE(review): the rest of the parameter list, the local declarations and
 * the return statement are on lines missing from this listing.
 */
421 gv_volume_destroy_geom(struct gctl_req *req, struct g_class *mp,
426 g_trace(G_T_TOPOLOGY, "gv_volume_destroy_geom: %s", gp->name);
430 gv_kill_vol_thread(v);
431 g_wither_geom(gp, ENXIO);
/*
 * GEOM class descriptor for vinum volumes.  Only the taste and
 * destroy_geom class methods are set here; the per-geom start, orphan and
 * access methods are installed by gv_volume_taste() when it creates a geom.
 */
435 #define VINUMVOLUME_CLASS_NAME "VINUMVOLUME"
437 static struct g_class g_vinum_volume_class = {
438 .name = VINUMVOLUME_CLASS_NAME,
439 .version = G_VERSION,
440 .taste = gv_volume_taste,
441 .destroy_geom = gv_volume_destroy_geom,
/* Registers the class with GEOM under the module name g_vinum_volume. */
444 DECLARE_GEOM_CLASS(g_vinum_volume_class, g_vinum_volume);