2 * Copyright (c) 2006-2007 Matthew Jacob <mjacob@FreeBSD.org>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * Based upon work by Pawel Jakub Dawidek <pjd@FreeBSD.org> for all of the
28 * fine geom examples, and by Poul Henning Kamp <phk@FreeBSD.org> for GEOM
29 * itself, all of which is most gratefully acknowledged.
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/module.h>
39 #include <sys/mutex.h>
41 #include <sys/sysctl.h>
42 #include <sys/kthread.h>
43 #include <sys/malloc.h>
44 #include <geom/geom.h>
45 #include <geom/multipath/g_multipath.h>
/*
 * Sysctl tree for this class: a multipath node is declared under
 * _kern_geom, and beneath it a read/write unsigned debug knob.
 */
48 SYSCTL_DECL(_kern_geom);
49 SYSCTL_NODE(_kern_geom, OID_AUTO, multipath, CTLFLAG_RW, 0,
50 "GEOM_MULTIPATH tunables");
/* Backing variable for the debug knob; starts at 0. */
51 static u_int g_multipath_debug = 0;
52 SYSCTL_UINT(_kern_geom_multipath, OID_AUTO, debug, CTLFLAG_RW,
53 &g_multipath_debug, 0, "Debug level");
/*
 * State variable for the g_multipath_kt worker thread.  The head of this
 * declaration is not visible in this listing; elsewhere in the file the
 * values GKT_RUN and GKT_DIE are stored in it, and it doubles as the
 * sleep/wakeup channel between the worker, g_multipath_done and
 * g_multipath_fini.
 */
59 } g_multipath_kt_state;
/*
 * Queue of bios handed from g_multipath_done to the worker thread, and
 * the mutex that is held around every visible access to that queue.
 */
60 static struct bio_queue_head gmtbq;
61 static struct mtx gmtbq_mtx;
/*
 * Forward declarations for the orphan/start/done I/O path, the worker
 * thread entry point, and the destroy and rotate helpers.
 */
63 static void g_multipath_orphan(struct g_consumer *);
64 static void g_multipath_start(struct bio *);
65 static void g_multipath_done(struct bio *);
66 static void g_multipath_done_error(struct bio *);
67 static void g_multipath_kt(void *);
69 static int g_multipath_destroy(struct g_geom *);
71 g_multipath_destroy_geom(struct gctl_req *, struct g_class *, struct g_geom *);
73 static int g_multipath_rotate(struct g_geom *);
/* Class method hooks; these are the functions wired into g_multipath_class. */
75 static g_taste_t g_multipath_taste;
76 static g_ctl_req_t g_multipath_config;
77 static g_init_t g_multipath_init;
78 static g_fini_t g_multipath_fini;
/*
 * GEOM class descriptor for multipath: supplies the class name and the
 * ctlreq, taste, destroy_geom, init and fini methods defined in this file.
 */
80 struct g_class g_multipath_class = {
81 .name = G_MULTIPATH_CLASS_NAME,
83 .ctlreq = g_multipath_config,
84 .taste = g_multipath_taste,
85 .destroy_geom = g_multipath_destroy_geom,
86 .init = g_multipath_init,
87 .fini = g_multipath_fini
/*
 * g_mpd: event callback that removes one consumer from its geom.
 *
 * arg:   the consumer to remove (g_multipath_done_error posts this event
 *        with the consumer as its argument).
 * flags: unused.
 *
 * NOTE(review): several lines of this function are absent from this
 * listing, including the assignment of cp from arg and any detach step;
 * confirm against the full source.
 */
94 g_mpd(void *arg, int flags __unused)
96 struct g_consumer *cp;
/* Give back whatever read/write/exclusive counts the consumer still holds. */
100 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
101 g_access(cp, -cp->acr, -cp->acw, -cp->ace);
/* Report which provider is leaving which geom, then destroy the consumer. */
103 printf("GEOM_MULTIPATH: %s removed from %s\n",
104 cp->provider->name, cp->geom->name);
107 g_destroy_consumer(cp);
/*
 * g_multipath_orphan: orphan method installed on multipath geoms.
 *
 * cp: the consumer whose provider went away.
 *
 * The MP_POSTED bit in cp->index makes the visible work happen only once
 * per consumer: the bit is set and the orphaning is logged.
 * NOTE(review): whatever else happens inside the guarded block is on
 * lines absent from this listing.
 */
111 g_multipath_orphan(struct g_consumer *cp)
113 if ((cp->index & MP_POSTED) == 0) {
114 cp->index |= MP_POSTED;
115 printf("GEOM_MULTIPATH: %s orphaned in %s\n",
116 cp->provider->name, cp->geom->name);
/*
 * g_multipath_start: start method of the multipath geom.
 *
 * bp: request addressed to the multipath provider; the geom is looked up
 *     through bp->bio_to.
 *
 * Visible behavior: the request is completed with ENXIO or ENOMEM on the
 * two failure paths; otherwise a clone of bp, with g_multipath_done as its
 * completion routine, is issued to the consumer cp.
 * NOTE(review): the conditions guarding both g_io_deliver calls and the
 * selection of cp are on lines absent from this listing.  Presumably ENXIO
 * means no usable consumer and ENOMEM means the clone failed; confirm.
 */
122 g_multipath_start(struct bio *bp)
124 struct g_multipath_softc *sc;
126 struct g_consumer *cp;
129 gp = bp->bio_to->geom;
131 KASSERT(sc != NULL, ("NULL sc"));
134 g_io_deliver(bp, ENXIO);
137 cbp = g_clone_bio(bp);
139 g_io_deliver(bp, ENOMEM);
/* Completion of the clone is routed back through g_multipath_done. */
142 cbp->bio_done = g_multipath_done;
143 g_io_request(cbp, cp);
/*
 * g_multipath_done: completion routine for clones issued by
 * g_multipath_start.
 *
 * bp: the completed clone.
 *
 * A clone that failed with ENXIO or EIO is appended to gmtbq under
 * gmtbq_mtx and the worker thread is woken through the
 * g_multipath_kt_state channel.
 * NOTE(review): the handling of every other outcome is on lines absent
 * from this listing.
 */
147 g_multipath_done(struct bio *bp)
149 if (bp->bio_error == ENXIO || bp->bio_error == EIO) {
150 mtx_lock(&gmtbq_mtx);
151 bioq_insert_tail(&gmtbq, bp);
152 wakeup(&g_multipath_kt_state);
153 mtx_unlock(&gmtbq_mtx);
/*
 * g_multipath_done_error: failure handling for a clone that came back
 * with an error; called from the g_multipath_kt worker thread.
 *
 * bp: the failed clone; its parent request and the owning geom are reached
 *     through bp->bio_parent.
 *
 * Visible behavior: optionally schedules removal of the failed consumer,
 * moves cp_active to another consumer when the failed one was active, and
 * resubmits the parent request through g_multipath_start.
 * NOTE(review): the assignments of sc, cp and pp, any locking, and the
 * conditions around the final restart are on lines absent from this
 * listing.
 */
160 g_multipath_done_error(struct bio *bp)
164 struct g_multipath_softc *sc;
165 struct g_consumer *cp;
166 struct g_provider *pp;
169 * If we had a failure, we have to check first to see
170 * whether the consumer it failed on was the currently
171 * active consumer (i.e., this is the first in perhaps
172 * a number of failures). If so, we then switch consumers
173 * to the next available consumer.
177 pbp = bp->bio_parent;
178 gp = pbp->bio_to->geom;
/*
 * With nend equal to nstart on both the consumer and its provider, mark
 * the consumer MP_POSTED and post a g_mpd event for it.
 * NOTE(review): the result of g_post_event (called with M_NOWAIT) is not
 * examined on the visible line.
 */
184 if (cp->nend == cp->nstart && pp->nend == pp->nstart) {
185 cp->index |= MP_POSTED;
186 g_post_event(g_mpd, cp, M_NOWAIT, NULL);
/*
 * The failed consumer was the active path: log it, clear cp_active and
 * scan the consumer list for one that is not flagged MP_BAD.
 */
188 if (cp == sc->cp_active) {
189 struct g_consumer *lcp;
190 printf("GEOM_MULTIPATH: %s failed in %s\n",
191 pp->name, sc->sc_name);
192 sc->cp_active = NULL;
193 LIST_FOREACH(lcp, &gp->consumer, consumer) {
194 if ((lcp->index & MP_BAD) == 0) {
199 if (sc->cp_active == NULL) {
200 printf("GEOM_MULTIPATH: out of providers for %s\n",
205 printf("GEOM_MULTIPATH: %s now active path in %s\n",
206 sc->cp_active->provider->name, sc->sc_name);
212 * If we can fruitfully restart the I/O, do so.
217 g_multipath_start(pbp);
/*
 * g_multipath_kt: body of the worker thread created by g_multipath_init.
 *
 * arg: opaque thread argument (g_multipath_init passes the class pointer);
 *      no visible line uses it.
 *
 * Runs while g_multipath_kt_state stays GKT_RUN: bios are taken from gmtbq
 * and passed to g_multipath_done_error, and the thread sleeps on the state
 * variable between batches.  Once the loop ends the mutex is released and
 * a wakeup is issued on the state variable, which is the channel
 * g_multipath_fini sleeps on.
 * NOTE(review): the inner loop structure and the test on the dequeued bio
 * are on lines absent from this listing.
 */
224 g_multipath_kt(void *arg)
227 g_multipath_kt_state = GKT_RUN;
228 mtx_lock(&gmtbq_mtx);
229 while (g_multipath_kt_state == GKT_RUN) {
233 bp = bioq_takefirst(&gmtbq);
/* The queue mutex is dropped while the failed bio is processed, then retaken. */
236 mtx_unlock(&gmtbq_mtx);
237 g_multipath_done_error(bp);
238 mtx_lock(&gmtbq_mtx);
/* Sleep on the state variable, releasing gmtbq_mtx, with a timeout of hz / 10. */
240 msleep(&g_multipath_kt_state, &gmtbq_mtx, PRIBIO,
241 "gkt:wait", hz / 10);
243 mtx_unlock(&gmtbq_mtx);
244 wakeup(&g_multipath_kt_state);
/*
 * g_multipath_access: access method of the multipath geom.
 *
 * pp:         the multipath provider whose access counts are changing.
 * dr, dw, de: read, write and exclusive deltas.
 *
 * The first loop applies the deltas to the consumers of the geom; the
 * second loop applies the negated deltas.
 * NOTE(review): the error test, the use of badcp and the return value are
 * on lines absent from this listing.  The second loop looks like a
 * rollback of consumers already changed when one g_access call fails;
 * confirm.
 */
250 g_multipath_access(struct g_provider *pp, int dr, int dw, int de)
253 struct g_consumer *cp, *badcp = NULL;
258 LIST_FOREACH(cp, &gp->consumer, consumer) {
259 error = g_access(cp, dr, dw, de);
268 LIST_FOREACH(cp, &gp->consumer, consumer) {
/* Result deliberately discarded via the (void) cast. */
271 (void) g_access(cp, -dr, -dw, -de);
/*
 * g_multipath_create: build a new multipath geom and its provider from a
 * metadata record.
 *
 * mp: the multipath class the geom is created in.
 * md: metadata supplying the name, uuid, size and sector size.
 *
 * Returns the geom pointer type shown in the signature.
 * NOTE(review): the return statements and the failure paths are on lines
 * absent from this listing; g_multipath_taste reports a failed create, so
 * a failure result presumably exists (likely NULL); confirm.
 */
276 static struct g_geom *
277 g_multipath_create(struct g_class *mp, struct g_multipath_metadata *md)
279 struct g_multipath_softc *sc;
281 struct g_provider *pp;
/* Look for an existing geom in this class that already carries the requested name. */
285 LIST_FOREACH(gp, &mp->geom, geom) {
286 if (strcmp(gp->name, md->md_name) == 0) {
287 printf("GEOM_MULTIPATH: name %s already exists\n",
293 gp = g_new_geomf(mp, md->md_name);
/* Softc is allocated zeroed; M_WAITOK is passed to g_malloc. */
297 sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
299 gp->start = g_multipath_start;
300 gp->orphan = g_multipath_orphan;
301 gp->access = g_multipath_access;
/* Both copies are sized by the destination softc fields. */
302 memcpy(sc->sc_uuid, md->md_uuid, sizeof (sc->sc_uuid));
303 memcpy(sc->sc_name, md->md_name, sizeof (sc->sc_name));
/* The provider is published under the multipath/ prefix. */
305 pp = g_new_providerf(gp, "multipath/%s", md->md_name);
308 /* limit the provider to not have it stomp on metadata */
309 pp->mediasize = md->md_size - md->md_sectorsize;
310 pp->sectorsize = md->md_sectorsize;
312 g_error_provider(pp, 0);
316 if (gp->softc != NULL)
/*
 * g_multipath_add_disk: attach a provider to a multipath geom as one more
 * path.
 *
 * gp: the multipath geom; its softc must be set (asserted).
 * pp: the provider to attach.
 *
 * Visible steps: reject a provider that is already attached, create and
 * attach a new consumer, give it the same access counts as the consumer
 * that headed the list before the new one was created, log the addition,
 * and log when this provider becomes the active path because cp_active
 * was NULL.
 * NOTE(review): the return values and the assignment of cp_active are on
 * lines absent from this listing.
 */
324 g_multipath_add_disk(struct g_geom *gp, struct g_provider *pp)
326 struct g_multipath_softc *sc;
327 struct g_consumer *cp, *nxtcp;
333 KASSERT(sc, ("no softc"));
336 * Make sure that the passed provider isn't already attached
338 LIST_FOREACH(cp, &gp->consumer, consumer) {
339 if (cp->provider == pp)
343 printf("GEOM_MULTIPATH: provider %s already attached to %s\n",
/* Remember the current first consumer before the new one is created. */
347 nxtcp = LIST_FIRST(&gp->consumer);
348 cp = g_new_consumer(gp);
351 error = g_attach(cp, pp);
/* NOTE(review): unlike the other messages here, this one has no trailing newline. */
353 printf("GEOM_MULTIPATH: cannot attach %s to %s",
354 pp->name, sc->sc_name);
355 g_destroy_consumer(cp);
362 * Set access permissions on new consumer to match other consumers
364 if (nxtcp && (nxtcp->acr + nxtcp->acw + nxtcp->ace)) {
365 error = g_access(cp, nxtcp->acr, nxtcp->acw, nxtcp->ace);
367 printf("GEOM_MULTIPATH: cannot set access in "
368 "attaching %s to %s/%s (%d)\n",
369 pp->name, sc->sc_name, sc->sc_uuid, error);
371 g_destroy_consumer(cp);
375 printf("GEOM_MULTIPATH: adding %s to %s/%s\n",
376 pp->name, sc->sc_name, sc->sc_uuid);
377 if (sc->cp_active == NULL) {
379 printf("GEOM_MULTIPATH: %s now active path in %s\n",
380 pp->name, sc->sc_name);
/*
 * g_multipath_destroy: tear down a multipath geom.
 *
 * gp: the geom to destroy.
 *
 * Two guards are visible: a geom without a softc, and a first provider
 * that still has non-zero access counts.  Past those, the destruction is
 * logged and the geom is withered with ENXIO.
 * NOTE(review): the values returned by the guards and on success are on
 * lines absent from this listing, as is any release of the softc.
 */
386 g_multipath_destroy(struct g_geom *gp)
388 struct g_provider *pp;
391 if (gp->softc == NULL)
393 pp = LIST_FIRST(&gp->provider);
394 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0))
396 printf("GEOM_MULTIPATH: destroying %s\n", gp->name);
399 g_wither_geom(gp, ENXIO);
/*
 * g_multipath_destroy_geom: destroy_geom method of the class.
 *
 * Returns the result of g_multipath_destroy for the geom.  No visible
 * line uses req or mp.
 */
404 g_multipath_destroy_geom(struct gctl_req *req, struct g_class *mp,
408 return (g_multipath_destroy(gp));
/*
 * g_multipath_rotate: switch the active path of a geom to another
 * consumer.
 *
 * gp: the multipath geom; its softc holds cp_active.
 *
 * Walks the consumer list looking for one that is not flagged MP_BAD and
 * is not the current active consumer, and logs the new active path.
 * NOTE(review): the assignment of cp_active, the loop exit and the return
 * value are on lines absent from this listing.
 */
412 g_multipath_rotate(struct g_geom *gp)
414 struct g_consumer *lcp;
415 struct g_multipath_softc *sc = gp->softc;
420 LIST_FOREACH(lcp, &gp->consumer, consumer) {
421 if ((lcp->index & MP_BAD) == 0) {
422 if (sc->cp_active != lcp) {
429 printf("GEOM_MULTIPATH: %s now active path in %s\n",
430 lcp->provider->name, sc->sc_name);
/*
 * g_multipath_init: init method of the class.
 *
 * mp: the class; passed through as the argument of the worker thread.
 *
 * Initializes gmtbq_mtx as a default mutex and creates the g_mp_kt kernel
 * process running g_multipath_kt.  The state is set to GKT_RUN only when
 * kproc_create returns 0.
 * NOTE(review): initialization of the gmtbq queue itself is not on a
 * visible line.
 */
436 g_multipath_init(struct g_class *mp)
439 mtx_init(&gmtbq_mtx, "gmtbq", NULL, MTX_DEF);
440 if (kproc_create(g_multipath_kt, mp, NULL, 0, 0, "g_mp_kt") == 0)
441 g_multipath_kt_state = GKT_RUN;
/*
 * g_multipath_fini: fini method of the class; asks the worker thread to
 * stop.
 *
 * mp: the class; no visible line uses it.
 *
 * Only acts when the state is GKT_RUN.  Under gmtbq_mtx the state is set
 * to GKT_DIE, the worker is woken, and the caller sleeps on the same
 * channel; g_multipath_kt issues a wakeup on that channel when its loop
 * ends.
 * NOTE(review): the remaining msleep arguments (wait message, timeout) are
 * on a line absent from this listing.
 */
445 g_multipath_fini(struct g_class *mp)
447 if (g_multipath_kt_state == GKT_RUN) {
448 mtx_lock(&gmtbq_mtx);
449 g_multipath_kt_state = GKT_DIE;
450 wakeup(&g_multipath_kt_state);
451 msleep(&g_multipath_kt_state, &gmtbq_mtx, PRIBIO,
453 mtx_unlock(&gmtbq_mtx);
/*
 * g_multipath_read_metadata: read and decode the multipath metadata of
 * the provider behind a consumer.
 *
 * cp: consumer attached to the provider to inspect.
 * md: output; filled in by multipath_metadata_decode.
 *
 * The consumer is opened for reading, one sector at offset
 * mediasize - sectorsize (the last sector) is read, and the read access
 * is dropped again before the buffer is decoded.
 * NOTE(review): the error checks, the release of buf and the return value
 * are on lines absent from this listing.
 */
458 g_multipath_read_metadata(struct g_consumer *cp,
459 struct g_multipath_metadata *md)
461 struct g_provider *pp;
466 error = g_access(cp, 1, 0, 0);
471 buf = g_read_data(cp, pp->mediasize - pp->sectorsize,
472 pp->sectorsize, &error);
474 g_access(cp, -1, 0, 0);
477 multipath_metadata_decode(buf, md);
/*
 * g_multipath_taste: taste method of the class; inspects a provider for
 * multipath metadata and, when it matches, adds it to a multipath geom.
 *
 * mp:    the multipath class.
 * pp:    the provider being tasted.
 * flags: unused.
 *
 * Visible flow:
 *  1. A throwaway geom named multipath:taste and a consumer are created
 *     and used to read the metadata through g_multipath_read_metadata.
 *  2. The magic string and version are checked; a version mismatch is
 *     always logged, a magic mismatch only when g_multipath_debug is set.
 *  3. The class is searched for a geom with the same uuid and, separately,
 *     for one with the same name.
 *  4. On a name clash the metadata name is replaced by the name plus a
 *     random number; otherwise the name of the existing geom is copied in.
 *  5. A geom is created through g_multipath_create where needed, and the
 *     provider is added with g_multipath_add_disk; g_multipath_destroy is
 *     called on a failure path.
 * NOTE(review): the conditions joining these steps, the cleanup of the
 * temporary geom and the return values are on lines absent from this
 * listing.
 */
482 static struct g_geom *
483 g_multipath_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
485 struct g_multipath_metadata md;
486 struct g_multipath_softc *sc;
487 struct g_consumer *cp;
488 struct g_geom *gp, *gp1;
/* Temporary geom and consumer used only to read the metadata. */
493 gp = g_new_geomf(mp, "multipath:taste");
494 gp->start = g_multipath_start;
495 gp->access = g_multipath_access;
496 gp->orphan = g_multipath_orphan;
497 cp = g_new_consumer(gp);
499 error = g_multipath_read_metadata(cp, &md);
501 g_destroy_consumer(cp);
/* Providers without the multipath magic are only reported in debug mode. */
507 if (strcmp(md.md_magic, G_MULTIPATH_MAGIC) != 0) {
508 if (g_multipath_debug)
509 printf("%s is not MULTIPATH\n", pp->name);
/*
 * NOTE(review): the two adjacent string literals below join with two
 * spaces between the word version and the %d conversion.
 */
512 if (md.md_version != G_MULTIPATH_VERSION) {
513 printf("%s has version %d multipath id- this module is version "
514 " %d: rejecting\n", pp->name, md.md_version,
515 G_MULTIPATH_VERSION);
518 if (g_multipath_debug)
519 printf("MULTIPATH: %s/%s\n", md.md_name, md.md_uuid);
522 * Let's check if such a device already is present. We check against
523 * uuid alone first because that's the true distinguishor. If that
524 * passes, then we check for name conflicts. If there are conflicts,
527 * The whole purpose of this is to solve the problem that people don't
528 * pick good unique names, but good unique names (like uuids) are a
529 * pain to use. So, we allow people to build GEOMs with friendly names
530 * and uuids, and modify the names in case there's a collision.
533 LIST_FOREACH(gp, &mp->geom, geom) {
537 if (strncmp(md.md_uuid, sc->sc_uuid, sizeof(md.md_uuid)) == 0)
541 LIST_FOREACH(gp1, &mp->geom, geom) {
547 if (strncmp(md.md_name, sc->sc_name, sizeof(md.md_name)) == 0)
552 * If gp is NULL, we had no extant MULTIPATH geom with this uuid.
554 * If gp1 is *not* NULL, that means we have a MULTIPATH geom extant
555 * with the same name (but a different UUID).
557 * If gp is NULL, then modify the name with a random number and
558 * complain, but allow the creation of the geom to continue.
560 * If gp is *not* NULL, just use the geom's name as we're attaching
561 * this disk to the (previously generated) name.
568 u_long rand = random();
570 snprintf(buf, sizeof (buf), "%s-%lu", md.md_name, rand);
571 printf("GEOM_MULTIPATH: geom %s/%s exists already\n",
572 sc->sc_name, sc->sc_uuid);
573 printf("GEOM_MULTIPATH: %s will be (temporarily) %s\n",
575 strlcpy(md.md_name, buf, sizeof(md.md_name));
577 strlcpy(md.md_name, sc->sc_name, sizeof(md.md_name));
582 gp = g_multipath_create(mp, &md);
584 printf("GEOM_MULTIPATH: cannot create geom %s/%s\n",
585 md.md_name, md.md_uuid);
594 KASSERT(sc != NULL, ("sc is NULL"));
595 error = g_multipath_add_disk(gp, pp);
598 g_multipath_destroy(gp);
/*
 * g_multipath_ctl_create: handler for the create verb; builds a multipath
 * geom from two named providers without on-disk metadata.
 *
 * req: control request carrying the parameters below; errors are reported
 *      through gctl_error on it.
 * mp:  the multipath class.
 *
 * Request parameters read on visible lines:
 *   arg0  name of the new multipath geom
 *   nargs argument count
 *   arg1  first provider name
 *   arg2  second provider name
 *   arg3  uuid string, required to be exactly 36 characters long
 *
 * Both provider names are compared against the /dev/ prefix before lookup.
 * The two providers must agree in mediasize and sectorsize.  A metadata
 * record is then assembled in memory, the geom is created through
 * g_multipath_create, and both providers are added with
 * g_multipath_add_disk; g_multipath_destroy is called on the failure paths
 * of either add.
 * NOTE(review): the NULL tests, the nargs comparison, the prefix stripping
 * and the identical-provider test are on lines absent from this listing.
 */
605 g_multipath_ctl_create(struct gctl_req *req, struct g_class *mp)
608 struct g_provider *pp0, *pp1;
609 struct g_multipath_metadata md;
610 const char *name, *mpname, *uuid;
611 static const char devpf[6] = "/dev/";
616 mpname = gctl_get_asciiparam(req, "arg0");
617 if (mpname == NULL) {
618 gctl_error(req, "No 'arg0' argument");
622 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
624 gctl_error(req, "No 'nargs' argument");
628 gctl_error(req, "missing device or uuid arguments");
/* First provider. */
632 name = gctl_get_asciiparam(req, "arg1");
634 gctl_error(req, "No 'arg1' argument");
637 if (strncmp(name, devpf, 5) == 0)
639 pp0 = g_provider_by_name(name);
641 gctl_error(req, "Provider %s is invalid", name);
/* Second provider, handled the same way as the first. */
645 name = gctl_get_asciiparam(req, "arg2");
647 gctl_error(req, "No 'arg2' argument");
650 if (strncmp(name, devpf, 5) == 0)
652 pp1 = g_provider_by_name(name);
654 gctl_error(req, "Provider %s is invalid", name);
658 uuid = gctl_get_asciiparam(req, "arg3");
660 gctl_error(req, "No uuid argument");
/* Only the length of the uuid is validated on the visible lines. */
663 if (strlen(uuid) != 36) {
664 gctl_error(req, "Malformed uuid argument");
669 * Check to make sure parameters from the two providers are the same
672 gctl_error(req, "providers %s and %s are the same",
673 pp0->name, pp1->name);
676 if (pp0->mediasize != pp1->mediasize) {
677 gctl_error(req, "Provider %s is %jd; Provider %s is %jd",
678 pp0->name, (intmax_t) pp0->mediasize,
679 pp1->name, (intmax_t) pp1->mediasize);
682 if (pp0->sectorsize != pp1->sectorsize) {
683 gctl_error(req, "Provider %s has sectorsize %u; Provider %s "
684 "has sectorsize %u", pp0->name, pp0->sectorsize,
685 pp1->name, pp1->sectorsize);
690 * cons up enough of a metadata structure to use.
692 memset(&md, 0, sizeof(md));
693 md.md_size = pp0->mediasize;
694 md.md_sectorsize = pp0->sectorsize;
695 strlcpy(md.md_name, mpname, sizeof(md.md_name));
696 strlcpy(md.md_uuid, uuid, sizeof(md.md_uuid));
698 gp = g_multipath_create(mp, &md);
701 error = g_multipath_add_disk(gp, pp0);
703 g_multipath_destroy(gp);
706 error = g_multipath_add_disk(gp, pp1);
708 g_multipath_destroy(gp);
/*
 * g_multipath_find_geom: look up a geom of the class by exact name.
 *
 * mp:   the class whose geom list is searched.
 * name: name to match with strcmp.
 *
 * NOTE(review): the return statements are on lines absent from this
 * listing; the callers test the result before using it, so a not-found
 * result (presumably NULL) exists.
 */
713 static struct g_geom *
714 g_multipath_find_geom(struct g_class *mp, const char *name)
718 LIST_FOREACH(gp, &mp->geom, geom) {
719 if (strcmp(gp->name, name) == 0) {
/*
 * g_multipath_ctl_destroy: handler for the destroy verb.
 *
 * req: control request; arg0 names the geom, errors go through gctl_error.
 * mp:  the multipath class.
 *
 * Finds the geom by name and calls g_multipath_destroy on it, reporting a
 * missing argument, an unknown device or a failed destroy.
 * NOTE(review): the conditions guarding each error message are on lines
 * absent from this listing.
 */
727 g_multipath_ctl_destroy(struct gctl_req *req, struct g_class *mp)
735 name = gctl_get_asciiparam(req, "arg0");
737 gctl_error(req, "No 'arg0' argument");
740 gp = g_multipath_find_geom(mp, name);
742 gctl_error(req, "Device %s is invalid", name);
745 error = g_multipath_destroy(gp);
747 gctl_error(req, "failed to destroy %s (err=%d)", name, error);
/*
 * g_multipath_ctl_rotate: handler for the rotate verb.
 *
 * req: control request; arg0 names the geom, errors go through gctl_error.
 * mp:  the multipath class.
 *
 * Finds the geom by name and calls g_multipath_rotate on it, reporting a
 * missing argument, an unknown device or a failed rotate.
 * NOTE(review): the conditions guarding each error message are on lines
 * absent from this listing.
 */
752 g_multipath_ctl_rotate(struct gctl_req *req, struct g_class *mp)
760 name = gctl_get_asciiparam(req, "arg0");
762 gctl_error(req, "No 'arg0' argument");
765 gp = g_multipath_find_geom(mp, name);
767 gctl_error(req, "Device %s is invalid", name);
770 error = g_multipath_rotate(gp);
772 gctl_error(req, "failed to rotate %s (err=%d)", name, error);
/*
 * g_multipath_ctl_getactive: handler for the getactive verb; reports the
 * name of the provider behind the active path.
 *
 * req: control request; arg0 names the geom, and the answer is returned
 *      through the output parameter.
 * mp:  the multipath class.
 *
 * The reply is built in an auto-sized sbuf: either the active provider
 * name or the word none, each followed by a newline.  The length handed to
 * gctl_set_param_err is sbuf_len + 1, which covers the terminating NUL.
 * NOTE(review): the sbuf is allocated before the argument checks.  Confirm
 * that the early-exit paths after the two gctl_error calls release it, and
 * that the sbuf is finished before sbuf_data is used; none of that is on a
 * visible line.
 */
777 g_multipath_ctl_getactive(struct gctl_req *req, struct g_class *mp)
781 struct g_multipath_softc *sc;
784 sb = sbuf_new_auto();
787 name = gctl_get_asciiparam(req, "arg0");
789 gctl_error(req, "No 'arg0' argument");
792 gp = g_multipath_find_geom(mp, name);
794 gctl_error(req, "Device %s is invalid", name);
799 sbuf_printf(sb, "%s\n", sc->cp_active->provider->name);
801 sbuf_printf(sb, "none\n");
804 gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
/*
 * g_multipath_config: ctlreq method of the class; validates the request
 * version and dispatches on the verb.
 *
 * req:  control request; must carry a version parameter equal to
 *       G_MULTIPATH_VERSION.
 * mp:   the multipath class, passed on to the verb handlers.
 * verb: one of create, destroy, rotate or getactive.
 *
 * A missing version, a version mismatch and an unrecognized verb are each
 * reported through gctl_error.
 */
809 g_multipath_config(struct gctl_req *req, struct g_class *mp, const char *verb)
813 version = gctl_get_paraml(req, "version", sizeof(*version));
814 if (version == NULL) {
815 gctl_error(req, "No 'version' argument");
816 } else if (*version != G_MULTIPATH_VERSION) {
817 gctl_error(req, "Userland and kernel parts are out of sync");
818 } else if (strcmp(verb, "create") == 0) {
819 g_multipath_ctl_create(req, mp);
820 } else if (strcmp(verb, "destroy") == 0) {
821 g_multipath_ctl_destroy(req, mp);
822 } else if (strcmp(verb, "rotate") == 0) {
823 g_multipath_ctl_rotate(req, mp);
824 } else if (strcmp(verb, "getactive") == 0) {
825 g_multipath_ctl_getactive(req, mp);
827 gctl_error(req, "Unknown verb %s", verb);
/* Declare g_multipath_class as a GEOM class under the name g_multipath. */
830 DECLARE_GEOM_CLASS(g_multipath_class, g_multipath);