2 * Copyright (c) 2004 Lukas Ertl
3 * Copyright (c) 1997, 1998, 1999
4 * Nan Yang Computer Services Limited. All rights reserved.
6 * Parts written by Greg Lehey
8 * This software is distributed under the so-called ``Berkeley
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by Nan Yang Computer
23 * 4. Neither the name of the Company nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * This software is provided ``as is'', and any express or implied
28 * warranties, including, but not limited to, the implied warranties of
29 * merchantability and fitness for a particular purpose are disclaimed.
30 * In no event shall the company or contributors be liable for any
31 * direct, indirect, incidental, special, exemplary, or consequential
32 * damages (including, but not limited to, procurement of substitute
33 * goods or services; loss of use, data, or profits; or business
34 * interruption) however caused and on any theory of liability, whether
35 * in contract, strict liability, or tort (including negligence or
36 * otherwise) arising in any way out of the use of this software, even if
37 * advised of the possibility of such damage.
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
44 #include <sys/param.h>
46 #include <sys/kernel.h>
47 #include <sys/libkern.h>
48 #include <sys/malloc.h>
49 #include <sys/systm.h>
51 #include <geom/geom.h>
52 #include <geom/geom_int.h>
53 #include <geom/vinum/geom_vinum_var.h>
54 #include <geom/vinum/geom_vinum.h>
55 #include <geom/vinum/geom_vinum_share.h>
57 static off_t gv_plex_smallest_sd(struct gv_plex *, off_t);
59 /* Find the VINUM class and its associated geom. */
70 LIST_FOREACH(mp, &g_classes, class) {
71 if (!strcmp(mp->name, "VINUM")) {
72 gp = LIST_FIRST(&mp->geom);
81 * Parse the vinum config provided in *buf and store it in *gp's softc.
82 * If parameter 'merge' is non-zero, then the given config is merged into
86 gv_parse_config(struct gv_softc *sc, u_char *buf, int merge)
88 char *aptr, *bptr, *cptr;
89 struct gv_volume *v, *v2;
90 struct gv_plex *p, *p2;
93 char *token[GV_MAXARGS];
97 KASSERT(sc != NULL, ("gv_parse_config: NULL softc"));
99 /* Until the end of the string *buf. */
100 for (aptr = buf; *aptr != '\0'; aptr = bptr) {
104 /* Separate input lines. */
105 while (*bptr != '\n')
110 tokens = gv_tokenize(cptr, token, GV_MAXARGS);
113 if (!strcmp(token[0], "volume")) {
114 v = gv_new_volume(tokens, token);
116 printf("geom_vinum: failed volume\n");
121 v2 = gv_find_vol(sc, v->name);
129 LIST_INIT(&v->plexes);
130 LIST_INSERT_HEAD(&sc->volumes, v, volume);
132 } else if (!strcmp(token[0], "plex")) {
133 p = gv_new_plex(tokens, token);
135 printf("geom_vinum: failed plex\n");
140 p2 = gv_find_plex(sc, p->name);
148 LIST_INIT(&p->subdisks);
149 LIST_INSERT_HEAD(&sc->plexes, p, plex);
151 } else if (!strcmp(token[0], "sd")) {
152 s = gv_new_sd(tokens, token);
155 printf("geom_vinum: failed subdisk\n");
160 s2 = gv_find_sd(sc, s->name);
168 LIST_INSERT_HEAD(&sc->subdisks, s, sd);
175 * Format the vinum configuration properly. If ondisk is non-zero then the
176 * configuration is intended to be written to disk later.
179 gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
189 * We don't need the drive configuration if we're not writing the
193 LIST_FOREACH(d, &sc->drives, drive) {
194 sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
199 LIST_FOREACH(v, &sc->volumes, volume) {
201 sbuf_printf(sb, "%s", prefix);
202 sbuf_printf(sb, "volume %s", v->name);
204 sbuf_printf(sb, " state %s", gv_volstate(v->state));
205 sbuf_printf(sb, "\n");
208 LIST_FOREACH(p, &sc->plexes, plex) {
210 sbuf_printf(sb, "%s", prefix);
211 sbuf_printf(sb, "plex name %s org %s ", p->name,
213 if (gv_is_striped(p))
214 sbuf_printf(sb, "%ds ", p->stripesize / 512);
215 if (p->vol_sc != NULL)
216 sbuf_printf(sb, "vol %s", p->volume);
218 sbuf_printf(sb, " state %s", gv_plexstate(p->state));
219 sbuf_printf(sb, "\n");
222 LIST_FOREACH(s, &sc->subdisks, sd) {
224 sbuf_printf(sb, "%s", prefix);
225 sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
226 "%jds", s->name, s->drive, s->size / 512,
227 s->drive_offset / 512);
228 if (s->plex_sc != NULL) {
229 sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
230 s->plex_offset / 512);
233 sbuf_printf(sb, " state %s", gv_sdstate(s->state));
234 sbuf_printf(sb, "\n");
241 gv_plex_smallest_sd(struct gv_plex *p, off_t smallest)
245 KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
247 LIST_FOREACH(s, &p->subdisks, in_plex) {
248 if (s->size < smallest)
255 gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
261 /* If this subdisk was already given to this plex, do nothing. */
265 /* Check correct size of this subdisk. */
266 s2 = LIST_FIRST(&p->subdisks);
267 if (s2 != NULL && gv_is_striped(p) && (s2->size != s->size)) {
268 printf("GEOM_VINUM: need equal sized subdisks for "
269 "this plex organisation - %s (%jd) <-> %s (%jd)\n",
270 s2->name, s2->size, s->name, s->size);
274 /* Find the correct plex offset for this subdisk, if needed. */
275 if (s->plex_offset == -1) {
277 LIST_FOREACH(s2, &p->subdisks, in_plex) {
278 if (gv_is_striped(p))
279 s->plex_offset = p->sdcount *
282 s->plex_offset = s2->plex_offset +
291 /* Adjust the size of our plex. */
294 case GV_PLEX_STRIPED:
299 p->size = (p->sdcount - 1) * gv_plex_smallest_sd(p, s->size);
306 /* There are no subdisks for this plex yet, just insert it. */
307 if (LIST_EMPTY(&p->subdisks)) {
308 LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
310 /* Insert in correct order, depending on plex_offset. */
312 LIST_FOREACH(s2, &p->subdisks, in_plex) {
313 if (s->plex_offset < s2->plex_offset) {
314 LIST_INSERT_BEFORE(s2, s, in_plex);
316 } else if (LIST_NEXT(s2, in_plex) == NULL) {
317 LIST_INSERT_AFTER(s2, s, in_plex);
329 gv_update_vol_size(struct gv_volume *v, off_t size)
332 struct g_provider *pp;
341 LIST_FOREACH(pp, &gp->provider, provider) {
342 pp->mediasize = size;
348 /* Calculates the plex size. */
350 gv_plex_size(struct gv_plex *p)
355 KASSERT(p != NULL, ("gv_plex_size: NULL p"));
360 /* Adjust the size of our plex. */
364 LIST_FOREACH(s, &p->subdisks, in_plex)
367 case GV_PLEX_STRIPED:
368 s = LIST_FIRST(&p->subdisks);
369 size = p->sdcount * s->size;
372 s = LIST_FIRST(&p->subdisks);
373 size = (p->sdcount - 1) * s->size;
380 /* Returns the size of a volume. */
382 gv_vol_size(struct gv_volume *v)
387 KASSERT(v != NULL, ("gv_vol_size: NULL v"));
389 p = LIST_FIRST(&v->plexes);
393 minplexsize = p->size;
394 LIST_FOREACH(p, &v->plexes, plex) {
395 if (p->size < minplexsize) {
396 minplexsize = p->size;
399 return (minplexsize);
403 gv_update_plex_config(struct gv_plex *p)
405 struct gv_sd *s, *s2;
407 int required_sds, state;
409 KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
411 /* This is what we want the plex to be. */
414 /* The plex was added to an already running volume. */
415 if (p->flags & GV_PLEX_ADDED)
416 state = GV_PLEX_DOWN;
419 case GV_PLEX_STRIPED:
432 if (p->sdcount < required_sds) {
433 state = GV_PLEX_DOWN;
437 * The subdisks in striped plexes must all have the same size.
439 s = LIST_FIRST(&p->subdisks);
440 LIST_FOREACH(s2, &p->subdisks, in_plex) {
441 if (s->size != s2->size) {
442 printf("geom_vinum: subdisk size mismatch "
443 "%s (%jd) <> %s (%jd)\n", s->name, s->size,
445 state = GV_PLEX_DOWN;
449 /* Trim subdisk sizes so that they match the stripe size. */
450 LIST_FOREACH(s, &p->subdisks, in_plex) {
451 remainder = s->size % p->stripesize;
453 printf("gvinum: size of sd %s is not a "
454 "multiple of plex stripesize, taking off "
455 "%jd bytes\n", s->name,
456 (intmax_t)remainder);
457 gv_adjust_freespace(s, remainder);
462 /* Adjust the size of our plex. */
463 if (p->sdcount > 0) {
467 LIST_FOREACH(s, &p->subdisks, in_plex)
471 case GV_PLEX_STRIPED:
472 s = LIST_FIRST(&p->subdisks);
473 p->size = p->sdcount * s->size;
477 s = LIST_FIRST(&p->subdisks);
478 p->size = (p->sdcount - 1) * s->size;
487 state = GV_PLEX_DOWN;
488 else if ((p->flags & GV_PLEX_ADDED) ||
489 ((p->org == GV_PLEX_RAID5) && (p->flags & GV_PLEX_NEWBORN))) {
490 LIST_FOREACH(s, &p->subdisks, in_plex)
491 s->state = GV_SD_STALE;
492 p->flags &= ~GV_PLEX_ADDED;
493 p->flags &= ~GV_PLEX_NEWBORN;
494 p->state = GV_PLEX_DOWN;
499 * Give a subdisk to a drive, check and adjust several parameters, adjust
503 gv_sd_to_drive(struct gv_softc *sc, struct gv_drive *d, struct gv_sd *s,
504 char *errstr, int errlen)
507 struct gv_freelist *fl, *fl2;
515 KASSERT(sc != NULL, ("gv_sd_to_drive: NULL softc"));
516 KASSERT(d != NULL, ("gv_sd_to_drive: NULL drive"));
517 KASSERT(s != NULL, ("gv_sd_to_drive: NULL subdisk"));
518 KASSERT(errstr != NULL, ("gv_sd_to_drive: NULL errstr"));
519 KASSERT(errlen >= ERRBUFSIZ, ("gv_sd_to_drive: short errlen (%d)",
522 /* Check if this subdisk was already given to this drive. */
523 if (s->drive_sc == d)
526 /* Preliminary checks. */
527 if (s->size > d->avail || d->freelist_entries == 0) {
528 snprintf(errstr, errlen, "not enough space on '%s' for '%s'",
533 /* No size given, autosize it. */
535 /* Find the largest available slot. */
536 LIST_FOREACH(fl, &d->freelist, freelist) {
537 if (fl->size >= s->size) {
539 s->drive_offset = fl->offset;
544 /* No good slot found? */
546 snprintf(errstr, errlen, "couldn't autosize '%s' on "
547 "'%s'", s->name, d->name);
552 * Check if we have a free slot that's large enough for the given size.
556 LIST_FOREACH(fl, &d->freelist, freelist) {
557 /* Yes, this subdisk fits. */
558 if (fl->size >= s->size) {
560 /* Assign drive offset, if not given. */
561 if (s->drive_offset == -1)
562 s->drive_offset = fl->offset;
568 /* Couldn't find a good free slot. */
570 snprintf(errstr, errlen, "free slots to small for '%s' "
571 "on '%s'", s->name, d->name);
576 /* No drive offset given, try to calculate it. */
577 if (s->drive_offset == -1) {
579 /* Add offsets and sizes from other subdisks on this drive. */
580 LIST_FOREACH(s2, &d->subdisks, from_drive) {
581 s->drive_offset = s2->drive_offset + s2->size;
585 * If there are no other subdisks yet, then set the default
586 * offset to GV_DATA_START.
588 if (s->drive_offset == -1)
589 s->drive_offset = GV_DATA_START;
591 /* Check if we have a free slot at the given drive offset. */
594 LIST_FOREACH(fl, &d->freelist, freelist) {
595 /* Yes, this subdisk fits. */
596 if ((fl->offset <= s->drive_offset) &&
597 (fl->offset + fl->size >=
598 s->drive_offset + s->size)) {
605 /* Couldn't find a good free slot. */
607 snprintf(errstr, errlen, "given drive_offset for '%s' "
608 "won't fit on '%s'", s->name, d->name);
614 * Now that all parameters are checked and set up, we can give the
615 * subdisk to the drive and adjust the freelist.
618 /* First, adjust the freelist. */
619 LIST_FOREACH(fl, &d->freelist, freelist) {
621 /* This is the free slot that we have found before. */
625 * The subdisk starts at the beginning of the free
628 if (fl->offset == s->drive_offset) {
629 fl->offset += s->size;
633 * The subdisk uses the whole slot, so remove
637 d->freelist_entries--;
638 LIST_REMOVE(fl, freelist);
641 * The subdisk does not start at the beginning of the
645 tmp = fl->offset + fl->size;
646 fl->size = s->drive_offset - fl->offset;
649 * The subdisk didn't use the complete rest of
650 * the free slot, so we need to split it.
652 if (s->drive_offset + s->size != tmp) {
653 fl2 = g_malloc(sizeof(*fl2),
655 fl2->offset = s->drive_offset + s->size;
656 fl2->size = tmp - fl2->offset;
657 LIST_INSERT_AFTER(fl, fl2, freelist);
658 d->freelist_entries++;
666 * This is the first subdisk on this drive, just insert it into the
669 if (LIST_EMPTY(&d->subdisks)) {
670 LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
672 /* There are other subdisks, so insert this one in correct order. */
674 LIST_FOREACH(s2, &d->subdisks, from_drive) {
675 if (s->drive_offset < s2->drive_offset) {
676 LIST_INSERT_BEFORE(s2, s, from_drive);
678 } else if (LIST_NEXT(s2, from_drive) == NULL) {
679 LIST_INSERT_AFTER(s2, s, from_drive);
688 /* Link back from the subdisk to this drive. */
695 gv_free_sd(struct gv_sd *s)
698 struct gv_freelist *fl, *fl2;
700 KASSERT(s != NULL, ("gv_free_sd: NULL s"));
707 * First, find the free slot that's immediately before or after this
711 LIST_FOREACH(fl, &d->freelist, freelist) {
712 if (fl->offset == s->drive_offset + s->size)
714 if (fl->offset + fl->size == s->drive_offset)
718 /* If there is no free slot adjacent to this subdisk, create one. */
721 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
723 fl->offset = s->drive_offset;
725 if (d->freelist_entries == 0) {
726 LIST_INSERT_HEAD(&d->freelist, fl, freelist);
728 LIST_FOREACH(fl2, &d->freelist, freelist) {
729 if (fl->offset < fl2->offset) {
730 LIST_INSERT_BEFORE(fl2, fl, freelist);
732 } else if (LIST_NEXT(fl2, freelist) == NULL) {
733 LIST_INSERT_AFTER(fl2, fl, freelist);
739 d->freelist_entries++;
741 /* Expand the free slot we just found. */
744 if (fl->offset > s->drive_offset)
745 fl->offset = s->drive_offset;
753 gv_adjust_freespace(struct gv_sd *s, off_t remainder)
756 struct gv_freelist *fl, *fl2;
758 KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
760 KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
762 /* First, find the free slot that's immediately after this subdisk. */
764 LIST_FOREACH(fl, &d->freelist, freelist) {
765 if (fl->offset == s->drive_offset + s->size)
769 /* If there is no free slot behind this subdisk, create one. */
772 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
773 fl->size = remainder;
774 fl->offset = s->drive_offset + s->size - remainder;
776 if (d->freelist_entries == 0) {
777 LIST_INSERT_HEAD(&d->freelist, fl, freelist);
779 LIST_FOREACH(fl2, &d->freelist, freelist) {
780 if (fl->offset < fl2->offset) {
781 LIST_INSERT_BEFORE(fl2, fl, freelist);
783 } else if (LIST_NEXT(fl2, freelist) == NULL) {
784 LIST_INSERT_AFTER(fl2, fl, freelist);
790 d->freelist_entries++;
792 /* Expand the free slot we just found. */
794 fl->offset -= remainder;
795 fl->size += remainder;
798 s->size -= remainder;
799 d->avail += remainder;
802 /* Check if the given plex is a striped one. */
804 gv_is_striped(struct gv_plex *p)
806 KASSERT(p != NULL, ("gv_is_striped: NULL p"));
808 case GV_PLEX_STRIPED:
816 /* Find a volume by name. */
818 gv_find_vol(struct gv_softc *sc, char *name)
822 LIST_FOREACH(v, &sc->volumes, volume) {
823 if (!strncmp(v->name, name, GV_MAXVOLNAME))
830 /* Find a plex by name. */
832 gv_find_plex(struct gv_softc *sc, char *name)
836 LIST_FOREACH(p, &sc->plexes, plex) {
837 if (!strncmp(p->name, name, GV_MAXPLEXNAME))
844 /* Find a subdisk by name. */
846 gv_find_sd(struct gv_softc *sc, char *name)
850 LIST_FOREACH(s, &sc->subdisks, sd) {
851 if (!strncmp(s->name, name, GV_MAXSDNAME))
858 /* Find a drive by name. */
860 gv_find_drive(struct gv_softc *sc, char *name)
864 LIST_FOREACH(d, &sc->drives, drive) {
865 if (!strncmp(d->name, name, GV_MAXDRIVENAME))
872 /* Check if any consumer of the given geom is open. */
874 gv_is_open(struct g_geom *gp)
876 struct g_consumer *cp;
881 LIST_FOREACH(cp, &gp->consumer, consumer) {
882 if (cp->acr || cp->acw || cp->ace)
889 /* Return the type of object identified by string 'name'. */
891 gv_object_type(struct gv_softc *sc, char *name)
898 LIST_FOREACH(v, &sc->volumes, volume) {
899 if (!strncmp(v->name, name, GV_MAXVOLNAME))
900 return (GV_TYPE_VOL);
903 LIST_FOREACH(p, &sc->plexes, plex) {
904 if (!strncmp(p->name, name, GV_MAXPLEXNAME))
905 return (GV_TYPE_PLEX);
908 LIST_FOREACH(s, &sc->subdisks, sd) {
909 if (!strncmp(s->name, name, GV_MAXSDNAME))
913 LIST_FOREACH(d, &sc->drives, drive) {
914 if (!strncmp(d->name, name, GV_MAXDRIVENAME))
915 return (GV_TYPE_DRIVE);
922 gv_kill_drive_thread(struct gv_drive *d)
924 if (d->flags & GV_DRIVE_THREAD_ACTIVE) {
925 d->flags |= GV_DRIVE_THREAD_DIE;
927 while (!(d->flags & GV_DRIVE_THREAD_DEAD))
928 tsleep(d, PRIBIO, "gv_die", hz);
929 d->flags &= ~GV_DRIVE_THREAD_ACTIVE;
930 d->flags &= ~GV_DRIVE_THREAD_DIE;
931 d->flags &= ~GV_DRIVE_THREAD_DEAD;
934 mtx_destroy(&d->bqueue_mtx);
939 gv_kill_plex_thread(struct gv_plex *p)
941 if (p->flags & GV_PLEX_THREAD_ACTIVE) {
942 p->flags |= GV_PLEX_THREAD_DIE;
944 while (!(p->flags & GV_PLEX_THREAD_DEAD))
945 tsleep(p, PRIBIO, "gv_die", hz);
946 p->flags &= ~GV_PLEX_THREAD_ACTIVE;
947 p->flags &= ~GV_PLEX_THREAD_DIE;
948 p->flags &= ~GV_PLEX_THREAD_DEAD;
953 mtx_destroy(&p->bqueue_mtx);
958 gv_kill_vol_thread(struct gv_volume *v)
960 if (v->flags & GV_VOL_THREAD_ACTIVE) {
961 v->flags |= GV_VOL_THREAD_DIE;
963 while (!(v->flags & GV_VOL_THREAD_DEAD))
964 tsleep(v, PRIBIO, "gv_die", hz);
965 v->flags &= ~GV_VOL_THREAD_ACTIVE;
966 v->flags &= ~GV_VOL_THREAD_DIE;
967 v->flags &= ~GV_VOL_THREAD_DEAD;
970 mtx_destroy(&v->bqueue_mtx);