2 * Copyright (c) 2004 Lukas Ertl
3 * Copyright (c) 1997, 1998, 1999
4 * Nan Yang Computer Services Limited. All rights reserved.
6 * Parts written by Greg Lehey
8 * This software is distributed under the so-called ``Berkeley
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by Nan Yang Computer
23 * 4. Neither the name of the Company nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * This software is provided ``as is'', and any express or implied
28 * warranties, including, but not limited to, the implied warranties of
29 * merchantability and fitness for a particular purpose are disclaimed.
30 * In no event shall the company or contributors be liable for any
31 * direct, indirect, incidental, special, exemplary, or consequential
32 * damages (including, but not limited to, procurement of substitute
33 * goods or services; loss of use, data, or profits; or business
34 * interruption) however caused and on any theory of liability, whether
35 * in contract, strict liability, or tort (including negligence or
36 * otherwise) arising in any way out of the use of this software, even if
37 * advised of the possibility of such damage.
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
44 #include <sys/param.h>
46 #include <sys/kernel.h>
47 #include <sys/libkern.h>
48 #include <sys/malloc.h>
49 #include <sys/systm.h>
51 #include <geom/geom.h>
52 #include <geom/geom_int.h>
53 #include <geom/vinum/geom_vinum_var.h>
54 #include <geom/vinum/geom_vinum.h>
55 #include <geom/vinum/geom_vinum_share.h>
57 static off_t gv_plex_smallest_sd(struct gv_plex *, off_t);
59 /* Find the VINUM class and its associated geom. */
70 LIST_FOREACH(mp, &g_classes, class) {
71 if (!strcmp(mp->name, "VINUM")) {
72 gp = LIST_FIRST(&mp->geom);
81 * Parse the vinum config provided in *buf and store it in *gp's softc.
82 * If parameter 'merge' is non-zero, then the given config is merged into
86 gv_parse_config(struct gv_softc *sc, u_char *buf, int merge)
88 char *aptr, *bptr, *cptr;
89 struct gv_volume *v, *v2;
90 struct gv_plex *p, *p2;
93 char *token[GV_MAXARGS];
97 KASSERT(sc != NULL, ("gv_parse_config: NULL softc"));
99 /* Until the end of the string *buf. */
100 for (aptr = buf; *aptr != '\0'; aptr = bptr) {
104 /* Separate input lines. */
105 while (*bptr != '\n')
110 tokens = gv_tokenize(cptr, token, GV_MAXARGS);
113 if (!strcmp(token[0], "volume")) {
114 v = gv_new_volume(tokens, token);
116 printf("geom_vinum: failed volume\n");
121 v2 = gv_find_vol(sc, v->name);
129 LIST_INIT(&v->plexes);
130 LIST_INSERT_HEAD(&sc->volumes, v, volume);
132 } else if (!strcmp(token[0], "plex")) {
133 p = gv_new_plex(tokens, token);
135 printf("geom_vinum: failed plex\n");
140 p2 = gv_find_plex(sc, p->name);
148 LIST_INIT(&p->subdisks);
149 LIST_INSERT_HEAD(&sc->plexes, p, plex);
151 } else if (!strcmp(token[0], "sd")) {
152 s = gv_new_sd(tokens, token);
155 printf("geom_vinum: failed subdisk\n");
160 s2 = gv_find_sd(sc, s->name);
168 LIST_INSERT_HEAD(&sc->subdisks, s, sd);
175 * Format the vinum configuration properly. If ondisk is non-zero then the
176 * configuration is intended to be written to disk later.
179 gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
189 * We don't need the drive configuration if we're not writing the
193 LIST_FOREACH(d, &sc->drives, drive) {
194 sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
199 LIST_FOREACH(v, &sc->volumes, volume) {
201 sbuf_printf(sb, "%s", prefix);
202 sbuf_printf(sb, "volume %s", v->name);
204 sbuf_printf(sb, " state %s", gv_volstate(v->state));
205 sbuf_printf(sb, "\n");
208 LIST_FOREACH(p, &sc->plexes, plex) {
210 sbuf_printf(sb, "%s", prefix);
211 sbuf_printf(sb, "plex name %s org %s ", p->name,
213 if (gv_is_striped(p))
214 sbuf_printf(sb, "%ds ", p->stripesize / 512);
215 if (p->vol_sc != NULL)
216 sbuf_printf(sb, "vol %s", p->volume);
218 sbuf_printf(sb, " state %s", gv_plexstate(p->state));
219 sbuf_printf(sb, "\n");
222 LIST_FOREACH(s, &sc->subdisks, sd) {
224 sbuf_printf(sb, "%s", prefix);
225 sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
226 "%jds", s->name, s->drive, s->size / 512,
227 s->drive_offset / 512);
228 if (s->plex_sc != NULL) {
229 sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
230 s->plex_offset / 512);
233 sbuf_printf(sb, " state %s", gv_sdstate(s->state));
234 sbuf_printf(sb, "\n");
241 gv_plex_smallest_sd(struct gv_plex *p, off_t smallest)
245 KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
247 LIST_FOREACH(s, &p->subdisks, in_plex) {
248 if (s->size < smallest)
255 gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
261 /* If this subdisk was already given to this plex, do nothing. */
265 /* Check correct size of this subdisk. */
266 s2 = LIST_FIRST(&p->subdisks);
267 if (s2 != NULL && gv_is_striped(p) && (s2->size != s->size)) {
268 printf("GEOM_VINUM: need equal sized subdisks for "
269 "this plex organisation - %s (%jd) <-> %s (%jd)\n",
270 s2->name, s2->size, s->name, s->size);
274 /* Find the correct plex offset for this subdisk, if needed. */
275 if (s->plex_offset == -1) {
277 LIST_FOREACH(s2, &p->subdisks, in_plex) {
278 if (gv_is_striped(p))
279 s->plex_offset = p->sdcount *
282 s->plex_offset = s2->plex_offset +
291 /* Adjust the size of our plex. */
294 case GV_PLEX_STRIPED:
299 p->size = (p->sdcount - 1) * gv_plex_smallest_sd(p, s->size);
306 /* There are no subdisks for this plex yet, just insert it. */
307 if (LIST_EMPTY(&p->subdisks)) {
308 LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
310 /* Insert in correct order, depending on plex_offset. */
312 LIST_FOREACH(s2, &p->subdisks, in_plex) {
313 if (s->plex_offset < s2->plex_offset) {
314 LIST_INSERT_BEFORE(s2, s, in_plex);
316 } else if (LIST_NEXT(s2, in_plex) == NULL) {
317 LIST_INSERT_AFTER(s2, s, in_plex);
329 gv_update_vol_size(struct gv_volume *v, off_t size)
332 struct g_provider *pp;
341 LIST_FOREACH(pp, &gp->provider, provider) {
342 pp->mediasize = size;
348 /* Calculates the plex size. */
350 gv_plex_size(struct gv_plex *p)
355 KASSERT(p != NULL, ("gv_plex_size: NULL p"));
360 /* Adjust the size of our plex. */
364 LIST_FOREACH(s, &p->subdisks, in_plex)
367 case GV_PLEX_STRIPED:
368 s = LIST_FIRST(&p->subdisks);
369 size = p->sdcount * s->size;
372 s = LIST_FIRST(&p->subdisks);
373 size = (p->sdcount - 1) * s->size;
380 /* Returns the size of a volume. */
382 gv_vol_size(struct gv_volume *v)
387 KASSERT(v != NULL, ("gv_vol_size: NULL v"));
389 p = LIST_FIRST(&v->plexes);
393 minplexsize = p->size;
394 LIST_FOREACH(p, &v->plexes, plex) {
395 if (p->size < minplexsize) {
396 minplexsize = p->size;
399 return (minplexsize);
403 gv_update_plex_config(struct gv_plex *p)
405 struct gv_sd *s, *s2;
407 int required_sds, state;
409 KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
411 /* This is what we want the plex to be. */
414 /* The plex was added to an already running volume. */
415 if (p->flags & GV_PLEX_ADDED)
416 state = GV_PLEX_DOWN;
419 case GV_PLEX_STRIPED:
432 if (p->sdcount < required_sds) {
433 state = GV_PLEX_DOWN;
437 * The subdisks in striped plexes must all have the same size.
439 s = LIST_FIRST(&p->subdisks);
440 LIST_FOREACH(s2, &p->subdisks, in_plex) {
441 if (s->size != s2->size) {
442 printf("geom_vinum: subdisk size mismatch "
443 "%s (%jd) <> %s (%jd)\n", s->name, s->size,
445 state = GV_PLEX_DOWN;
449 /* Trim subdisk sizes so that they match the stripe size. */
450 LIST_FOREACH(s, &p->subdisks, in_plex) {
451 remainder = s->size % p->stripesize;
453 printf("gvinum: size of sd %s is not a "
454 "multiple of plex stripesize, taking off "
455 "%jd bytes\n", s->name,
456 (intmax_t)remainder);
457 gv_adjust_freespace(s, remainder);
462 /* Adjust the size of our plex. */
463 if (p->sdcount > 0) {
467 LIST_FOREACH(s, &p->subdisks, in_plex)
471 case GV_PLEX_STRIPED:
472 s = LIST_FIRST(&p->subdisks);
473 p->size = p->sdcount * s->size;
477 s = LIST_FIRST(&p->subdisks);
478 p->size = (p->sdcount - 1) * s->size;
487 state = GV_PLEX_DOWN;
488 else if ((p->flags & GV_PLEX_ADDED) ||
489 ((p->org == GV_PLEX_RAID5) && (p->flags & GV_PLEX_NEWBORN))) {
490 LIST_FOREACH(s, &p->subdisks, in_plex)
491 s->state = GV_SD_STALE;
492 p->flags &= ~GV_PLEX_ADDED;
493 p->flags &= ~GV_PLEX_NEWBORN;
494 state = GV_PLEX_DOWN;
500 * Give a subdisk to a drive, check and adjust several parameters, adjust
504 gv_sd_to_drive(struct gv_softc *sc, struct gv_drive *d, struct gv_sd *s,
505 char *errstr, int errlen)
508 struct gv_freelist *fl, *fl2;
516 KASSERT(sc != NULL, ("gv_sd_to_drive: NULL softc"));
517 KASSERT(d != NULL, ("gv_sd_to_drive: NULL drive"));
518 KASSERT(s != NULL, ("gv_sd_to_drive: NULL subdisk"));
519 KASSERT(errstr != NULL, ("gv_sd_to_drive: NULL errstr"));
520 KASSERT(errlen >= ERRBUFSIZ, ("gv_sd_to_drive: short errlen (%d)",
523 /* Check if this subdisk was already given to this drive. */
524 if (s->drive_sc == d)
527 /* Preliminary checks. */
528 if (s->size > d->avail || d->freelist_entries == 0) {
529 snprintf(errstr, errlen, "not enough space on '%s' for '%s'",
534 /* No size given, autosize it. */
536 /* Find the largest available slot. */
537 LIST_FOREACH(fl, &d->freelist, freelist) {
538 if (fl->size >= s->size) {
540 s->drive_offset = fl->offset;
545 /* No good slot found? */
547 snprintf(errstr, errlen, "couldn't autosize '%s' on "
548 "'%s'", s->name, d->name);
553 * Check if we have a free slot that's large enough for the given size.
557 LIST_FOREACH(fl, &d->freelist, freelist) {
558 /* Yes, this subdisk fits. */
559 if (fl->size >= s->size) {
561 /* Assign drive offset, if not given. */
562 if (s->drive_offset == -1)
563 s->drive_offset = fl->offset;
569 /* Couldn't find a good free slot. */
571 snprintf(errstr, errlen, "free slots to small for '%s' "
572 "on '%s'", s->name, d->name);
577 /* No drive offset given, try to calculate it. */
578 if (s->drive_offset == -1) {
580 /* Add offsets and sizes from other subdisks on this drive. */
581 LIST_FOREACH(s2, &d->subdisks, from_drive) {
582 s->drive_offset = s2->drive_offset + s2->size;
586 * If there are no other subdisks yet, then set the default
587 * offset to GV_DATA_START.
589 if (s->drive_offset == -1)
590 s->drive_offset = GV_DATA_START;
592 /* Check if we have a free slot at the given drive offset. */
595 LIST_FOREACH(fl, &d->freelist, freelist) {
596 /* Yes, this subdisk fits. */
597 if ((fl->offset <= s->drive_offset) &&
598 (fl->offset + fl->size >=
599 s->drive_offset + s->size)) {
606 /* Couldn't find a good free slot. */
608 snprintf(errstr, errlen, "given drive_offset for '%s' "
609 "won't fit on '%s'", s->name, d->name);
615 * Now that all parameters are checked and set up, we can give the
616 * subdisk to the drive and adjust the freelist.
619 /* First, adjust the freelist. */
620 LIST_FOREACH(fl, &d->freelist, freelist) {
622 /* This is the free slot that we have found before. */
626 * The subdisk starts at the beginning of the free
629 if (fl->offset == s->drive_offset) {
630 fl->offset += s->size;
634 * The subdisk uses the whole slot, so remove
638 d->freelist_entries--;
639 LIST_REMOVE(fl, freelist);
642 * The subdisk does not start at the beginning of the
646 tmp = fl->offset + fl->size;
647 fl->size = s->drive_offset - fl->offset;
650 * The subdisk didn't use the complete rest of
651 * the free slot, so we need to split it.
653 if (s->drive_offset + s->size != tmp) {
654 fl2 = g_malloc(sizeof(*fl2),
656 fl2->offset = s->drive_offset + s->size;
657 fl2->size = tmp - fl2->offset;
658 LIST_INSERT_AFTER(fl, fl2, freelist);
659 d->freelist_entries++;
667 * This is the first subdisk on this drive, just insert it into the
670 if (LIST_EMPTY(&d->subdisks)) {
671 LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
673 /* There are other subdisks, so insert this one in correct order. */
675 LIST_FOREACH(s2, &d->subdisks, from_drive) {
676 if (s->drive_offset < s2->drive_offset) {
677 LIST_INSERT_BEFORE(s2, s, from_drive);
679 } else if (LIST_NEXT(s2, from_drive) == NULL) {
680 LIST_INSERT_AFTER(s2, s, from_drive);
689 /* Link back from the subdisk to this drive. */
696 gv_free_sd(struct gv_sd *s)
699 struct gv_freelist *fl, *fl2;
701 KASSERT(s != NULL, ("gv_free_sd: NULL s"));
708 * First, find the free slot that's immediately before or after this
712 LIST_FOREACH(fl, &d->freelist, freelist) {
713 if (fl->offset == s->drive_offset + s->size)
715 if (fl->offset + fl->size == s->drive_offset)
719 /* If there is no adjacent free slot next to this subdisk, create one. */
722 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
724 fl->offset = s->drive_offset;
726 if (d->freelist_entries == 0) {
727 LIST_INSERT_HEAD(&d->freelist, fl, freelist);
729 LIST_FOREACH(fl2, &d->freelist, freelist) {
730 if (fl->offset < fl2->offset) {
731 LIST_INSERT_BEFORE(fl2, fl, freelist);
733 } else if (LIST_NEXT(fl2, freelist) == NULL) {
734 LIST_INSERT_AFTER(fl2, fl, freelist);
740 d->freelist_entries++;
742 /* Expand the free slot we just found. */
745 if (fl->offset > s->drive_offset)
746 fl->offset = s->drive_offset;
754 gv_adjust_freespace(struct gv_sd *s, off_t remainder)
757 struct gv_freelist *fl, *fl2;
759 KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
761 KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
763 /* First, find the free slot that's immediately after this subdisk. */
765 LIST_FOREACH(fl, &d->freelist, freelist) {
766 if (fl->offset == s->drive_offset + s->size)
770 /* If there is no free slot directly after this subdisk, create one. */
773 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
774 fl->size = remainder;
775 fl->offset = s->drive_offset + s->size - remainder;
777 if (d->freelist_entries == 0) {
778 LIST_INSERT_HEAD(&d->freelist, fl, freelist);
780 LIST_FOREACH(fl2, &d->freelist, freelist) {
781 if (fl->offset < fl2->offset) {
782 LIST_INSERT_BEFORE(fl2, fl, freelist);
784 } else if (LIST_NEXT(fl2, freelist) == NULL) {
785 LIST_INSERT_AFTER(fl2, fl, freelist);
791 d->freelist_entries++;
793 /* Expand the free slot we just found. */
795 fl->offset -= remainder;
796 fl->size += remainder;
799 s->size -= remainder;
800 d->avail += remainder;
803 /* Check if the given plex is a striped one. */
805 gv_is_striped(struct gv_plex *p)
807 KASSERT(p != NULL, ("gv_is_striped: NULL p"));
809 case GV_PLEX_STRIPED:
817 /* Find a volume by name. */
819 gv_find_vol(struct gv_softc *sc, char *name)
823 LIST_FOREACH(v, &sc->volumes, volume) {
824 if (!strncmp(v->name, name, GV_MAXVOLNAME))
831 /* Find a plex by name. */
833 gv_find_plex(struct gv_softc *sc, char *name)
837 LIST_FOREACH(p, &sc->plexes, plex) {
838 if (!strncmp(p->name, name, GV_MAXPLEXNAME))
845 /* Find a subdisk by name. */
847 gv_find_sd(struct gv_softc *sc, char *name)
851 LIST_FOREACH(s, &sc->subdisks, sd) {
852 if (!strncmp(s->name, name, GV_MAXSDNAME))
859 /* Find a drive by name. */
861 gv_find_drive(struct gv_softc *sc, char *name)
865 LIST_FOREACH(d, &sc->drives, drive) {
866 if (!strncmp(d->name, name, GV_MAXDRIVENAME))
873 /* Check if any consumer of the given geom is open. */
875 gv_is_open(struct g_geom *gp)
877 struct g_consumer *cp;
882 LIST_FOREACH(cp, &gp->consumer, consumer) {
883 if (cp->acr || cp->acw || cp->ace)
890 /* Return the type of object identified by string 'name'. */
892 gv_object_type(struct gv_softc *sc, char *name)
899 LIST_FOREACH(v, &sc->volumes, volume) {
900 if (!strncmp(v->name, name, GV_MAXVOLNAME))
901 return (GV_TYPE_VOL);
904 LIST_FOREACH(p, &sc->plexes, plex) {
905 if (!strncmp(p->name, name, GV_MAXPLEXNAME))
906 return (GV_TYPE_PLEX);
909 LIST_FOREACH(s, &sc->subdisks, sd) {
910 if (!strncmp(s->name, name, GV_MAXSDNAME))
914 LIST_FOREACH(d, &sc->drives, drive) {
915 if (!strncmp(d->name, name, GV_MAXDRIVENAME))
916 return (GV_TYPE_DRIVE);
923 gv_kill_drive_thread(struct gv_drive *d)
925 if (d->flags & GV_DRIVE_THREAD_ACTIVE) {
926 d->flags |= GV_DRIVE_THREAD_DIE;
928 while (!(d->flags & GV_DRIVE_THREAD_DEAD))
929 tsleep(d, PRIBIO, "gv_die", hz);
930 d->flags &= ~GV_DRIVE_THREAD_ACTIVE;
931 d->flags &= ~GV_DRIVE_THREAD_DIE;
932 d->flags &= ~GV_DRIVE_THREAD_DEAD;
935 mtx_destroy(&d->bqueue_mtx);
940 gv_kill_plex_thread(struct gv_plex *p)
942 if (p->flags & GV_PLEX_THREAD_ACTIVE) {
943 p->flags |= GV_PLEX_THREAD_DIE;
945 while (!(p->flags & GV_PLEX_THREAD_DEAD))
946 tsleep(p, PRIBIO, "gv_die", hz);
947 p->flags &= ~GV_PLEX_THREAD_ACTIVE;
948 p->flags &= ~GV_PLEX_THREAD_DIE;
949 p->flags &= ~GV_PLEX_THREAD_DEAD;
954 mtx_destroy(&p->bqueue_mtx);
959 gv_kill_vol_thread(struct gv_volume *v)
961 if (v->flags & GV_VOL_THREAD_ACTIVE) {
962 v->flags |= GV_VOL_THREAD_DIE;
964 while (!(v->flags & GV_VOL_THREAD_DEAD))
965 tsleep(v, PRIBIO, "gv_die", hz);
966 v->flags &= ~GV_VOL_THREAD_ACTIVE;
967 v->flags &= ~GV_VOL_THREAD_DIE;
968 v->flags &= ~GV_VOL_THREAD_DEAD;
971 mtx_destroy(&v->bqueue_mtx);