]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - sys/geom/vinum/geom_vinum_subr.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / sys / geom / vinum / geom_vinum_subr.c
1 /*-
2  * Copyright (c) 2004, 2007 Lukas Ertl
3  * Copyright (c) 2007, 2009 Ulf Lilleengen
4  * Copyright (c) 1997, 1998, 1999
5  *      Nan Yang Computer Services Limited.  All rights reserved.
6  *
7  *  Parts written by Greg Lehey
8  *
9  *  This software is distributed under the so-called ``Berkeley
10  *  License'':
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *      This product includes software developed by Nan Yang Computer
23  *      Services Limited.
24  * 4. Neither the name of the Company nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * This software is provided ``as is'', and any express or implied
29  * warranties, including, but not limited to, the implied warranties of
30  * merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall the company or contributors be liable for any
32  * direct, indirect, incidental, special, exemplary, or consequential
33  * damages (including, but not limited to, procurement of substitute
34  * goods or services; loss of use, data, or profits; or business
35  * interruption) however caused and on any theory of liability, whether
36  * in contract, strict liability, or tort (including negligence or
37  * otherwise) arising in any way out of the use of this software, even if
38  * advised of the possibility of such damage.
39  *
40  */
41
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
44
45 #include <sys/param.h>
46 #include <sys/malloc.h>
47 #include <sys/sbuf.h>
48 #include <sys/systm.h>
49
50 #include <geom/geom.h>
51 #include <geom/vinum/geom_vinum_var.h>
52 #include <geom/vinum/geom_vinum.h>
53 #include <geom/vinum/geom_vinum_share.h>
54
55 int     gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
56 static off_t gv_plex_smallest_sd(struct gv_plex *);
57
58 void
59 gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
60 {
61         char *aptr, *bptr, *cptr;
62         struct gv_volume *v, *v2;
63         struct gv_plex *p, *p2;
64         struct gv_sd *s, *s2;
65         int error, is_newer, tokens;
66         char *token[GV_MAXARGS];
67
68         is_newer = gv_drive_is_newer(sc, d);
69
70         /* Until the end of the string *buf. */
71         for (aptr = buf; *aptr != '\0'; aptr = bptr) {
72                 bptr = aptr;
73                 cptr = aptr;
74
75                 /* Seperate input lines. */
76                 while (*bptr != '\n')
77                         bptr++;
78                 *bptr = '\0';
79                 bptr++;
80
81                 tokens = gv_tokenize(cptr, token, GV_MAXARGS);
82
83                 if (tokens <= 0)
84                         continue;
85
86                 if (!strcmp(token[0], "volume")) {
87                         v = gv_new_volume(tokens, token);
88                         if (v == NULL) {
89                                 G_VINUM_DEBUG(0, "config parse failed volume");
90                                 break;
91                         }
92
93                         v2 = gv_find_vol(sc, v->name);
94                         if (v2 != NULL) {
95                                 if (is_newer) {
96                                         v2->state = v->state;
97                                         G_VINUM_DEBUG(2, "newer volume found!");
98                                 }
99                                 g_free(v);
100                                 continue;
101                         }
102
103                         gv_create_volume(sc, v);
104
105                 } else if (!strcmp(token[0], "plex")) {
106                         p = gv_new_plex(tokens, token);
107                         if (p == NULL) {
108                                 G_VINUM_DEBUG(0, "config parse failed plex");
109                                 break;
110                         }
111
112                         p2 = gv_find_plex(sc, p->name);
113                         if (p2 != NULL) {
114                                 /* XXX */
115                                 if (is_newer) {
116                                         p2->state = p->state;
117                                         G_VINUM_DEBUG(2, "newer plex found!");
118                                 }
119                                 g_free(p);
120                                 continue;
121                         }
122
123                         error = gv_create_plex(sc, p);
124                         if (error)
125                                 continue;
126                         /*
127                          * These flags were set in gv_create_plex() and are not
128                          * needed here (on-disk config parsing).
129                          */
130                         p->flags &= ~GV_PLEX_ADDED;
131
132                 } else if (!strcmp(token[0], "sd")) {
133                         s = gv_new_sd(tokens, token);
134
135                         if (s == NULL) {
136                                 G_VINUM_DEBUG(0, "config parse failed subdisk");
137                                 break;
138                         }
139
140                         s2 = gv_find_sd(sc, s->name);
141                         if (s2 != NULL) {
142                                 /* XXX */
143                                 if (is_newer) {
144                                         s2->state = s->state;
145                                         G_VINUM_DEBUG(2, "newer subdisk found!");
146                                 }
147                                 g_free(s);
148                                 continue;
149                         }
150
151                         /*
152                          * Signal that this subdisk was tasted, and could
153                          * possibly reference a drive that isn't in our config
154                          * yet.
155                          */
156                         s->flags |= GV_SD_TASTED;
157
158                         if (s->state == GV_SD_UP)
159                                 s->flags |= GV_SD_CANGOUP;
160
161                         error = gv_create_sd(sc, s);
162                         if (error)
163                                 continue;
164
165                         /*
166                          * This flag was set in gv_create_sd() and is not
167                          * needed here (on-disk config parsing).
168                          */
169                         s->flags &= ~GV_SD_NEWBORN;
170                         s->flags &= ~GV_SD_GROW;
171                 }
172         }
173 }
174
175 /*
176  * Format the vinum configuration properly.  If ondisk is non-zero then the
177  * configuration is intended to be written to disk later.
178  */
179 void
180 gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
181 {
182         struct gv_drive *d;
183         struct gv_sd *s;
184         struct gv_plex *p;
185         struct gv_volume *v;
186
187         /*
188          * We don't need the drive configuration if we're not writing the
189          * config to disk.
190          */
191         if (!ondisk) {
192                 LIST_FOREACH(d, &sc->drives, drive) {
193                         sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
194                             d->name, d->device);
195                 }
196         }
197
198         LIST_FOREACH(v, &sc->volumes, volume) {
199                 if (!ondisk)
200                         sbuf_printf(sb, "%s", prefix);
201                 sbuf_printf(sb, "volume %s", v->name);
202                 if (ondisk)
203                         sbuf_printf(sb, " state %s", gv_volstate(v->state));
204                 sbuf_printf(sb, "\n");
205         }
206
207         LIST_FOREACH(p, &sc->plexes, plex) {
208                 if (!ondisk)
209                         sbuf_printf(sb, "%s", prefix);
210                 sbuf_printf(sb, "plex name %s org %s ", p->name,
211                     gv_plexorg(p->org));
212                 if (gv_is_striped(p))
213                         sbuf_printf(sb, "%ds ", p->stripesize / 512);
214                 if (p->vol_sc != NULL)
215                         sbuf_printf(sb, "vol %s", p->volume);
216                 if (ondisk)
217                         sbuf_printf(sb, " state %s", gv_plexstate(p->state));
218                 sbuf_printf(sb, "\n");
219         }
220
221         LIST_FOREACH(s, &sc->subdisks, sd) {
222                 if (!ondisk)
223                         sbuf_printf(sb, "%s", prefix);
224                 sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
225                     "%jds", s->name, s->drive, s->size / 512,
226                     s->drive_offset / 512);
227                 if (s->plex_sc != NULL) {
228                         sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
229                             s->plex_offset / 512);
230                 }
231                 if (ondisk)
232                         sbuf_printf(sb, " state %s", gv_sdstate(s->state));
233                 sbuf_printf(sb, "\n");
234         }
235 }
236
237 static off_t
238 gv_plex_smallest_sd(struct gv_plex *p)
239 {
240         struct gv_sd *s;
241         off_t smallest;
242
243         KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
244
245         s = LIST_FIRST(&p->subdisks);
246         if (s == NULL)
247                 return (-1);
248         smallest = s->size;
249         LIST_FOREACH(s, &p->subdisks, in_plex) {
250                 if (s->size < smallest)
251                         smallest = s->size;
252         }
253         return (smallest);
254 }
255
256 /* Walk over plexes in a volume and count how many are down. */
257 int
258 gv_plexdown(struct gv_volume *v)
259 {
260         int plexdown;
261         struct gv_plex *p;
262
263         KASSERT(v != NULL, ("gv_plexdown: NULL v"));
264
265         plexdown = 0;
266
267         LIST_FOREACH(p, &v->plexes, plex) {
268                 if (p->state == GV_PLEX_DOWN)
269                         plexdown++;
270         }
271         return (plexdown);
272 }
273
274 int
275 gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
276 {
277         struct gv_sd *s2;
278         off_t psizeorig, remainder, smallest;
279
280         /* If this subdisk was already given to this plex, do nothing. */
281         if (s->plex_sc == p)
282                 return (0);
283
284         /* Check correct size of this subdisk. */
285         s2 = LIST_FIRST(&p->subdisks);
286         /* Adjust the subdisk-size if necessary. */
287         if (s2 != NULL && gv_is_striped(p)) {
288                 /* First adjust to the stripesize. */
289                 remainder = s->size % p->stripesize;
290
291                 if (remainder) {
292                         G_VINUM_DEBUG(1, "size of sd %s is not a "
293                             "multiple of plex stripesize, taking off "
294                             "%jd bytes", s->name,
295                             (intmax_t)remainder);
296                         gv_adjust_freespace(s, remainder);
297                 }
298
299                 smallest = gv_plex_smallest_sd(p);
300                 /* Then take off extra if other subdisks are smaller. */
301                 remainder = s->size - smallest;
302
303                 /*
304                  * Don't allow a remainder below zero for running plexes, it's too
305                  * painful, and if someone were to accidentally do this, the
306                  * resulting array might be smaller than the original... not god 
307                  */
308                 if (remainder < 0) {
309                         if (!(p->flags & GV_PLEX_NEWBORN)) {
310                                 G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
311                                     s->name, p->name);
312                                 return (GV_ERR_BADSIZE);
313                         }
314                         /* Adjust other subdisks. */
315                         LIST_FOREACH(s2, &p->subdisks, in_plex) {
316                                 G_VINUM_DEBUG(1, "size of sd %s is to big, "
317                                     "taking off %jd bytes", s->name,
318                                     (intmax_t)remainder);
319                                 gv_adjust_freespace(s2, (remainder * -1));
320                         }
321                 } else if (remainder > 0) {
322                         G_VINUM_DEBUG(1, "size of sd %s is to big, "
323                             "taking off %jd bytes", s->name,
324                             (intmax_t)remainder);
325                         gv_adjust_freespace(s, remainder);
326                 }
327         }
328
329         /* Find the correct plex offset for this subdisk, if needed. */
330         if (s->plex_offset == -1) {
331                 /* 
332                  * First set it to 0 to catch the case where we had a detached
333                  * subdisk that didn't get any good offset.
334                  */
335                 s->plex_offset = 0;
336                 if (p->sdcount) {
337                         LIST_FOREACH(s2, &p->subdisks, in_plex) {
338                                 if (gv_is_striped(p))
339                                         s->plex_offset = p->sdcount *
340                                             p->stripesize;
341                                 else
342                                         s->plex_offset = s2->plex_offset +
343                                             s2->size;
344                         }
345                 }
346         }
347
348         /* There are no subdisks for this plex yet, just insert it. */
349         if (LIST_EMPTY(&p->subdisks)) {
350                 LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
351
352         /* Insert in correct order, depending on plex_offset. */
353         } else {
354                 LIST_FOREACH(s2, &p->subdisks, in_plex) {
355                         if (s->plex_offset < s2->plex_offset) {
356                                 LIST_INSERT_BEFORE(s2, s, in_plex);
357                                 break;
358                         } else if (LIST_NEXT(s2, in_plex) == NULL) {
359                                 LIST_INSERT_AFTER(s2, s, in_plex);
360                                 break;
361                         }
362                 }
363         }
364
365         s->plex_sc = p;
366         /* Adjust the size of our plex. We check if the plex misses a subdisk,
367          * so we don't make the plex smaller than it actually should be.
368          */
369         psizeorig = p->size;
370         p->size = gv_plex_size(p);
371         /* Make sure the size is not changed. */
372         if (p->sddetached > 0) {
373                 if (p->size < psizeorig) {
374                         p->size = psizeorig;
375                         /* We make sure wee need another subdisk. */
376                         if (p->sddetached == 1)
377                                 p->sddetached++;
378                 }
379                 p->sddetached--;
380         } else {
381                 if ((p->org == GV_PLEX_RAID5 ||
382                     p->org == GV_PLEX_STRIPED) &&
383                     !(p->flags & GV_PLEX_NEWBORN) && 
384                     p->state == GV_PLEX_UP) {
385                         s->flags |= GV_SD_GROW;
386                 }
387                 p->sdcount++;
388         }
389
390         return (0);
391 }
392
393 void
394 gv_update_vol_size(struct gv_volume *v, off_t size)
395 {
396         if (v == NULL)
397                 return;
398         if (v->provider != NULL) {
399                 g_topology_lock();
400                 v->provider->mediasize = size;
401                 g_topology_unlock();
402         }
403         v->size = size;
404 }
405
406 /* Return how many subdisks that constitute the original plex. */
407 int
408 gv_sdcount(struct gv_plex *p, int growing)
409 {
410         struct gv_sd *s;
411         int sdcount;
412
413         sdcount = p->sdcount;
414         if (growing) {
415                 LIST_FOREACH(s, &p->subdisks, in_plex) {
416                         if (s->flags & GV_SD_GROW)
417                                 sdcount--;
418                 }
419         }
420
421         return (sdcount);
422 }
423
424 /* Calculates the plex size. */
425 off_t
426 gv_plex_size(struct gv_plex *p)
427 {
428         struct gv_sd *s;
429         off_t size;
430         int sdcount;
431
432         KASSERT(p != NULL, ("gv_plex_size: NULL p"));
433
434         /* Adjust the size of our plex. */
435         size = 0;
436         sdcount = gv_sdcount(p, 1);
437         switch (p->org) {
438         case GV_PLEX_CONCAT:
439                 LIST_FOREACH(s, &p->subdisks, in_plex)
440                         size += s->size;
441                 break;
442         case GV_PLEX_STRIPED:
443                 s = LIST_FIRST(&p->subdisks);
444                 size = ((s != NULL) ? (sdcount * s->size) : 0);
445                 break;
446         case GV_PLEX_RAID5:
447                 s = LIST_FIRST(&p->subdisks);
448                 size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
449                 break;
450         }
451
452         return (size);
453 }
454
455 /* Returns the size of a volume. */
456 off_t
457 gv_vol_size(struct gv_volume *v)
458 {
459         struct gv_plex *p;
460         off_t minplexsize;
461
462         KASSERT(v != NULL, ("gv_vol_size: NULL v"));
463
464         p = LIST_FIRST(&v->plexes);
465         if (p == NULL)
466                 return (0);
467
468         minplexsize = p->size;
469         LIST_FOREACH(p, &v->plexes, in_volume) {
470                 if (p->size < minplexsize) {
471                         minplexsize = p->size;
472                 }
473         }
474         return (minplexsize);
475 }
476
477 void
478 gv_update_plex_config(struct gv_plex *p)
479 {
480         struct gv_sd *s, *s2;
481         off_t remainder;
482         int required_sds, state;
483
484         KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
485
486         /* The plex was added to an already running volume. */
487         if (p->flags & GV_PLEX_ADDED)
488                 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
489
490         switch (p->org) {
491         case GV_PLEX_STRIPED:
492                 required_sds = 2;
493                 break;
494         case GV_PLEX_RAID5:
495                 required_sds = 3;
496                 break;
497         case GV_PLEX_CONCAT:
498         default:
499                 required_sds = 0;
500                 break;
501         }
502
503         if (required_sds) {
504                 if (p->sdcount < required_sds) {
505                         gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
506                 }
507
508                 /*
509                  * The subdisks in striped plexes must all have the same size.
510                  */
511                 s = LIST_FIRST(&p->subdisks);
512                 LIST_FOREACH(s2, &p->subdisks, in_plex) {
513                         if (s->size != s2->size) {
514                                 G_VINUM_DEBUG(0, "subdisk size mismatch %s"
515                                     "(%jd) <> %s (%jd)", s->name, s->size,
516                                     s2->name, s2->size);
517                                 gv_set_plex_state(p, GV_PLEX_DOWN,
518                                     GV_SETSTATE_FORCE);
519                         }
520                 }
521
522                 LIST_FOREACH(s, &p->subdisks, in_plex) {
523                         /* Trim subdisk sizes to match the stripe size. */
524                         remainder = s->size % p->stripesize;
525                         if (remainder) {
526                                 G_VINUM_DEBUG(1, "size of sd %s is not a "
527                                     "multiple of plex stripesize, taking off "
528                                     "%jd bytes", s->name, (intmax_t)remainder);
529                                 gv_adjust_freespace(s, remainder);
530                         }
531                 }
532         }
533
534         p->size = gv_plex_size(p);
535         if (p->sdcount == 0)
536                 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
537         else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
538                 LIST_FOREACH(s, &p->subdisks, in_plex)
539                         gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
540                 /* If added to a volume, we want the plex to be down. */
541                 state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
542                 gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
543                 p->flags &= ~GV_PLEX_ADDED;
544         } else if (p->flags & GV_PLEX_ADDED) {
545                 LIST_FOREACH(s, &p->subdisks, in_plex)
546                         gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
547                 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
548                 p->flags &= ~GV_PLEX_ADDED;
549         } else if (p->state == GV_PLEX_UP) {
550                 LIST_FOREACH(s, &p->subdisks, in_plex) {
551                         if (s->flags & GV_SD_GROW) {
552                                 gv_set_plex_state(p, GV_PLEX_GROWABLE,
553                                     GV_SETSTATE_FORCE);
554                                 break;
555                         }
556                 }
557         }
558         /* Our plex is grown up now. */
559         p->flags &= ~GV_PLEX_NEWBORN;
560 }
561
562 /*
563  * Give a subdisk to a drive, check and adjust several parameters, adjust
564  * freelist.
565  */
566 int
567 gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
568 {
569         struct gv_sd *s2;
570         struct gv_freelist *fl, *fl2;
571         off_t tmp;
572         int i;
573
574         fl2 = NULL;
575
576         /* Shortcut for "referenced" drives. */
577         if (d->flags & GV_DRIVE_REFERENCED) {
578                 s->drive_sc = d;
579                 return (0);
580         }
581
582         /* Check if this subdisk was already given to this drive. */
583         if (s->drive_sc != NULL) {
584                 if (s->drive_sc == d) {
585                         if (!(s->flags & GV_SD_TASTED)) {
586                                 return (0);
587                         }
588                 } else {
589                         G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' "
590                             "(already on '%s')", s->name, d->name,
591                             s->drive_sc->name);
592                         return (GV_ERR_ISATTACHED);
593                 }
594         }
595
596         /* Preliminary checks. */
597         if ((s->size > d->avail) || (d->freelist_entries == 0)) {
598                 G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
599                     s->name);
600                 return (GV_ERR_NOSPACE);
601         }
602
603         /* If no size was given for this subdisk, try to auto-size it... */
604         if (s->size == -1) {
605                 /* Find the largest available slot. */
606                 LIST_FOREACH(fl, &d->freelist, freelist) {
607                         if (fl->size < s->size)
608                                 continue;
609                         s->size = fl->size;
610                         s->drive_offset = fl->offset;
611                         fl2 = fl;
612                 }
613
614                 /* No good slot found? */
615                 if (s->size == -1) {
616                         G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'",
617                             s->name, d->name);
618                         return (GV_ERR_BADSIZE);
619                 }
620
621         /*
622          * ... or check if we have a free slot that's large enough for the
623          * given size.
624          */
625         } else {
626                 i = 0;
627                 LIST_FOREACH(fl, &d->freelist, freelist) {
628                         if (fl->size < s->size)
629                                 continue;
630                         /* Assign drive offset, if not given. */
631                         if (s->drive_offset == -1)
632                                 s->drive_offset = fl->offset;
633                         fl2 = fl;
634                         i++;
635                         break;
636                 }
637
638                 /* Couldn't find a good free slot. */
639                 if (i == 0) {
640                         G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
641                             s->name, d->name);
642                         return (GV_ERR_NOSPACE);
643                 }
644         }
645
646         /* No drive offset given, try to calculate it. */
647         if (s->drive_offset == -1) {
648
649                 /* Add offsets and sizes from other subdisks on this drive. */
650                 LIST_FOREACH(s2, &d->subdisks, from_drive) {
651                         s->drive_offset = s2->drive_offset + s2->size;
652                 }
653
654                 /*
655                  * If there are no other subdisks yet, then set the default
656                  * offset to GV_DATA_START.
657                  */
658                 if (s->drive_offset == -1)
659                         s->drive_offset = GV_DATA_START;
660
661         /* Check if we have a free slot at the given drive offset. */
662         } else {
663                 i = 0;
664                 LIST_FOREACH(fl, &d->freelist, freelist) {
665                         /* Yes, this subdisk fits. */
666                         if ((fl->offset <= s->drive_offset) &&
667                             (fl->offset + fl->size >=
668                             s->drive_offset + s->size)) {
669                                 i++;
670                                 fl2 = fl;
671                                 break;
672                         }
673                 }
674
675                 /* Couldn't find a good free slot. */
676                 if (i == 0) {
677                         G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
678                             "on '%s'", s->name, d->name);
679                         return (GV_ERR_NOSPACE);
680                 }
681         }
682
683         /*
684          * Now that all parameters are checked and set up, we can give the
685          * subdisk to the drive and adjust the freelist.
686          */
687
688         /* First, adjust the freelist. */
689         LIST_FOREACH(fl, &d->freelist, freelist) {
690                 /* Look for the free slot that we have found before. */
691                 if (fl != fl2)
692                         continue;
693
694                 /* The subdisk starts at the beginning of the free slot. */
695                 if (fl->offset == s->drive_offset) {
696                         fl->offset += s->size;
697                         fl->size -= s->size;
698
699                         /* The subdisk uses the whole slot, so remove it. */
700                         if (fl->size == 0) {
701                                 d->freelist_entries--;
702                                 LIST_REMOVE(fl, freelist);
703                         }
704                 /*
705                  * The subdisk does not start at the beginning of the free
706                  * slot.
707                  */
708                 } else {
709                         tmp = fl->offset + fl->size;
710                         fl->size = s->drive_offset - fl->offset;
711
712                         /*
713                          * The subdisk didn't use the complete rest of the free
714                          * slot, so we need to split it.
715                          */
716                         if (s->drive_offset + s->size != tmp) {
717                                 fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
718                                 fl2->offset = s->drive_offset + s->size;
719                                 fl2->size = tmp - fl2->offset;
720                                 LIST_INSERT_AFTER(fl, fl2, freelist);
721                                 d->freelist_entries++;
722                         }
723                 }
724                 break;
725         }
726
727         /*
728          * This is the first subdisk on this drive, just insert it into the
729          * list.
730          */
731         if (LIST_EMPTY(&d->subdisks)) {
732                 LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
733
734         /* There are other subdisks, so insert this one in correct order. */
735         } else {
736                 LIST_FOREACH(s2, &d->subdisks, from_drive) {
737                         if (s->drive_offset < s2->drive_offset) {
738                                 LIST_INSERT_BEFORE(s2, s, from_drive);
739                                 break;
740                         } else if (LIST_NEXT(s2, from_drive) == NULL) {
741                                 LIST_INSERT_AFTER(s2, s, from_drive);
742                                 break;
743                         }
744                 }
745         }
746
747         d->sdcount++;
748         d->avail -= s->size;
749
750         s->flags &= ~GV_SD_TASTED;
751
752         /* Link back from the subdisk to this drive. */
753         s->drive_sc = d;
754
755         return (0);
756 }
757
758 void
759 gv_free_sd(struct gv_sd *s)
760 {
761         struct gv_drive *d;
762         struct gv_freelist *fl, *fl2;
763
764         KASSERT(s != NULL, ("gv_free_sd: NULL s"));
765
766         d = s->drive_sc;
767         if (d == NULL)
768                 return;
769
770         /*
771          * First, find the free slot that's immediately before or after this
772          * subdisk.
773          */
774         fl = NULL;
775         LIST_FOREACH(fl, &d->freelist, freelist) {
776                 if (fl->offset == s->drive_offset + s->size)
777                         break;
778                 if (fl->offset + fl->size == s->drive_offset)
779                         break;
780         }
781
782         /* If there is no free slot behind this subdisk, so create one. */
783         if (fl == NULL) {
784
785                 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
786                 fl->size = s->size;
787                 fl->offset = s->drive_offset;
788
789                 if (d->freelist_entries == 0) {
790                         LIST_INSERT_HEAD(&d->freelist, fl, freelist);
791                 } else {
792                         LIST_FOREACH(fl2, &d->freelist, freelist) {
793                                 if (fl->offset < fl2->offset) {
794                                         LIST_INSERT_BEFORE(fl2, fl, freelist);
795                                         break;
796                                 } else if (LIST_NEXT(fl2, freelist) == NULL) {
797                                         LIST_INSERT_AFTER(fl2, fl, freelist);
798                                         break;
799                                 }
800                         }
801                 }
802
803                 d->freelist_entries++;
804
805         /* Expand the free slot we just found. */
806         } else {
807                 fl->size += s->size;
808                 if (fl->offset > s->drive_offset)
809                         fl->offset = s->drive_offset;
810         }
811
812         d->avail += s->size;
813         d->sdcount--;
814 }
815
816 void
817 gv_adjust_freespace(struct gv_sd *s, off_t remainder)
818 {
819         struct gv_drive *d;
820         struct gv_freelist *fl, *fl2;
821
822         KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
823         d = s->drive_sc;
824         KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
825
826         /* First, find the free slot that's immediately after this subdisk. */
827         fl = NULL;
828         LIST_FOREACH(fl, &d->freelist, freelist) {
829                 if (fl->offset == s->drive_offset + s->size)
830                         break;
831         }
832
833         /* If there is no free slot behind this subdisk, so create one. */
834         if (fl == NULL) {
835
836                 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
837                 fl->size = remainder;
838                 fl->offset = s->drive_offset + s->size - remainder;
839
840                 if (d->freelist_entries == 0) {
841                         LIST_INSERT_HEAD(&d->freelist, fl, freelist);
842                 } else {
843                         LIST_FOREACH(fl2, &d->freelist, freelist) {
844                                 if (fl->offset < fl2->offset) {
845                                         LIST_INSERT_BEFORE(fl2, fl, freelist);
846                                         break;
847                                 } else if (LIST_NEXT(fl2, freelist) == NULL) {
848                                         LIST_INSERT_AFTER(fl2, fl, freelist);
849                                         break;
850                                 }
851                         }
852                 }
853
854                 d->freelist_entries++;
855
856         /* Expand the free slot we just found. */
857         } else {
858                 fl->offset -= remainder;
859                 fl->size += remainder;
860         }
861
862         s->size -= remainder;
863         d->avail += remainder;
864 }
865
866 /* Check if the given plex is a striped one. */
867 int
868 gv_is_striped(struct gv_plex *p)
869 {
870         KASSERT(p != NULL, ("gv_is_striped: NULL p"));
871         switch(p->org) {
872         case GV_PLEX_STRIPED:
873         case GV_PLEX_RAID5:
874                 return (1);
875         default:
876                 return (0);
877         }
878 }
879
880 /* Find a volume by name. */
881 struct gv_volume *
882 gv_find_vol(struct gv_softc *sc, char *name)
883 {
884         struct gv_volume *v;
885
886         LIST_FOREACH(v, &sc->volumes, volume) {
887                 if (!strncmp(v->name, name, GV_MAXVOLNAME))
888                         return (v);
889         }
890
891         return (NULL);
892 }
893
894 /* Find a plex by name. */
895 struct gv_plex *
896 gv_find_plex(struct gv_softc *sc, char *name)
897 {
898         struct gv_plex *p;
899
900         LIST_FOREACH(p, &sc->plexes, plex) {
901                 if (!strncmp(p->name, name, GV_MAXPLEXNAME))
902                         return (p);
903         }
904
905         return (NULL);
906 }
907
908 /* Find a subdisk by name. */
909 struct gv_sd *
910 gv_find_sd(struct gv_softc *sc, char *name)
911 {
912         struct gv_sd *s;
913
914         LIST_FOREACH(s, &sc->subdisks, sd) {
915                 if (!strncmp(s->name, name, GV_MAXSDNAME))
916                         return (s);
917         }
918
919         return (NULL);
920 }
921
922 /* Find a drive by name. */
923 struct gv_drive *
924 gv_find_drive(struct gv_softc *sc, char *name)
925 {
926         struct gv_drive *d;
927
928         LIST_FOREACH(d, &sc->drives, drive) {
929                 if (!strncmp(d->name, name, GV_MAXDRIVENAME))
930                         return (d);
931         }
932
933         return (NULL);
934 }
935
936 /* Find a drive given a device. */
937 struct gv_drive *
938 gv_find_drive_device(struct gv_softc *sc, char *device)
939 {
940         struct gv_drive *d;
941
942         LIST_FOREACH(d, &sc->drives, drive) {
943                 if(!strcmp(d->device, device))
944                         return (d);
945         }
946
947         return (NULL);
948 }
949
950 /* Check if any consumer of the given geom is open. */
951 int
952 gv_consumer_is_open(struct g_consumer *cp)
953 {
954         if (cp == NULL)
955                 return (0);
956
957         if (cp->acr || cp->acw || cp->ace)
958                 return (1);
959
960         return (0);
961 }
962
963 int
964 gv_provider_is_open(struct g_provider *pp)
965 {
966         if (pp == NULL)
967                 return (0);
968
969         if (pp->acr || pp->acw || pp->ace)
970                 return (1);
971
972         return (0);
973 }
974
975 /*
976  * Compare the modification dates of the drives.
977  * Return 1 if a > b, 0 otherwise.
978  */
979 int
980 gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
981 {
982         struct gv_drive *d2;
983         struct timeval *a, *b;
984
985         KASSERT(!LIST_EMPTY(&sc->drives),
986             ("gv_is_drive_newer: empty drive list"));
987
988         a = &d->hdr->label.last_update;
989         LIST_FOREACH(d2, &sc->drives, drive) {
990                 if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
991                     (d2->hdr == NULL))
992                         continue;
993                 b = &d2->hdr->label.last_update;
994                 if (timevalcmp(a, b, >))
995                         return (1);
996         }
997
998         return (0);
999 }
1000
1001 /* Return the type of object identified by string 'name'. */
1002 int
1003 gv_object_type(struct gv_softc *sc, char *name)
1004 {
1005         struct gv_drive *d;
1006         struct gv_plex *p;
1007         struct gv_sd *s;
1008         struct gv_volume *v;
1009
1010         LIST_FOREACH(v, &sc->volumes, volume) {
1011                 if (!strncmp(v->name, name, GV_MAXVOLNAME))
1012                         return (GV_TYPE_VOL);
1013         }
1014
1015         LIST_FOREACH(p, &sc->plexes, plex) {
1016                 if (!strncmp(p->name, name, GV_MAXPLEXNAME))
1017                         return (GV_TYPE_PLEX);
1018         }
1019
1020         LIST_FOREACH(s, &sc->subdisks, sd) {
1021                 if (!strncmp(s->name, name, GV_MAXSDNAME))
1022                         return (GV_TYPE_SD);
1023         }
1024
1025         LIST_FOREACH(d, &sc->drives, drive) {
1026                 if (!strncmp(d->name, name, GV_MAXDRIVENAME))
1027                         return (GV_TYPE_DRIVE);
1028         }
1029
1030         return (GV_ERR_NOTFOUND);
1031 }
1032
1033 void
1034 gv_setup_objects(struct gv_softc *sc)
1035 {
1036         struct g_provider *pp;
1037         struct gv_volume *v;
1038         struct gv_plex *p;
1039         struct gv_sd *s;
1040         struct gv_drive *d;
1041
1042         LIST_FOREACH(s, &sc->subdisks, sd) {
1043                 d = gv_find_drive(sc, s->drive);
1044                 if (d != NULL)
1045                         gv_sd_to_drive(s, d);
1046                 p = gv_find_plex(sc, s->plex);
1047                 if (p != NULL)
1048                         gv_sd_to_plex(s, p);
1049                 gv_update_sd_state(s);
1050         }
1051
1052         LIST_FOREACH(p, &sc->plexes, plex) {
1053                 gv_update_plex_config(p);
1054                 v = gv_find_vol(sc, p->volume);
1055                 if (v != NULL && p->vol_sc != v) {
1056                         p->vol_sc = v;
1057                         v->plexcount++;
1058                         LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1059                 }
1060                 gv_update_plex_config(p);
1061         }
1062
1063         LIST_FOREACH(v, &sc->volumes, volume) {
1064                 v->size = gv_vol_size(v);
1065                 if (v->provider == NULL) {
1066                         g_topology_lock();
1067                         pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
1068                         pp->mediasize = v->size;
1069                         pp->sectorsize = 512;    /* XXX */
1070                         g_error_provider(pp, 0);
1071                         v->provider = pp;
1072                         pp->private = v;
1073                         g_topology_unlock();
1074                 } else if (v->provider->mediasize != v->size) {
1075                         g_topology_lock();
1076                         v->provider->mediasize = v->size;
1077                         g_topology_unlock();
1078                 }
1079                 v->flags &= ~GV_VOL_NEWBORN;
1080                 gv_update_vol_state(v);
1081         }
1082 }
1083
1084 void
1085 gv_cleanup(struct gv_softc *sc)
1086 {
1087         struct gv_volume *v, *v2;
1088         struct gv_plex *p, *p2;
1089         struct gv_sd *s, *s2;
1090         struct gv_drive *d, *d2;
1091         struct gv_freelist *fl, *fl2;
1092
1093         mtx_lock(&sc->config_mtx);
1094         LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
1095                 LIST_REMOVE(v, volume);
1096                 g_free(v->wqueue);
1097                 g_free(v);
1098         }
1099         LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
1100                 LIST_REMOVE(p, plex);
1101                 g_free(p->bqueue);
1102                 g_free(p->rqueue);
1103                 g_free(p->wqueue);
1104                 g_free(p);
1105         }
1106         LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
1107                 LIST_REMOVE(s, sd);
1108                 g_free(s);
1109         }
1110         LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
1111                 LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
1112                         LIST_REMOVE(fl, freelist);
1113                         g_free(fl);
1114                 }
1115                 LIST_REMOVE(d, drive);
1116                 g_free(d->hdr);
1117                 g_free(d);
1118         }
1119         mtx_destroy(&sc->config_mtx);
1120 }
1121
1122 /* General 'attach' routine. */
1123 int
1124 gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
1125 {
1126         struct gv_sd *s;
1127         struct gv_softc *sc;
1128
1129         g_topology_assert();
1130
1131         sc = p->vinumconf;
1132         KASSERT(sc != NULL, ("NULL sc"));
1133
1134         if (p->vol_sc != NULL) {
1135                 G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1136                     p->name, p->volume);
1137                 return (GV_ERR_ISATTACHED);
1138         }
1139
1140         /* Stale all subdisks of this plex. */
1141         LIST_FOREACH(s, &p->subdisks, in_plex) {
1142                 if (s->state != GV_SD_STALE)
1143                         gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1144         }
1145         /* Attach to volume. Make sure volume is not up and running. */
1146         if (gv_provider_is_open(v->provider)) {
1147                 G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
1148                     p->name, v->name);
1149                 return (GV_ERR_ISBUSY);
1150         }
1151         p->vol_sc = v;
1152         strlcpy(p->volume, v->name, sizeof(p->volume));
1153         v->plexcount++;
1154         if (rename) {
1155                 snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
1156                     v->plexcount);
1157         }
1158         LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1159
1160         /* Get plex up again. */
1161         gv_update_vol_size(v, gv_vol_size(v));
1162         gv_set_plex_state(p, GV_PLEX_UP, 0);
1163         gv_save_config(p->vinumconf);
1164         return (0);
1165 }
1166
1167 int
1168 gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
1169 {
1170         struct gv_sd *s2;
1171         int error, sdcount;
1172
1173         g_topology_assert();
1174
1175         /* If subdisk is attached, don't do it. */
1176         if (s->plex_sc != NULL) {
1177                 G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1178                     s->name, s->plex);
1179                 return (GV_ERR_ISATTACHED);
1180         }
1181
1182         gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1183         /* First check that this subdisk has a correct offset. If none other
1184          * starts at the same, and it's correct module stripesize, it is */
1185         if (offset != -1 && offset % p->stripesize != 0)
1186                 return (GV_ERR_BADOFFSET);
1187         LIST_FOREACH(s2, &p->subdisks, in_plex) {
1188                 if (s2->plex_offset == offset)
1189                         return (GV_ERR_BADOFFSET);
1190         }
1191
1192         /* Attach the subdisk to the plex at given offset. */
1193         s->plex_offset = offset;
1194         strlcpy(s->plex, p->name, sizeof(s->plex));
1195
1196         sdcount = p->sdcount;
1197         error = gv_sd_to_plex(s, p);
1198         if (error)
1199                 return (error);
1200         gv_update_plex_config(p);
1201
1202         if (rename) {
1203                 snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
1204                     p->sdcount);
1205         }
1206         if (p->vol_sc != NULL)
1207                 gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
1208         gv_save_config(p->vinumconf);
1209         /* We don't update the subdisk state since the user might have to
1210          * initiate a rebuild/sync first. */
1211         return (0);
1212 }
1213
1214 /* Detach a plex from a volume. */
1215 int
1216 gv_detach_plex(struct gv_plex *p, int flags)
1217 {
1218         struct gv_volume *v;
1219
1220         g_topology_assert();
1221         v = p->vol_sc;
1222
1223         if (v == NULL) {
1224                 G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1225                     p->name);
1226                 return (0); /* Not an error. */
1227         }
1228
1229         /*
1230          * Only proceed if forced or volume inactive.
1231          */
1232         if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
1233             p->state == GV_PLEX_UP)) {
1234                 G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
1235                     p->name, p->volume);
1236                 return (GV_ERR_ISBUSY);
1237         }
1238         v->plexcount--;
1239         /* Make sure someone don't read us when gone. */
1240         v->last_read_plex = NULL; 
1241         LIST_REMOVE(p, in_volume);
1242         p->vol_sc = NULL;
1243         memset(p->volume, 0, GV_MAXVOLNAME);
1244         gv_update_vol_size(v, gv_vol_size(v));
1245         gv_save_config(p->vinumconf);
1246         return (0);
1247 }
1248
1249 /* Detach a subdisk from a plex. */
1250 int
1251 gv_detach_sd(struct gv_sd *s, int flags)
1252 {
1253         struct gv_plex *p;
1254
1255         g_topology_assert();
1256         p = s->plex_sc;
1257
1258         if (p == NULL) {
1259                 G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1260                     s->name);
1261                 return (0); /* Not an error. */
1262         }
1263
1264         /*
1265          * Don't proceed if we're not forcing, and the plex is up, or degraded
1266          * with this subdisk up.
1267          */
1268         if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
1269             ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
1270                 G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
1271                     s->name, s->plex);
1272                 return (GV_ERR_ISBUSY);
1273         }
1274
1275         LIST_REMOVE(s, in_plex);
1276         s->plex_sc = NULL;
1277         memset(s->plex, 0, GV_MAXPLEXNAME);
1278         p->sddetached++;
1279         gv_save_config(s->vinumconf);
1280         return (0);
1281 }