]> CyberLeo.Net >> Repos - FreeBSD/releng/8.1.git/blob - sys/geom/vinum/geom_vinum_subr.c
Copy stable/8 to releng/8.1 in preparation for 8.1-RC1.
[FreeBSD/releng/8.1.git] / sys / geom / vinum / geom_vinum_subr.c
1 /*-
2  * Copyright (c) 2004, 2007 Lukas Ertl
3  * Copyright (c) 2007, 2009 Ulf Lilleengen
4  * Copyright (c) 1997, 1998, 1999
5  *      Nan Yang Computer Services Limited.  All rights reserved.
6  *
7  *  Parts written by Greg Lehey
8  *
9  *  This software is distributed under the so-called ``Berkeley
10  *  License'':
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *      This product includes software developed by Nan Yang Computer
23  *      Services Limited.
24  * 4. Neither the name of the Company nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * This software is provided ``as is'', and any express or implied
29  * warranties, including, but not limited to, the implied warranties of
30  * merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall the company or contributors be liable for any
32  * direct, indirect, incidental, special, exemplary, or consequential
33  * damages (including, but not limited to, procurement of substitute
34  * goods or services; loss of use, data, or profits; or business
35  * interruption) however caused and on any theory of liability, whether
36  * in contract, strict liability, or tort (including negligence or
37  * otherwise) arising in any way out of the use of this software, even if
38  * advised of the possibility of such damage.
39  *
40  */
41
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
44
45 #include <sys/param.h>
46 #include <sys/malloc.h>
47 #include <sys/systm.h>
48
49 #include <geom/geom.h>
50 #include <geom/vinum/geom_vinum_var.h>
51 #include <geom/vinum/geom_vinum.h>
52 #include <geom/vinum/geom_vinum_share.h>
53
54 int     gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
55 static off_t gv_plex_smallest_sd(struct gv_plex *);
56
57 void
58 gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
59 {
60         char *aptr, *bptr, *cptr;
61         struct gv_volume *v, *v2;
62         struct gv_plex *p, *p2;
63         struct gv_sd *s, *s2;
64         int error, is_newer, tokens;
65         char *token[GV_MAXARGS];
66
67         is_newer = gv_drive_is_newer(sc, d);
68
69         /* Until the end of the string *buf. */
70         for (aptr = buf; *aptr != '\0'; aptr = bptr) {
71                 bptr = aptr;
72                 cptr = aptr;
73
74                 /* Seperate input lines. */
75                 while (*bptr != '\n')
76                         bptr++;
77                 *bptr = '\0';
78                 bptr++;
79
80                 tokens = gv_tokenize(cptr, token, GV_MAXARGS);
81
82                 if (tokens <= 0)
83                         continue;
84
85                 if (!strcmp(token[0], "volume")) {
86                         v = gv_new_volume(tokens, token);
87                         if (v == NULL) {
88                                 G_VINUM_DEBUG(0, "config parse failed volume");
89                                 break;
90                         }
91
92                         v2 = gv_find_vol(sc, v->name);
93                         if (v2 != NULL) {
94                                 if (is_newer) {
95                                         v2->state = v->state;
96                                         G_VINUM_DEBUG(2, "newer volume found!");
97                                 }
98                                 g_free(v);
99                                 continue;
100                         }
101
102                         gv_create_volume(sc, v);
103
104                 } else if (!strcmp(token[0], "plex")) {
105                         p = gv_new_plex(tokens, token);
106                         if (p == NULL) {
107                                 G_VINUM_DEBUG(0, "config parse failed plex");
108                                 break;
109                         }
110
111                         p2 = gv_find_plex(sc, p->name);
112                         if (p2 != NULL) {
113                                 /* XXX */
114                                 if (is_newer) {
115                                         p2->state = p->state;
116                                         G_VINUM_DEBUG(2, "newer plex found!");
117                                 }
118                                 g_free(p);
119                                 continue;
120                         }
121
122                         error = gv_create_plex(sc, p);
123                         if (error)
124                                 continue;
125                         /*
126                          * These flags were set in gv_create_plex() and are not
127                          * needed here (on-disk config parsing).
128                          */
129                         p->flags &= ~GV_PLEX_ADDED;
130
131                 } else if (!strcmp(token[0], "sd")) {
132                         s = gv_new_sd(tokens, token);
133
134                         if (s == NULL) {
135                                 G_VINUM_DEBUG(0, "config parse failed subdisk");
136                                 break;
137                         }
138
139                         s2 = gv_find_sd(sc, s->name);
140                         if (s2 != NULL) {
141                                 /* XXX */
142                                 if (is_newer) {
143                                         s2->state = s->state;
144                                         G_VINUM_DEBUG(2, "newer subdisk found!");
145                                 }
146                                 g_free(s);
147                                 continue;
148                         }
149
150                         /*
151                          * Signal that this subdisk was tasted, and could
152                          * possibly reference a drive that isn't in our config
153                          * yet.
154                          */
155                         s->flags |= GV_SD_TASTED;
156
157                         if (s->state == GV_SD_UP)
158                                 s->flags |= GV_SD_CANGOUP;
159
160                         error = gv_create_sd(sc, s);
161                         if (error)
162                                 continue;
163
164                         /*
165                          * This flag was set in gv_create_sd() and is not
166                          * needed here (on-disk config parsing).
167                          */
168                         s->flags &= ~GV_SD_NEWBORN;
169                         s->flags &= ~GV_SD_GROW;
170                 }
171         }
172 }
173
174 /*
175  * Format the vinum configuration properly.  If ondisk is non-zero then the
176  * configuration is intended to be written to disk later.
177  */
178 void
179 gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
180 {
181         struct gv_drive *d;
182         struct gv_sd *s;
183         struct gv_plex *p;
184         struct gv_volume *v;
185
186         /*
187          * We don't need the drive configuration if we're not writing the
188          * config to disk.
189          */
190         if (!ondisk) {
191                 LIST_FOREACH(d, &sc->drives, drive) {
192                         sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
193                             d->name, d->device);
194                 }
195         }
196
197         LIST_FOREACH(v, &sc->volumes, volume) {
198                 if (!ondisk)
199                         sbuf_printf(sb, "%s", prefix);
200                 sbuf_printf(sb, "volume %s", v->name);
201                 if (ondisk)
202                         sbuf_printf(sb, " state %s", gv_volstate(v->state));
203                 sbuf_printf(sb, "\n");
204         }
205
206         LIST_FOREACH(p, &sc->plexes, plex) {
207                 if (!ondisk)
208                         sbuf_printf(sb, "%s", prefix);
209                 sbuf_printf(sb, "plex name %s org %s ", p->name,
210                     gv_plexorg(p->org));
211                 if (gv_is_striped(p))
212                         sbuf_printf(sb, "%ds ", p->stripesize / 512);
213                 if (p->vol_sc != NULL)
214                         sbuf_printf(sb, "vol %s", p->volume);
215                 if (ondisk)
216                         sbuf_printf(sb, " state %s", gv_plexstate(p->state));
217                 sbuf_printf(sb, "\n");
218         }
219
220         LIST_FOREACH(s, &sc->subdisks, sd) {
221                 if (!ondisk)
222                         sbuf_printf(sb, "%s", prefix);
223                 sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
224                     "%jds", s->name, s->drive, s->size / 512,
225                     s->drive_offset / 512);
226                 if (s->plex_sc != NULL) {
227                         sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
228                             s->plex_offset / 512);
229                 }
230                 if (ondisk)
231                         sbuf_printf(sb, " state %s", gv_sdstate(s->state));
232                 sbuf_printf(sb, "\n");
233         }
234 }
235
236 static off_t
237 gv_plex_smallest_sd(struct gv_plex *p)
238 {
239         struct gv_sd *s;
240         off_t smallest;
241
242         KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
243
244         s = LIST_FIRST(&p->subdisks);
245         if (s == NULL)
246                 return (-1);
247         smallest = s->size;
248         LIST_FOREACH(s, &p->subdisks, in_plex) {
249                 if (s->size < smallest)
250                         smallest = s->size;
251         }
252         return (smallest);
253 }
254
255 /* Walk over plexes in a volume and count how many are down. */
256 int
257 gv_plexdown(struct gv_volume *v)
258 {
259         int plexdown;
260         struct gv_plex *p;
261
262         KASSERT(v != NULL, ("gv_plexdown: NULL v"));
263
264         plexdown = 0;
265
266         LIST_FOREACH(p, &v->plexes, plex) {
267                 if (p->state == GV_PLEX_DOWN)
268                         plexdown++;
269         }
270         return (plexdown);
271 }
272
273 int
274 gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
275 {
276         struct gv_sd *s2;
277         off_t psizeorig, remainder, smallest;
278
279         /* If this subdisk was already given to this plex, do nothing. */
280         if (s->plex_sc == p)
281                 return (0);
282
283         /* Check correct size of this subdisk. */
284         s2 = LIST_FIRST(&p->subdisks);
285         /* Adjust the subdisk-size if necessary. */
286         if (s2 != NULL && gv_is_striped(p)) {
287                 /* First adjust to the stripesize. */
288                 remainder = s->size % p->stripesize;
289
290                 if (remainder) {
291                         G_VINUM_DEBUG(1, "size of sd %s is not a "
292                             "multiple of plex stripesize, taking off "
293                             "%jd bytes", s->name,
294                             (intmax_t)remainder);
295                         gv_adjust_freespace(s, remainder);
296                 }
297
298                 smallest = gv_plex_smallest_sd(p);
299                 /* Then take off extra if other subdisks are smaller. */
300                 remainder = s->size - smallest;
301
302                 /*
303                  * Don't allow a remainder below zero for running plexes, it's too
304                  * painful, and if someone were to accidentally do this, the
305                  * resulting array might be smaller than the original... not god 
306                  */
307                 if (remainder < 0) {
308                         if (!(p->flags & GV_PLEX_NEWBORN)) {
309                                 G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
310                                     s->name, p->name);
311                                 return (GV_ERR_BADSIZE);
312                         }
313                         /* Adjust other subdisks. */
314                         LIST_FOREACH(s2, &p->subdisks, in_plex) {
315                                 G_VINUM_DEBUG(1, "size of sd %s is to big, "
316                                     "taking off %jd bytes", s->name,
317                                     (intmax_t)remainder);
318                                 gv_adjust_freespace(s2, (remainder * -1));
319                         }
320                 } else if (remainder > 0) {
321                         G_VINUM_DEBUG(1, "size of sd %s is to big, "
322                             "taking off %jd bytes", s->name,
323                             (intmax_t)remainder);
324                         gv_adjust_freespace(s, remainder);
325                 }
326         }
327
328         /* Find the correct plex offset for this subdisk, if needed. */
329         if (s->plex_offset == -1) {
330                 /* 
331                  * First set it to 0 to catch the case where we had a detached
332                  * subdisk that didn't get any good offset.
333                  */
334                 s->plex_offset = 0;
335                 if (p->sdcount) {
336                         LIST_FOREACH(s2, &p->subdisks, in_plex) {
337                                 if (gv_is_striped(p))
338                                         s->plex_offset = p->sdcount *
339                                             p->stripesize;
340                                 else
341                                         s->plex_offset = s2->plex_offset +
342                                             s2->size;
343                         }
344                 }
345         }
346
347         /* There are no subdisks for this plex yet, just insert it. */
348         if (LIST_EMPTY(&p->subdisks)) {
349                 LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
350
351         /* Insert in correct order, depending on plex_offset. */
352         } else {
353                 LIST_FOREACH(s2, &p->subdisks, in_plex) {
354                         if (s->plex_offset < s2->plex_offset) {
355                                 LIST_INSERT_BEFORE(s2, s, in_plex);
356                                 break;
357                         } else if (LIST_NEXT(s2, in_plex) == NULL) {
358                                 LIST_INSERT_AFTER(s2, s, in_plex);
359                                 break;
360                         }
361                 }
362         }
363
364         s->plex_sc = p;
365         /* Adjust the size of our plex. We check if the plex misses a subdisk,
366          * so we don't make the plex smaller than it actually should be.
367          */
368         psizeorig = p->size;
369         p->size = gv_plex_size(p);
370         /* Make sure the size is not changed. */
371         if (p->sddetached > 0) {
372                 if (p->size < psizeorig) {
373                         p->size = psizeorig;
374                         /* We make sure wee need another subdisk. */
375                         if (p->sddetached == 1)
376                                 p->sddetached++;
377                 }
378                 p->sddetached--;
379         } else {
380                 if ((p->org == GV_PLEX_RAID5 ||
381                     p->org == GV_PLEX_STRIPED) &&
382                     !(p->flags & GV_PLEX_NEWBORN) && 
383                     p->state == GV_PLEX_UP) {
384                         s->flags |= GV_SD_GROW;
385                 }
386                 p->sdcount++;
387         }
388
389         return (0);
390 }
391
392 void
393 gv_update_vol_size(struct gv_volume *v, off_t size)
394 {
395         if (v == NULL)
396                 return;
397         if (v->provider != NULL) {
398                 g_topology_lock();
399                 v->provider->mediasize = size;
400                 g_topology_unlock();
401         }
402         v->size = size;
403 }
404
405 /* Return how many subdisks that constitute the original plex. */
406 int
407 gv_sdcount(struct gv_plex *p, int growing)
408 {
409         struct gv_sd *s;
410         int sdcount;
411
412         sdcount = p->sdcount;
413         if (growing) {
414                 LIST_FOREACH(s, &p->subdisks, in_plex) {
415                         if (s->flags & GV_SD_GROW)
416                                 sdcount--;
417                 }
418         }
419
420         return (sdcount);
421 }
422
423 /* Calculates the plex size. */
424 off_t
425 gv_plex_size(struct gv_plex *p)
426 {
427         struct gv_sd *s;
428         off_t size;
429         int sdcount;
430
431         KASSERT(p != NULL, ("gv_plex_size: NULL p"));
432
433         /* Adjust the size of our plex. */
434         size = 0;
435         sdcount = gv_sdcount(p, 1);
436         switch (p->org) {
437         case GV_PLEX_CONCAT:
438                 LIST_FOREACH(s, &p->subdisks, in_plex)
439                         size += s->size;
440                 break;
441         case GV_PLEX_STRIPED:
442                 s = LIST_FIRST(&p->subdisks);
443                 size = ((s != NULL) ? (sdcount * s->size) : 0);
444                 break;
445         case GV_PLEX_RAID5:
446                 s = LIST_FIRST(&p->subdisks);
447                 size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
448                 break;
449         }
450
451         return (size);
452 }
453
454 /* Returns the size of a volume. */
455 off_t
456 gv_vol_size(struct gv_volume *v)
457 {
458         struct gv_plex *p;
459         off_t minplexsize;
460
461         KASSERT(v != NULL, ("gv_vol_size: NULL v"));
462
463         p = LIST_FIRST(&v->plexes);
464         if (p == NULL)
465                 return (0);
466
467         minplexsize = p->size;
468         LIST_FOREACH(p, &v->plexes, in_volume) {
469                 if (p->size < minplexsize) {
470                         minplexsize = p->size;
471                 }
472         }
473         return (minplexsize);
474 }
475
476 void
477 gv_update_plex_config(struct gv_plex *p)
478 {
479         struct gv_sd *s, *s2;
480         off_t remainder;
481         int required_sds, state;
482
483         KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
484
485         /* The plex was added to an already running volume. */
486         if (p->flags & GV_PLEX_ADDED)
487                 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
488
489         switch (p->org) {
490         case GV_PLEX_STRIPED:
491                 required_sds = 2;
492                 break;
493         case GV_PLEX_RAID5:
494                 required_sds = 3;
495                 break;
496         case GV_PLEX_CONCAT:
497         default:
498                 required_sds = 0;
499                 break;
500         }
501
502         if (required_sds) {
503                 if (p->sdcount < required_sds) {
504                         gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
505                 }
506
507                 /*
508                  * The subdisks in striped plexes must all have the same size.
509                  */
510                 s = LIST_FIRST(&p->subdisks);
511                 LIST_FOREACH(s2, &p->subdisks, in_plex) {
512                         if (s->size != s2->size) {
513                                 G_VINUM_DEBUG(0, "subdisk size mismatch %s"
514                                     "(%jd) <> %s (%jd)", s->name, s->size,
515                                     s2->name, s2->size);
516                                 gv_set_plex_state(p, GV_PLEX_DOWN,
517                                     GV_SETSTATE_FORCE);
518                         }
519                 }
520
521                 LIST_FOREACH(s, &p->subdisks, in_plex) {
522                         /* Trim subdisk sizes to match the stripe size. */
523                         remainder = s->size % p->stripesize;
524                         if (remainder) {
525                                 G_VINUM_DEBUG(1, "size of sd %s is not a "
526                                     "multiple of plex stripesize, taking off "
527                                     "%jd bytes", s->name, (intmax_t)remainder);
528                                 gv_adjust_freespace(s, remainder);
529                         }
530                 }
531         }
532
533         p->size = gv_plex_size(p);
534         if (p->sdcount == 0)
535                 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
536         else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
537                 LIST_FOREACH(s, &p->subdisks, in_plex)
538                         gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
539                 /* If added to a volume, we want the plex to be down. */
540                 state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
541                 gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
542                 p->flags &= ~GV_PLEX_ADDED;
543         } else if (p->flags & GV_PLEX_ADDED) {
544                 LIST_FOREACH(s, &p->subdisks, in_plex)
545                         gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
546                 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
547                 p->flags &= ~GV_PLEX_ADDED;
548         } else if (p->state == GV_PLEX_UP) {
549                 LIST_FOREACH(s, &p->subdisks, in_plex) {
550                         if (s->flags & GV_SD_GROW) {
551                                 gv_set_plex_state(p, GV_PLEX_GROWABLE,
552                                     GV_SETSTATE_FORCE);
553                                 break;
554                         }
555                 }
556         }
557         /* Our plex is grown up now. */
558         p->flags &= ~GV_PLEX_NEWBORN;
559 }
560
561 /*
562  * Give a subdisk to a drive, check and adjust several parameters, adjust
563  * freelist.
564  */
565 int
566 gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
567 {
568         struct gv_sd *s2;
569         struct gv_freelist *fl, *fl2;
570         off_t tmp;
571         int i;
572
573         fl2 = NULL;
574
575         /* Shortcut for "referenced" drives. */
576         if (d->flags & GV_DRIVE_REFERENCED) {
577                 s->drive_sc = d;
578                 return (0);
579         }
580
581         /* Check if this subdisk was already given to this drive. */
582         if (s->drive_sc != NULL) {
583                 if (s->drive_sc == d) {
584                         if (!(s->flags & GV_SD_TASTED)) {
585                                 return (0);
586                         }
587                 } else {
588                         G_VINUM_DEBUG(0, "can't give sd '%s' to '%s' "
589                             "(already on '%s')", s->name, d->name,
590                             s->drive_sc->name);
591                         return (GV_ERR_ISATTACHED);
592                 }
593         }
594
595         /* Preliminary checks. */
596         if ((s->size > d->avail) || (d->freelist_entries == 0)) {
597                 G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
598                     s->name);
599                 return (GV_ERR_NOSPACE);
600         }
601
602         /* If no size was given for this subdisk, try to auto-size it... */
603         if (s->size == -1) {
604                 /* Find the largest available slot. */
605                 LIST_FOREACH(fl, &d->freelist, freelist) {
606                         if (fl->size < s->size)
607                                 continue;
608                         s->size = fl->size;
609                         s->drive_offset = fl->offset;
610                         fl2 = fl;
611                 }
612
613                 /* No good slot found? */
614                 if (s->size == -1) {
615                         G_VINUM_DEBUG(0, "couldn't autosize '%s' on '%s'",
616                             s->name, d->name);
617                         return (GV_ERR_BADSIZE);
618                 }
619
620         /*
621          * ... or check if we have a free slot that's large enough for the
622          * given size.
623          */
624         } else {
625                 i = 0;
626                 LIST_FOREACH(fl, &d->freelist, freelist) {
627                         if (fl->size < s->size)
628                                 continue;
629                         /* Assign drive offset, if not given. */
630                         if (s->drive_offset == -1)
631                                 s->drive_offset = fl->offset;
632                         fl2 = fl;
633                         i++;
634                         break;
635                 }
636
637                 /* Couldn't find a good free slot. */
638                 if (i == 0) {
639                         G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
640                             s->name, d->name);
641                         return (GV_ERR_NOSPACE);
642                 }
643         }
644
645         /* No drive offset given, try to calculate it. */
646         if (s->drive_offset == -1) {
647
648                 /* Add offsets and sizes from other subdisks on this drive. */
649                 LIST_FOREACH(s2, &d->subdisks, from_drive) {
650                         s->drive_offset = s2->drive_offset + s2->size;
651                 }
652
653                 /*
654                  * If there are no other subdisks yet, then set the default
655                  * offset to GV_DATA_START.
656                  */
657                 if (s->drive_offset == -1)
658                         s->drive_offset = GV_DATA_START;
659
660         /* Check if we have a free slot at the given drive offset. */
661         } else {
662                 i = 0;
663                 LIST_FOREACH(fl, &d->freelist, freelist) {
664                         /* Yes, this subdisk fits. */
665                         if ((fl->offset <= s->drive_offset) &&
666                             (fl->offset + fl->size >=
667                             s->drive_offset + s->size)) {
668                                 i++;
669                                 fl2 = fl;
670                                 break;
671                         }
672                 }
673
674                 /* Couldn't find a good free slot. */
675                 if (i == 0) {
676                         G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
677                             "on '%s'", s->name, d->name);
678                         return (GV_ERR_NOSPACE);
679                 }
680         }
681
682         /*
683          * Now that all parameters are checked and set up, we can give the
684          * subdisk to the drive and adjust the freelist.
685          */
686
687         /* First, adjust the freelist. */
688         LIST_FOREACH(fl, &d->freelist, freelist) {
689                 /* Look for the free slot that we have found before. */
690                 if (fl != fl2)
691                         continue;
692
693                 /* The subdisk starts at the beginning of the free slot. */
694                 if (fl->offset == s->drive_offset) {
695                         fl->offset += s->size;
696                         fl->size -= s->size;
697
698                         /* The subdisk uses the whole slot, so remove it. */
699                         if (fl->size == 0) {
700                                 d->freelist_entries--;
701                                 LIST_REMOVE(fl, freelist);
702                         }
703                 /*
704                  * The subdisk does not start at the beginning of the free
705                  * slot.
706                  */
707                 } else {
708                         tmp = fl->offset + fl->size;
709                         fl->size = s->drive_offset - fl->offset;
710
711                         /*
712                          * The subdisk didn't use the complete rest of the free
713                          * slot, so we need to split it.
714                          */
715                         if (s->drive_offset + s->size != tmp) {
716                                 fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
717                                 fl2->offset = s->drive_offset + s->size;
718                                 fl2->size = tmp - fl2->offset;
719                                 LIST_INSERT_AFTER(fl, fl2, freelist);
720                                 d->freelist_entries++;
721                         }
722                 }
723                 break;
724         }
725
726         /*
727          * This is the first subdisk on this drive, just insert it into the
728          * list.
729          */
730         if (LIST_EMPTY(&d->subdisks)) {
731                 LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
732
733         /* There are other subdisks, so insert this one in correct order. */
734         } else {
735                 LIST_FOREACH(s2, &d->subdisks, from_drive) {
736                         if (s->drive_offset < s2->drive_offset) {
737                                 LIST_INSERT_BEFORE(s2, s, from_drive);
738                                 break;
739                         } else if (LIST_NEXT(s2, from_drive) == NULL) {
740                                 LIST_INSERT_AFTER(s2, s, from_drive);
741                                 break;
742                         }
743                 }
744         }
745
746         d->sdcount++;
747         d->avail -= s->size;
748
749         s->flags &= ~GV_SD_TASTED;
750
751         /* Link back from the subdisk to this drive. */
752         s->drive_sc = d;
753
754         return (0);
755 }
756
757 void
758 gv_free_sd(struct gv_sd *s)
759 {
760         struct gv_drive *d;
761         struct gv_freelist *fl, *fl2;
762
763         KASSERT(s != NULL, ("gv_free_sd: NULL s"));
764
765         d = s->drive_sc;
766         if (d == NULL)
767                 return;
768
769         /*
770          * First, find the free slot that's immediately before or after this
771          * subdisk.
772          */
773         fl = NULL;
774         LIST_FOREACH(fl, &d->freelist, freelist) {
775                 if (fl->offset == s->drive_offset + s->size)
776                         break;
777                 if (fl->offset + fl->size == s->drive_offset)
778                         break;
779         }
780
781         /* If there is no free slot behind this subdisk, so create one. */
782         if (fl == NULL) {
783
784                 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
785                 fl->size = s->size;
786                 fl->offset = s->drive_offset;
787
788                 if (d->freelist_entries == 0) {
789                         LIST_INSERT_HEAD(&d->freelist, fl, freelist);
790                 } else {
791                         LIST_FOREACH(fl2, &d->freelist, freelist) {
792                                 if (fl->offset < fl2->offset) {
793                                         LIST_INSERT_BEFORE(fl2, fl, freelist);
794                                         break;
795                                 } else if (LIST_NEXT(fl2, freelist) == NULL) {
796                                         LIST_INSERT_AFTER(fl2, fl, freelist);
797                                         break;
798                                 }
799                         }
800                 }
801
802                 d->freelist_entries++;
803
804         /* Expand the free slot we just found. */
805         } else {
806                 fl->size += s->size;
807                 if (fl->offset > s->drive_offset)
808                         fl->offset = s->drive_offset;
809         }
810
811         d->avail += s->size;
812         d->sdcount--;
813 }
814
815 void
816 gv_adjust_freespace(struct gv_sd *s, off_t remainder)
817 {
818         struct gv_drive *d;
819         struct gv_freelist *fl, *fl2;
820
821         KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
822         d = s->drive_sc;
823         KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
824
825         /* First, find the free slot that's immediately after this subdisk. */
826         fl = NULL;
827         LIST_FOREACH(fl, &d->freelist, freelist) {
828                 if (fl->offset == s->drive_offset + s->size)
829                         break;
830         }
831
832         /* If there is no free slot behind this subdisk, so create one. */
833         if (fl == NULL) {
834
835                 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
836                 fl->size = remainder;
837                 fl->offset = s->drive_offset + s->size - remainder;
838
839                 if (d->freelist_entries == 0) {
840                         LIST_INSERT_HEAD(&d->freelist, fl, freelist);
841                 } else {
842                         LIST_FOREACH(fl2, &d->freelist, freelist) {
843                                 if (fl->offset < fl2->offset) {
844                                         LIST_INSERT_BEFORE(fl2, fl, freelist);
845                                         break;
846                                 } else if (LIST_NEXT(fl2, freelist) == NULL) {
847                                         LIST_INSERT_AFTER(fl2, fl, freelist);
848                                         break;
849                                 }
850                         }
851                 }
852
853                 d->freelist_entries++;
854
855         /* Expand the free slot we just found. */
856         } else {
857                 fl->offset -= remainder;
858                 fl->size += remainder;
859         }
860
861         s->size -= remainder;
862         d->avail += remainder;
863 }
864
865 /* Check if the given plex is a striped one. */
866 int
867 gv_is_striped(struct gv_plex *p)
868 {
869         KASSERT(p != NULL, ("gv_is_striped: NULL p"));
870         switch(p->org) {
871         case GV_PLEX_STRIPED:
872         case GV_PLEX_RAID5:
873                 return (1);
874         default:
875                 return (0);
876         }
877 }
878
879 /* Find a volume by name. */
880 struct gv_volume *
881 gv_find_vol(struct gv_softc *sc, char *name)
882 {
883         struct gv_volume *v;
884
885         LIST_FOREACH(v, &sc->volumes, volume) {
886                 if (!strncmp(v->name, name, GV_MAXVOLNAME))
887                         return (v);
888         }
889
890         return (NULL);
891 }
892
893 /* Find a plex by name. */
894 struct gv_plex *
895 gv_find_plex(struct gv_softc *sc, char *name)
896 {
897         struct gv_plex *p;
898
899         LIST_FOREACH(p, &sc->plexes, plex) {
900                 if (!strncmp(p->name, name, GV_MAXPLEXNAME))
901                         return (p);
902         }
903
904         return (NULL);
905 }
906
907 /* Find a subdisk by name. */
908 struct gv_sd *
909 gv_find_sd(struct gv_softc *sc, char *name)
910 {
911         struct gv_sd *s;
912
913         LIST_FOREACH(s, &sc->subdisks, sd) {
914                 if (!strncmp(s->name, name, GV_MAXSDNAME))
915                         return (s);
916         }
917
918         return (NULL);
919 }
920
921 /* Find a drive by name. */
922 struct gv_drive *
923 gv_find_drive(struct gv_softc *sc, char *name)
924 {
925         struct gv_drive *d;
926
927         LIST_FOREACH(d, &sc->drives, drive) {
928                 if (!strncmp(d->name, name, GV_MAXDRIVENAME))
929                         return (d);
930         }
931
932         return (NULL);
933 }
934
935 /* Find a drive given a device. */
936 struct gv_drive *
937 gv_find_drive_device(struct gv_softc *sc, char *device)
938 {
939         struct gv_drive *d;
940
941         LIST_FOREACH(d, &sc->drives, drive) {
942                 if(!strcmp(d->device, device))
943                         return (d);
944         }
945
946         return (NULL);
947 }
948
949 /* Check if any consumer of the given geom is open. */
950 int
951 gv_consumer_is_open(struct g_consumer *cp)
952 {
953         if (cp == NULL)
954                 return (0);
955
956         if (cp->acr || cp->acw || cp->ace)
957                 return (1);
958
959         return (0);
960 }
961
962 int
963 gv_provider_is_open(struct g_provider *pp)
964 {
965         if (pp == NULL)
966                 return (0);
967
968         if (pp->acr || pp->acw || pp->ace)
969                 return (1);
970
971         return (0);
972 }
973
974 /*
975  * Compare the modification dates of the drives.
976  * Return 1 if a > b, 0 otherwise.
977  */
978 int
979 gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
980 {
981         struct gv_drive *d2;
982         struct timeval *a, *b;
983
984         KASSERT(!LIST_EMPTY(&sc->drives),
985             ("gv_is_drive_newer: empty drive list"));
986
987         a = &d->hdr->label.last_update;
988         LIST_FOREACH(d2, &sc->drives, drive) {
989                 if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
990                     (d2->hdr == NULL))
991                         continue;
992                 b = &d2->hdr->label.last_update;
993                 if (timevalcmp(a, b, >))
994                         return (1);
995         }
996
997         return (0);
998 }
999
1000 /* Return the type of object identified by string 'name'. */
1001 int
1002 gv_object_type(struct gv_softc *sc, char *name)
1003 {
1004         struct gv_drive *d;
1005         struct gv_plex *p;
1006         struct gv_sd *s;
1007         struct gv_volume *v;
1008
1009         LIST_FOREACH(v, &sc->volumes, volume) {
1010                 if (!strncmp(v->name, name, GV_MAXVOLNAME))
1011                         return (GV_TYPE_VOL);
1012         }
1013
1014         LIST_FOREACH(p, &sc->plexes, plex) {
1015                 if (!strncmp(p->name, name, GV_MAXPLEXNAME))
1016                         return (GV_TYPE_PLEX);
1017         }
1018
1019         LIST_FOREACH(s, &sc->subdisks, sd) {
1020                 if (!strncmp(s->name, name, GV_MAXSDNAME))
1021                         return (GV_TYPE_SD);
1022         }
1023
1024         LIST_FOREACH(d, &sc->drives, drive) {
1025                 if (!strncmp(d->name, name, GV_MAXDRIVENAME))
1026                         return (GV_TYPE_DRIVE);
1027         }
1028
1029         return (GV_ERR_NOTFOUND);
1030 }
1031
1032 void
1033 gv_setup_objects(struct gv_softc *sc)
1034 {
1035         struct g_provider *pp;
1036         struct gv_volume *v;
1037         struct gv_plex *p;
1038         struct gv_sd *s;
1039         struct gv_drive *d;
1040
1041         LIST_FOREACH(s, &sc->subdisks, sd) {
1042                 d = gv_find_drive(sc, s->drive);
1043                 if (d != NULL)
1044                         gv_sd_to_drive(s, d);
1045                 p = gv_find_plex(sc, s->plex);
1046                 if (p != NULL)
1047                         gv_sd_to_plex(s, p);
1048                 gv_update_sd_state(s);
1049         }
1050
1051         LIST_FOREACH(p, &sc->plexes, plex) {
1052                 gv_update_plex_config(p);
1053                 v = gv_find_vol(sc, p->volume);
1054                 if (v != NULL && p->vol_sc != v) {
1055                         p->vol_sc = v;
1056                         v->plexcount++;
1057                         LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1058                 }
1059                 gv_update_plex_config(p);
1060         }
1061
1062         LIST_FOREACH(v, &sc->volumes, volume) {
1063                 v->size = gv_vol_size(v);
1064                 if (v->provider == NULL) {
1065                         g_topology_lock();
1066                         pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
1067                         pp->mediasize = v->size;
1068                         pp->sectorsize = 512;    /* XXX */
1069                         g_error_provider(pp, 0);
1070                         v->provider = pp;
1071                         pp->private = v;
1072                         g_topology_unlock();
1073                 } else if (v->provider->mediasize != v->size) {
1074                         g_topology_lock();
1075                         v->provider->mediasize = v->size;
1076                         g_topology_unlock();
1077                 }
1078                 v->flags &= ~GV_VOL_NEWBORN;
1079                 gv_update_vol_state(v);
1080         }
1081 }
1082
1083 void
1084 gv_cleanup(struct gv_softc *sc)
1085 {
1086         struct gv_volume *v, *v2;
1087         struct gv_plex *p, *p2;
1088         struct gv_sd *s, *s2;
1089         struct gv_drive *d, *d2;
1090         struct gv_freelist *fl, *fl2;
1091
1092         mtx_lock(&sc->config_mtx);
1093         LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
1094                 LIST_REMOVE(v, volume);
1095                 g_free(v->wqueue);
1096                 g_free(v);
1097         }
1098         LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
1099                 LIST_REMOVE(p, plex);
1100                 g_free(p->bqueue);
1101                 g_free(p->rqueue);
1102                 g_free(p->wqueue);
1103                 g_free(p);
1104         }
1105         LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
1106                 LIST_REMOVE(s, sd);
1107                 g_free(s);
1108         }
1109         LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
1110                 LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
1111                         LIST_REMOVE(fl, freelist);
1112                         g_free(fl);
1113                 }
1114                 LIST_REMOVE(d, drive);
1115                 g_free(d->hdr);
1116                 g_free(d);
1117         }
1118         mtx_destroy(&sc->config_mtx);
1119 }
1120
1121 /* General 'attach' routine. */
1122 int
1123 gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
1124 {
1125         struct gv_sd *s;
1126         struct gv_softc *sc;
1127
1128         g_topology_assert();
1129
1130         sc = p->vinumconf;
1131         KASSERT(sc != NULL, ("NULL sc"));
1132
1133         if (p->vol_sc != NULL) {
1134                 G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1135                     p->name, p->volume);
1136                 return (GV_ERR_ISATTACHED);
1137         }
1138
1139         /* Stale all subdisks of this plex. */
1140         LIST_FOREACH(s, &p->subdisks, in_plex) {
1141                 if (s->state != GV_SD_STALE)
1142                         gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1143         }
1144         /* Attach to volume. Make sure volume is not up and running. */
1145         if (gv_provider_is_open(v->provider)) {
1146                 G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
1147                     p->name, v->name);
1148                 return (GV_ERR_ISBUSY);
1149         }
1150         p->vol_sc = v;
1151         strlcpy(p->volume, v->name, sizeof(p->volume));
1152         v->plexcount++;
1153         if (rename) {
1154                 snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
1155                     v->plexcount);
1156         }
1157         LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1158
1159         /* Get plex up again. */
1160         gv_update_vol_size(v, gv_vol_size(v));
1161         gv_set_plex_state(p, GV_PLEX_UP, 0);
1162         gv_save_config(p->vinumconf);
1163         return (0);
1164 }
1165
1166 int
1167 gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
1168 {
1169         struct gv_sd *s2;
1170         int error, sdcount;
1171
1172         g_topology_assert();
1173
1174         /* If subdisk is attached, don't do it. */
1175         if (s->plex_sc != NULL) {
1176                 G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1177                     s->name, s->plex);
1178                 return (GV_ERR_ISATTACHED);
1179         }
1180
1181         gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1182         /* First check that this subdisk has a correct offset. If none other
1183          * starts at the same, and it's correct module stripesize, it is */
1184         if (offset != -1 && offset % p->stripesize != 0)
1185                 return (GV_ERR_BADOFFSET);
1186         LIST_FOREACH(s2, &p->subdisks, in_plex) {
1187                 if (s2->plex_offset == offset)
1188                         return (GV_ERR_BADOFFSET);
1189         }
1190
1191         /* Attach the subdisk to the plex at given offset. */
1192         s->plex_offset = offset;
1193         strlcpy(s->plex, p->name, sizeof(s->plex));
1194
1195         sdcount = p->sdcount;
1196         error = gv_sd_to_plex(s, p);
1197         if (error)
1198                 return (error);
1199         gv_update_plex_config(p);
1200
1201         if (rename) {
1202                 snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
1203                     p->sdcount);
1204         }
1205         if (p->vol_sc != NULL)
1206                 gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
1207         gv_save_config(p->vinumconf);
1208         /* We don't update the subdisk state since the user might have to
1209          * initiate a rebuild/sync first. */
1210         return (0);
1211 }
1212
1213 /* Detach a plex from a volume. */
1214 int
1215 gv_detach_plex(struct gv_plex *p, int flags)
1216 {
1217         struct gv_volume *v;
1218
1219         g_topology_assert();
1220         v = p->vol_sc;
1221
1222         if (v == NULL) {
1223                 G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1224                     p->name);
1225                 return (0); /* Not an error. */
1226         }
1227
1228         /*
1229          * Only proceed if forced or volume inactive.
1230          */
1231         if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
1232             p->state == GV_PLEX_UP)) {
1233                 G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
1234                     p->name, p->volume);
1235                 return (GV_ERR_ISBUSY);
1236         }
1237         v->plexcount--;
1238         /* Make sure someone don't read us when gone. */
1239         v->last_read_plex = NULL; 
1240         LIST_REMOVE(p, in_volume);
1241         p->vol_sc = NULL;
1242         memset(p->volume, 0, GV_MAXVOLNAME);
1243         gv_update_vol_size(v, gv_vol_size(v));
1244         gv_save_config(p->vinumconf);
1245         return (0);
1246 }
1247
1248 /* Detach a subdisk from a plex. */
1249 int
1250 gv_detach_sd(struct gv_sd *s, int flags)
1251 {
1252         struct gv_plex *p;
1253
1254         g_topology_assert();
1255         p = s->plex_sc;
1256
1257         if (p == NULL) {
1258                 G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1259                     s->name);
1260                 return (0); /* Not an error. */
1261         }
1262
1263         /*
1264          * Don't proceed if we're not forcing, and the plex is up, or degraded
1265          * with this subdisk up.
1266          */
1267         if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
1268             ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
1269                 G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
1270                     s->name, s->plex);
1271                 return (GV_ERR_ISBUSY);
1272         }
1273
1274         LIST_REMOVE(s, in_plex);
1275         s->plex_sc = NULL;
1276         memset(s->plex, 0, GV_MAXPLEXNAME);
1277         p->sddetached++;
1278         gv_save_config(s->vinumconf);
1279         return (0);
1280 }