]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/geom/vinum/geom_vinum_drive.c
add -n option to suppress clearing the build tree and add -DNO_CLEAN
[FreeBSD/FreeBSD.git] / sys / geom / vinum / geom_vinum_drive.c
1 /*-
2  * Copyright (c) 2004, 2005 Lukas Ertl
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <sys/param.h>
31 #include <sys/bio.h>
32 #include <sys/errno.h>
33 #include <sys/endian.h>
34 #include <sys/conf.h>
35 #include <sys/kernel.h>
36 #include <sys/kthread.h>
37 #include <sys/libkern.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mutex.h>
42 #include <sys/sbuf.h>
43 #include <sys/systm.h>
44 #include <sys/time.h>
45 #include <sys/vimage.h>
46
47 #include <geom/geom.h>
48 #include <geom/vinum/geom_vinum_var.h>
49 #include <geom/vinum/geom_vinum.h>
50 #include <geom/vinum/geom_vinum_share.h>
51
/*
 * Legacy on-disk header flavours, as returned by gv_legacy_header_type().
 * The little-endian pair is I386/AMD64, the big-endian pair is
 * POWERPC/SPARC64.
 */
enum {
	GV_LEGACY_I386 = 0,
	GV_LEGACY_AMD64 = 1,
	GV_LEGACY_SPARC64 = 2,
	GV_LEGACY_POWERPC = 3
};

/* Forward declarations of file-local functions used before their bodies. */
static void	gv_drive_dead(void *arg, int flag);
static void	gv_drive_worker(void *arg);
static int	gv_legacy_header_type(uint8_t *hdr, int bigendian);
60
61 /*
62  * Here are the "offset (size)" for the various struct gv_hdr fields,
63  * for the legacy i386 (or 32-bit powerpc), legacy amd64 (or sparc64), and
64  * current (cpu & endian agnostic) versions of the on-disk format of the vinum
65  * header structure:
66  *
67  *       i386    amd64   current   field
68  *     -------- -------- --------  -----
69  *       0 ( 8)   0 ( 8)   0 ( 8)  magic
70  *       8 ( 4)   8 ( 8)   8 ( 8)  config_length
71  *      12 (32)  16 (32)  16 (32)  label.sysname
72  *      44 (32)  48 (32)  48 (32)  label.name
73  *      76 ( 4)  80 ( 8)  80 ( 8)  label.date_of_birth.tv_sec
74  *      80 ( 4)  88 ( 8)  88 ( 8)  label.date_of_birth.tv_usec
75  *      84 ( 4)  96 ( 8)  96 ( 8)  label.last_update.tv_sec
76  *      88 ( 4) 104 ( 8) 104 ( 8)  label.last_update.tv_usec
77  *      92 ( 8) 112 ( 8) 112 ( 8)  label.drive_size
78  *     ======== ======== ========
79  *     100      120      120       total size
80  *
81  * NOTE: i386 and amd64 formats are stored as little-endian; the current
82  * format uses big-endian (network order).
83  */
84
85
86 /* Checks for legacy format depending on platform. */
87 static int
88 gv_legacy_header_type(uint8_t *hdr, int bigendian)
89 {
90         uint32_t *i32;
91         int arch_32, arch_64, i;
92
93         /* Set arch according to endianess. */
94         if (bigendian) {
95                 arch_32 = GV_LEGACY_POWERPC;
96                 arch_64 = GV_LEGACY_SPARC64;
97         } else {
98                 arch_32 = GV_LEGACY_I386;
99                 arch_64 = GV_LEGACY_AMD64;
100         }
101
102         /* if non-empty hostname overlaps 64-bit config_length */
103         i32 = (uint32_t *)(hdr + 12);
104         if (*i32 != 0)
105                 return (arch_32);
106         /* check for non-empty hostname */
107         if (hdr[16] != 0)
108                 return (arch_64);
109         /* check bytes past 32-bit structure */
110         for (i = 100; i < 120; i++)
111                 if (hdr[i] != 0)
112                         return (arch_32);
113         /* check for overlapping timestamp */
114         i32 = (uint32_t *)(hdr + 84);
115
116         if (*i32 == 0)
117                 return (arch_64);
118         return (arch_32);
119 }
120
121 /*
122  * Read the header while taking magic number into account, and write it to
123  * destination pointer.
124  */
125 int
126 gv_read_header(struct g_consumer *cp, struct gv_hdr *m_hdr)
127 {
128         struct g_provider *pp;
129         uint64_t magic_machdep;
130         uint8_t *d_hdr;
131         int be, off;
132
133 #define GV_GET32(endian)                                        \
134                 endian##32toh(*((uint32_t *)&d_hdr[off]));      \
135                 off += 4
136 #define GV_GET64(endian)                                        \
137                 endian##64toh(*((uint64_t *)&d_hdr[off]));      \
138                 off += 8
139
140         KASSERT(m_hdr != NULL, ("gv_read_header: null m_hdr"));
141         KASSERT(cp != NULL, ("gv_read_header: null cp"));
142         pp = cp->provider;
143         KASSERT(pp != NULL, ("gv_read_header: null pp"));
144
145         d_hdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, NULL);
146         if (d_hdr == NULL)
147                 return (-1);
148         off = 0;
149         m_hdr->magic = GV_GET64(be);
150         magic_machdep = *((uint64_t *)&d_hdr[0]);
151         /*
152          * The big endian machines will have a reverse of GV_OLD_MAGIC, so we
153          * need to decide if we are running on a big endian machine as well as
154          * checking the magic against the reverse of GV_OLD_MAGIC.
155          */
156         be = (m_hdr->magic == magic_machdep);
157         if (m_hdr->magic == GV_MAGIC) {
158                 m_hdr->config_length = GV_GET64(be);
159                 off = 16;
160                 bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN);
161                 off += GV_HOSTNAME_LEN;
162                 bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME);
163                 off += GV_MAXDRIVENAME;
164                 m_hdr->label.date_of_birth.tv_sec = GV_GET64(be);
165                 m_hdr->label.date_of_birth.tv_usec = GV_GET64(be);
166                 m_hdr->label.last_update.tv_sec = GV_GET64(be);
167                 m_hdr->label.last_update.tv_usec = GV_GET64(be);
168                 m_hdr->label.drive_size = GV_GET64(be);
169         } else if (m_hdr->magic != GV_OLD_MAGIC &&
170             m_hdr->magic != le64toh(GV_OLD_MAGIC)) {
171                 /* Not a gvinum drive. */
172                 g_free(d_hdr);
173                 return (-1);
174         } else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_SPARC64) {
175                 printf("VINUM: detected legacy sparc64 header\n");
176                 m_hdr->magic = GV_MAGIC;
177                 /* Legacy sparc64 on-disk header */
178                 m_hdr->config_length = GV_GET64(be);
179                 bcopy(d_hdr + 16, m_hdr->label.sysname, GV_HOSTNAME_LEN);
180                 off += GV_HOSTNAME_LEN;
181                 bcopy(d_hdr + 48, m_hdr->label.name, GV_MAXDRIVENAME);
182                 off += GV_MAXDRIVENAME;
183                 m_hdr->label.date_of_birth.tv_sec = GV_GET64(be);
184                 m_hdr->label.date_of_birth.tv_usec = GV_GET64(be);
185                 m_hdr->label.last_update.tv_sec = GV_GET64(be);
186                 m_hdr->label.last_update.tv_usec = GV_GET64(be);
187                 m_hdr->label.drive_size = GV_GET64(be);
188         } else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_POWERPC) {
189                 printf("VINUM: detected legacy PowerPC header\n");
190                 m_hdr->magic = GV_MAGIC;
191                 /* legacy 32-bit big endian on-disk header */
192                 m_hdr->config_length = GV_GET32(be);
193                 bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN);
194                 off += GV_HOSTNAME_LEN;
195                 bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME);
196                 off += GV_MAXDRIVENAME;
197                 m_hdr->label.date_of_birth.tv_sec = GV_GET32(be);
198                 m_hdr->label.date_of_birth.tv_usec = GV_GET32(be);
199                 m_hdr->label.last_update.tv_sec = GV_GET32(be);
200                 m_hdr->label.last_update.tv_usec = GV_GET32(be);
201                 m_hdr->label.drive_size = GV_GET64(be);
202         } else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_I386) {
203                 printf("VINUM: detected legacy i386 header\n");
204                 m_hdr->magic = GV_MAGIC;
205                 /* legacy i386 on-disk header */
206                 m_hdr->config_length = GV_GET32(le);
207                 bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN);
208                 off += GV_HOSTNAME_LEN;
209                 bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME);
210                 off += GV_MAXDRIVENAME;
211                 m_hdr->label.date_of_birth.tv_sec = GV_GET32(le);
212                 m_hdr->label.date_of_birth.tv_usec = GV_GET32(le);
213                 m_hdr->label.last_update.tv_sec = GV_GET32(le);
214                 m_hdr->label.last_update.tv_usec = GV_GET32(le);
215                 m_hdr->label.drive_size = GV_GET64(le);
216         } else {
217                 printf("VINUM: detected legacy amd64 header\n");
218                 m_hdr->magic = GV_MAGIC;
219                 /* legacy amd64 on-disk header */
220                 m_hdr->config_length = GV_GET64(le);
221                 bcopy(d_hdr + 16, m_hdr->label.sysname, GV_HOSTNAME_LEN);
222                 off += GV_HOSTNAME_LEN;
223                 bcopy(d_hdr + 48, m_hdr->label.name, GV_MAXDRIVENAME);
224                 off += GV_MAXDRIVENAME;
225                 m_hdr->label.date_of_birth.tv_sec = GV_GET64(le);
226                 m_hdr->label.date_of_birth.tv_usec = GV_GET64(le);
227                 m_hdr->label.last_update.tv_sec = GV_GET64(le);
228                 m_hdr->label.last_update.tv_usec = GV_GET64(le);
229                 m_hdr->label.drive_size = GV_GET64(le);
230         }
231
232         g_free(d_hdr);
233         return (0);
234 }
235
236 /* Write out the gvinum header. */
237 int
238 gv_write_header(struct g_consumer *cp, struct gv_hdr *m_hdr)
239 {
240         uint8_t d_hdr[GV_HDR_LEN];
241         int off, ret;
242
243 #define GV_SET64BE(field)                                       \
244         do {                                                    \
245                 *((uint64_t *)&d_hdr[off]) = htobe64(field);    \
246                 off += 8;                                       \
247         } while (0)
248
249         KASSERT(m_hdr != NULL, ("gv_write_header: null m_hdr"));
250
251         off = 0;
252         memset(d_hdr, 0, GV_HDR_LEN);
253         GV_SET64BE(m_hdr->magic);
254         GV_SET64BE(m_hdr->config_length);
255         off = 16;
256         bcopy(m_hdr->label.sysname, d_hdr + off, GV_HOSTNAME_LEN);
257         off += GV_HOSTNAME_LEN;
258         bcopy(m_hdr->label.name, d_hdr + off, GV_MAXDRIVENAME);
259         off += GV_MAXDRIVENAME;
260         GV_SET64BE(m_hdr->label.date_of_birth.tv_sec);
261         GV_SET64BE(m_hdr->label.date_of_birth.tv_usec);
262         GV_SET64BE(m_hdr->label.last_update.tv_sec);
263         GV_SET64BE(m_hdr->label.last_update.tv_usec);
264         GV_SET64BE(m_hdr->label.drive_size);
265
266         ret = g_write_data(cp, GV_HDR_OFFSET, d_hdr, GV_HDR_LEN);
267         return (ret);
268 }
269
270 void
271 gv_config_new_drive(struct gv_drive *d)
272 {
273         struct gv_hdr *vhdr;
274         struct gv_freelist *fl;
275
276         KASSERT(d != NULL, ("config_new_drive: NULL d"));
277
278         vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO);
279         vhdr->magic = GV_MAGIC;
280         vhdr->config_length = GV_CFG_LEN;
281
282         mtx_lock(&hostname_mtx);
283         bcopy(G_hostname, vhdr->label.sysname, GV_HOSTNAME_LEN);
284         mtx_unlock(&hostname_mtx);
285         strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME);
286         microtime(&vhdr->label.date_of_birth);
287
288         d->hdr = vhdr;
289
290         LIST_INIT(&d->subdisks);
291         LIST_INIT(&d->freelist);
292
293         fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO);
294         fl->offset = GV_DATA_START;
295         fl->size = d->avail;
296         LIST_INSERT_HEAD(&d->freelist, fl, freelist);
297         d->freelist_entries = 1;
298
299         d->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO);
300         bioq_init(d->bqueue);
301         mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
302         kproc_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name);
303         d->flags |= GV_DRIVE_THREAD_ACTIVE;
304 }
305
306 void
307 gv_save_config_all(struct gv_softc *sc)
308 {
309         struct gv_drive *d;
310
311         g_topology_assert();
312
313         LIST_FOREACH(d, &sc->drives, drive) {
314                 if (d->geom == NULL)
315                         continue;
316                 gv_save_config(NULL, d, sc);
317         }
318 }
319
320 /* Save the vinum configuration back to disk. */
321 void
322 gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc)
323 {
324         struct g_geom *gp;
325         struct g_consumer *cp2;
326         struct gv_hdr *vhdr, *hdr;
327         struct sbuf *sb;
328         int error;
329
330         g_topology_assert();
331
332         KASSERT(d != NULL, ("gv_save_config: null d"));
333         KASSERT(sc != NULL, ("gv_save_config: null sc"));
334
335         /*
336          * We can't save the config on a drive that isn't up, but drives that
337          * were just created aren't officially up yet, so we check a special
338          * flag.
339          */
340         if ((d->state != GV_DRIVE_UP) && !(d->flags && GV_DRIVE_NEWBORN))
341                 return;
342
343         if (cp == NULL) {
344                 gp = d->geom;
345                 KASSERT(gp != NULL, ("gv_save_config: null gp"));
346                 cp2 = LIST_FIRST(&gp->consumer);
347                 KASSERT(cp2 != NULL, ("gv_save_config: null cp2"));
348         } else
349                 cp2 = cp;
350
351         vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
352         vhdr->magic = GV_MAGIC;
353         vhdr->config_length = GV_CFG_LEN;
354
355         hdr = d->hdr;
356         if (hdr == NULL) {
357                 printf("GEOM_VINUM: drive %s has NULL hdr\n", d->name);
358                 g_free(vhdr);
359                 return;
360         }
361         microtime(&hdr->label.last_update);
362         bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label));
363
364         sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
365         gv_format_config(sc, sb, 1, NULL);
366         sbuf_finish(sb);
367
368         error = g_access(cp2, 0, 1, 0);
369         if (error) {
370                 printf("GEOM_VINUM: g_access failed on drive %s, errno %d\n",
371                     d->name, error);
372                 sbuf_delete(sb);
373                 g_free(vhdr);
374                 return;
375         }
376         g_topology_unlock();
377
378         do {
379                 error = gv_write_header(cp2, vhdr);
380                 if (error) {
381                         printf("GEOM_VINUM: writing vhdr failed on drive %s, "
382                             "errno %d", d->name, error);
383                         break;
384                 }
385
386                 error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb),
387                     GV_CFG_LEN);
388                 if (error) {
389                         printf("GEOM_VINUM: writing first config copy failed "
390                             "on drive %s, errno %d", d->name, error);
391                         break;
392                 }
393                 
394                 error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN,
395                     sbuf_data(sb), GV_CFG_LEN);
396                 if (error)
397                         printf("GEOM_VINUM: writing second config copy failed "
398                             "on drive %s, errno %d", d->name, error);
399         } while (0);
400
401         g_topology_lock();
402         g_access(cp2, 0, -1, 0);
403         sbuf_delete(sb);
404         g_free(vhdr);
405
406         if (d->geom != NULL)
407                 gv_drive_modify(d);
408 }
409
410 /* This resembles g_slice_access(). */
411 static int
412 gv_drive_access(struct g_provider *pp, int dr, int dw, int de)
413 {
414         struct g_geom *gp;
415         struct g_consumer *cp;
416         struct g_provider *pp2;
417         struct gv_drive *d;
418         struct gv_sd *s, *s2;
419         int error;
420
421         gp = pp->geom;
422         cp = LIST_FIRST(&gp->consumer);
423         if (cp == NULL)
424                 return (0);
425
426         d = gp->softc;
427         if (d == NULL)
428                 return (0);
429
430         s = pp->private;
431         KASSERT(s != NULL, ("gv_drive_access: NULL s"));
432
433         LIST_FOREACH(s2, &d->subdisks, from_drive) {
434                 if (s == s2)
435                         continue;
436                 if (s->drive_offset + s->size <= s2->drive_offset)
437                         continue;
438                 if (s2->drive_offset + s2->size <= s->drive_offset)
439                         continue;
440
441                 /* Overlap. */
442                 pp2 = s2->provider;
443                 KASSERT(s2 != NULL, ("gv_drive_access: NULL s2"));
444                 if ((pp->acw + dw) > 0 && pp2->ace > 0)
445                         return (EPERM);
446                 if ((pp->ace + de) > 0 && pp2->acw > 0)
447                         return (EPERM);
448         }
449
450         error = g_access(cp, dr, dw, de);
451         return (error);
452 }
453
454 static void
455 gv_drive_done(struct bio *bp)
456 {
457         struct gv_drive *d;
458
459         /* Put the BIO on the worker queue again. */
460         d = bp->bio_from->geom->softc;
461         bp->bio_cflags |= GV_BIO_DONE;
462         mtx_lock(&d->bqueue_mtx);
463         bioq_insert_tail(d->bqueue, bp);
464         wakeup(d);
465         mtx_unlock(&d->bqueue_mtx);
466 }
467
468
469 static void
470 gv_drive_start(struct bio *bp)
471 {
472         struct gv_drive *d;
473         struct gv_sd *s;
474
475         switch (bp->bio_cmd) {
476         case BIO_READ:
477         case BIO_WRITE:
478         case BIO_DELETE:
479                 break;
480         case BIO_GETATTR:
481         default:
482                 g_io_deliver(bp, EOPNOTSUPP);
483                 return;
484         }
485
486         s = bp->bio_to->private;
487         if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) {
488                 g_io_deliver(bp, ENXIO);
489                 return;
490         }
491
492         d = bp->bio_to->geom->softc;
493
494         /*
495          * Put the BIO on the worker queue, where the worker thread will pick
496          * it up.
497          */
498         mtx_lock(&d->bqueue_mtx);
499         bioq_disksort(d->bqueue, bp);
500         wakeup(d);
501         mtx_unlock(&d->bqueue_mtx);
502
503 }
504
505 static void
506 gv_drive_worker(void *arg)
507 {
508         struct bio *bp, *cbp;
509         struct g_geom *gp;
510         struct g_provider *pp;
511         struct gv_drive *d;
512         struct gv_sd *s;
513         int error;
514
515         d = arg;
516
517         mtx_lock(&d->bqueue_mtx);
518         for (;;) {
519                 /* We were signaled to exit. */
520                 if (d->flags & GV_DRIVE_THREAD_DIE)
521                         break;
522
523                 /* Take the first BIO from out queue. */
524                 bp = bioq_takefirst(d->bqueue);
525                 if (bp == NULL) {
526                         msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10);
527                         continue;
528                 }
529                 mtx_unlock(&d->bqueue_mtx);
530  
531                 pp = bp->bio_to;
532                 gp = pp->geom;
533
534                 /* Completed request. */
535                 if (bp->bio_cflags & GV_BIO_DONE) {
536                         error = bp->bio_error;
537
538                         /* Deliver the original request. */
539                         g_std_done(bp);
540
541                         /* The request had an error, we need to clean up. */
542                         if (error != 0) {
543                                 g_topology_lock();
544                                 gv_set_drive_state(d, GV_DRIVE_DOWN,
545                                     GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
546                                 g_topology_unlock();
547                                 g_post_event(gv_drive_dead, d, M_WAITOK, d,
548                                     NULL);
549                         }
550
551                 /* New request, needs to be sent downwards. */
552                 } else {
553                         s = pp->private;
554
555                         if ((s->state == GV_SD_DOWN) ||
556                             (s->state == GV_SD_STALE)) {
557                                 g_io_deliver(bp, ENXIO);
558                                 mtx_lock(&d->bqueue_mtx);
559                                 continue;
560                         }
561                         if (bp->bio_offset > s->size) {
562                                 g_io_deliver(bp, EINVAL);
563                                 mtx_lock(&d->bqueue_mtx);
564                                 continue;
565                         }
566
567                         cbp = g_clone_bio(bp);
568                         if (cbp == NULL) {
569                                 g_io_deliver(bp, ENOMEM);
570                                 mtx_lock(&d->bqueue_mtx);
571                                 continue;
572                         }
573                         if (cbp->bio_offset + cbp->bio_length > s->size)
574                                 cbp->bio_length = s->size -
575                                     cbp->bio_offset;
576                         cbp->bio_done = gv_drive_done;
577                         cbp->bio_offset += s->drive_offset;
578                         g_io_request(cbp, LIST_FIRST(&gp->consumer));
579                 }
580
581                 mtx_lock(&d->bqueue_mtx);
582         }
583
584         while ((bp = bioq_takefirst(d->bqueue)) != NULL) {
585                 mtx_unlock(&d->bqueue_mtx);
586                 if (bp->bio_cflags & GV_BIO_DONE) 
587                         g_std_done(bp);
588                 else
589                         g_io_deliver(bp, ENXIO);
590                 mtx_lock(&d->bqueue_mtx);
591         }
592         mtx_unlock(&d->bqueue_mtx);
593         d->flags |= GV_DRIVE_THREAD_DEAD;
594
595         kproc_exit(ENXIO);
596 }
597
598
599 static void
600 gv_drive_orphan(struct g_consumer *cp)
601 {
602         struct g_geom *gp;
603         struct gv_drive *d;
604
605         g_topology_assert();
606         gp = cp->geom;
607         g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name);
608         d = gp->softc;
609         if (d != NULL) {
610                 gv_set_drive_state(d, GV_DRIVE_DOWN,
611                     GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
612                 g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL);
613         } else
614                 g_wither_geom(gp, ENXIO);
615 }
616
617 static struct g_geom *
618 gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
619 {
620         struct g_geom *gp, *gp2;
621         struct g_consumer *cp;
622         struct gv_drive *d;
623         struct gv_sd *s;
624         struct gv_softc *sc;
625         struct gv_freelist *fl;
626         struct gv_hdr *vhdr;
627         int error;
628         char *buf, errstr[ERRBUFSIZ];
629
630         vhdr = NULL;
631         d = NULL;
632
633         g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name);
634         g_topology_assert();
635
636         /* Find the VINUM class and its associated geom. */
637         gp2 = find_vinum_geom();
638         if (gp2 == NULL)
639                 return (NULL);
640         sc = gp2->softc;
641
642         gp = g_new_geomf(mp, "%s.vinumdrive", pp->name);
643         gp->start = gv_drive_start;
644         gp->orphan = gv_drive_orphan;
645         gp->access = gv_drive_access;
646         gp->start = gv_drive_start;
647
648         cp = g_new_consumer(gp);
649         g_attach(cp, pp);
650         error = g_access(cp, 1, 0, 0);
651         if (error) {
652                 g_detach(cp);
653                 g_destroy_consumer(cp);
654                 g_destroy_geom(gp);
655                 return (NULL);
656         }
657
658         g_topology_unlock();
659
660         /* Now check if the provided slice is a valid vinum drive. */
661         do {
662                 vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
663                 error = gv_read_header(cp, vhdr);
664                 if (error) {
665                         g_free(vhdr);
666                         break;
667                 }
668
669                 /* A valid vinum drive, let's parse the on-disk information. */
670                 buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, NULL);
671                 if (buf == NULL) {
672                         g_free(vhdr);
673                         break;
674                 }
675                 g_topology_lock();
676                 gv_parse_config(sc, buf, 1);
677                 g_free(buf);
678
679                 /*
680                  * Let's see if this drive is already known in the
681                  * configuration.
682                  */
683                 d = gv_find_drive(sc, vhdr->label.name);
684
685                 /* We already know about this drive. */
686                 if (d != NULL) {
687                         /* Check if this drive already has a geom. */
688                         if (d->geom != NULL) {
689                                 g_topology_unlock();
690                                 g_free(vhdr);
691                                 break;
692                         }
693                         bcopy(vhdr, d->hdr, sizeof(*vhdr));
694                         g_free(vhdr);
695
696                 /* This is a new drive. */
697                 } else {
698                         d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
699
700                         /* Initialize all needed variables. */
701                         d->size = pp->mediasize - GV_DATA_START;
702                         d->avail = d->size;
703                         d->hdr = vhdr;
704                         strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME);
705                         LIST_INIT(&d->subdisks);
706                         LIST_INIT(&d->freelist);
707
708                         /* We also need a freelist entry. */
709                         fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
710                         fl->offset = GV_DATA_START;
711                         fl->size = d->avail;
712                         LIST_INSERT_HEAD(&d->freelist, fl, freelist);
713                         d->freelist_entries = 1;
714
715                         /* Save it into the main configuration. */
716                         LIST_INSERT_HEAD(&sc->drives, d, drive);
717                 }
718
719                 /*
720                  * Create bio queue, queue mutex and a worker thread, if
721                  * necessary.
722                  */
723                 if (d->bqueue == NULL) {
724                         d->bqueue = g_malloc(sizeof(struct bio_queue_head),
725                             M_WAITOK | M_ZERO);
726                         bioq_init(d->bqueue);
727                 }
728                 if (mtx_initialized(&d->bqueue_mtx) == 0)
729                         mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
730
731                 if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) {
732                         kproc_create(gv_drive_worker, d, NULL, 0, 0,
733                             "gv_d %s", d->name);
734                         d->flags |= GV_DRIVE_THREAD_ACTIVE;
735                 }
736
737                 g_access(cp, -1, 0, 0);
738
739                 gp->softc = d;
740                 d->geom = gp;
741                 d->vinumconf = sc;
742                 strncpy(d->device, pp->name, GV_MAXDRIVENAME);
743
744                 /*
745                  * Find out which subdisks belong to this drive and crosslink
746                  * them.
747                  */
748                 LIST_FOREACH(s, &sc->subdisks, sd) {
749                         if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME))
750                                 /* XXX: errors ignored */
751                                 gv_sd_to_drive(sc, d, s, errstr,
752                                     sizeof(errstr));
753                 }
754
755                 /* This drive is now up for sure. */
756                 gv_set_drive_state(d, GV_DRIVE_UP, 0);
757
758                 /*
759                  * If there are subdisks on this drive, we need to create
760                  * providers for them.
761                  */ 
762                 if (d->sdcount)
763                         gv_drive_modify(d);
764
765                 return (gp);
766
767         } while (0);
768
769         g_topology_lock();
770         g_access(cp, -1, 0, 0);
771
772         g_detach(cp);
773         g_destroy_consumer(cp);
774         g_destroy_geom(gp);
775         return (NULL);
776 }
777
778 /*
779  * Modify the providers for the given drive 'd'.  It is assumed that the
780  * subdisk list of 'd' is already correctly set up.
781  */
782 void
783 gv_drive_modify(struct gv_drive *d)
784 {
785         struct g_geom *gp;
786         struct g_consumer *cp;
787         struct g_provider *pp, *pp2;
788         struct gv_sd *s;
789
790         KASSERT(d != NULL, ("gv_drive_modify: null d"));
791         gp = d->geom;
792         KASSERT(gp != NULL, ("gv_drive_modify: null gp"));
793         cp = LIST_FIRST(&gp->consumer);
794         KASSERT(cp != NULL, ("gv_drive_modify: null cp"));
795         pp = cp->provider;
796         KASSERT(pp != NULL, ("gv_drive_modify: null pp"));
797
798         g_topology_assert();
799
800         LIST_FOREACH(s, &d->subdisks, from_drive) {
801                 /* This subdisk already has a provider. */
802                 if (s->provider != NULL)
803                         continue;
804                 pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name);
805                 pp2->mediasize = s->size;
806                 pp2->sectorsize = pp->sectorsize;
807                 g_error_provider(pp2, 0);
808                 s->provider = pp2;
809                 pp2->private = s;
810         }
811 }
812
813 static void
814 gv_drive_dead(void *arg, int flag)
815 {
816         struct g_geom *gp;
817         struct g_consumer *cp;
818         struct gv_drive *d;
819         struct gv_sd *s;
820
821         g_topology_assert();
822         KASSERT(arg != NULL, ("gv_drive_dead: NULL arg"));
823
824         if (flag == EV_CANCEL)
825                 return;
826
827         d = arg;
828         if (d->state != GV_DRIVE_DOWN)
829                 return;
830
831         g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name);
832
833         gp = d->geom;
834         if (gp == NULL)
835                 return;
836
837         LIST_FOREACH(cp, &gp->consumer, consumer) {
838                 if (cp->nstart != cp->nend) {
839                         printf("GEOM_VINUM: dead drive '%s' has still "
840                             "active requests, can't detach consumer\n",
841                             d->name);
842                         g_post_event(gv_drive_dead, d, M_WAITOK, d,
843                             NULL);
844                         return;
845                 }
846                 if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
847                         g_access(cp, -cp->acr, -cp->acw, -cp->ace);
848         }
849
850         printf("GEOM_VINUM: lost drive '%s'\n", d->name);
851         d->geom = NULL;
852         LIST_FOREACH(s, &d->subdisks, from_drive) {
853                 s->provider = NULL;
854                 s->consumer = NULL;
855         }
856         gv_kill_drive_thread(d);
857         gp->softc = NULL;
858         g_wither_geom(gp, ENXIO);
859 }
860
861 static int
862 gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp,
863     struct g_geom *gp)
864 {
865         struct gv_drive *d;
866
867         g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name);
868         g_topology_assert();
869
870         d = gp->softc;
871         gv_kill_drive_thread(d);
872
873         g_wither_geom(gp, ENXIO);
874         return (0);
875 }
876
877 #define VINUMDRIVE_CLASS_NAME "VINUMDRIVE"
878
879 static struct g_class g_vinum_drive_class = {
880         .name = VINUMDRIVE_CLASS_NAME,
881         .version = G_VERSION,
882         .taste = gv_drive_taste,
883         .destroy_geom = gv_drive_destroy_geom
884 };
885
886 DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive);