/*-
 * Copyright (c) 2004 Lukas Ertl
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>

static void gv_plex_completed_request(struct gv_plex *, struct bio *);
static void gv_plex_normal_request(struct gv_plex *, struct bio *);
static void gv_plex_worker(void *);
static int gv_check_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);
static int gv_normal_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);

/* XXX: is this the place to catch dying subdisks? */
static void
gv_plex_orphan(struct g_consumer *cp)
{
        struct g_geom *gp;
        struct gv_plex *p;
        int error;

        g_topology_assert();
        gp = cp->geom;
        g_trace(G_T_TOPOLOGY, "gv_plex_orphan(%s)", gp->name);

        if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
                g_access(cp, -cp->acr, -cp->acw, -cp->ace);
        error = cp->provider->error;
        if (error == 0)
                error = ENXIO;
        g_detach(cp);
        g_destroy_consumer(cp);
        if (!LIST_EMPTY(&gp->consumer))
                return;

        p = gp->softc;
        if (p != NULL) {
                gv_kill_plex_thread(p);
                p->geom = NULL;
                p->provider = NULL;
                p->consumer = NULL;
        }
        gp->softc = NULL;
        g_wither_geom(gp, error);
}

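/*
 * Completion handler for cloned bios: flag the bio as done and hand it
 * back to the worker thread of the plex it was sent from.
 */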
void
gv_plex_done(struct bio *bp)
{
        struct gv_plex *p;

        p = bp->bio_from->geom->softc;
        bp->bio_cflags |= GV_BIO_DONE;
        mtx_lock(&p->bqueue_mtx);
        bioq_insert_tail(p->bqueue, bp);
        wakeup(p);
        mtx_unlock(&p->bqueue_mtx);
}

/* Find the subdisk to send the bio to and build a cloned bio for it. */
static int
gv_plexbuffer(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff, off_t bcount)
{
        struct g_geom *gp;
        struct gv_sd *s;
        struct bio *cbp, *pbp;
        int i, sdno;
        off_t len_left, real_len, real_off;
        off_t stripeend, stripeno, stripestart;

        if (p == NULL || LIST_EMPTY(&p->subdisks))
                return (ENXIO);

        s = NULL;
        gp = bp->bio_to->geom;

        /*
         * We only handle concatenated and striped plexes here.  RAID5
         * plexes are handled in gv_build_raid5_req().
         */
        switch (p->org) {
        case GV_PLEX_CONCAT:
                /*
                 * Find the subdisk where this request starts.  The subdisks in
                 * this list must be ordered by plex_offset.
                 */
                LIST_FOREACH(s, &p->subdisks, in_plex) {
                        if (s->plex_offset <= boff &&
                            s->plex_offset + s->size > boff)
                                break;
                }
                /* Subdisk not found. */
                if (s == NULL)
                        return (ENXIO);

                /* Calculate corresponding offsets on disk. */
                real_off = boff - s->plex_offset;
                len_left = s->size - real_off;
                real_len = (bcount > len_left) ? len_left : bcount;
                break;

        case GV_PLEX_STRIPED:
                /* The number of the stripe where the request starts. */
                stripeno = boff / p->stripesize;

                /* The number of the subdisk where the stripe resides. */
                sdno = stripeno % p->sdcount;

                /* Find the right subdisk. */
                i = 0;
                LIST_FOREACH(s, &p->subdisks, in_plex) {
                        if (i == sdno)
                                break;
                        i++;
                }

                /* Subdisk not found. */
                if (s == NULL)
                        return (ENXIO);

                /* The offset of the stripe from the start of the subdisk. */
                stripestart = (stripeno / p->sdcount) *
                    p->stripesize;

                /* The offset at the end of the stripe. */
                stripeend = stripestart + p->stripesize;

                /* The offset of the request on this subdisk. */
                real_off = boff - (stripeno * p->stripesize) +
                    stripestart;

                /* The length left in this stripe. */
                len_left = stripeend - real_off;

                real_len = (bcount <= len_left) ? bcount : len_left;
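                /*
                 * Worked example with illustrative numbers: with a
                 * stripesize of 64k and an sdcount of 3, a request at
                 * boff 480k lies in stripeno 7, i.e. on subdisk
                 * 7 % 3 = 1.  stripestart is (7 / 3) * 64k = 128k,
                 * stripeend is 192k, real_off is
                 * 480k - 448k + 128k = 160k and len_left is 32k.
                 */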
                break;

        default:
                return (EINVAL);
        }

        /* Now check if we can handle the request on this subdisk. */
        switch (s->state) {
        case GV_SD_UP:
                /* If the subdisk is up, just continue. */
                break;

        case GV_SD_STALE:
                if (!(bp->bio_cflags & GV_BIO_SYNCREQ))
                        return (ENXIO);

                printf("GEOM_VINUM: sd %s is initializing\n", s->name);
                gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
                break;

        case GV_SD_INITIALIZING:
                if (bp->bio_cmd == BIO_READ)
                        return (ENXIO);
                break;

        default:
                /* All other subdisk states mean it's not accessible. */
                return (ENXIO);
        }

        /* Clone the bio and adjust the offsets and sizes. */
        cbp = g_clone_bio(bp);
        if (cbp == NULL)
                return (ENOMEM);
        cbp->bio_offset = real_off;
        cbp->bio_length = real_len;
        cbp->bio_data = addr;
        cbp->bio_done = g_std_done;
        cbp->bio_caller2 = s->consumer;
        if (bp->bio_cflags & GV_BIO_SYNCREQ) {
                cbp->bio_cflags |= GV_BIO_SYNCREQ;
                cbp->bio_done = gv_plex_done;
        }

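        /*
         * Queue the clone on the chain of sub-requests hanging off the
         * original bio: bio_driver1 points to the first clone, and each
         * clone's bio_caller1 points to the next one.
         */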
        if (bp->bio_driver1 == NULL) {
                bp->bio_driver1 = cbp;
        } else {
                pbp = bp->bio_driver1;
                while (pbp->bio_caller1 != NULL)
                        pbp = pbp->bio_caller1;
                pbp->bio_caller1 = cbp;
        }

        return (0);
}

static void
gv_plex_start(struct bio *bp)
{
        struct gv_plex *p;

        switch (bp->bio_cmd) {
        case BIO_READ:
        case BIO_WRITE:
        case BIO_DELETE:
                break;
        case BIO_GETATTR:
        default:
                g_io_deliver(bp, EOPNOTSUPP);
                return;
        }

        /*
         * We cannot handle this request if too many of our subdisks are
         * inaccessible.
         */
        p = bp->bio_to->geom->softc;
        if ((p->state < GV_PLEX_DEGRADED) &&
            !(bp->bio_cflags & GV_BIO_SYNCREQ)) {
                g_io_deliver(bp, ENXIO);
                return;
        }

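        /* Queue the bio and let the worker thread handle it. */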
        mtx_lock(&p->bqueue_mtx);
        bioq_disksort(p->bqueue, bp);
        wakeup(p);
        mtx_unlock(&p->bqueue_mtx);
}

static void
gv_plex_worker(void *arg)
{
        struct bio *bp;
        struct gv_plex *p;
        struct gv_sd *s;

        p = arg;
        KASSERT(p != NULL, ("NULL p"));

        mtx_lock(&p->bqueue_mtx);
        for (;;) {
                /* We were signaled to exit. */
                if (p->flags & GV_PLEX_THREAD_DIE)
                        break;

                /* Take the first BIO from our queue. */
                bp = bioq_takefirst(p->bqueue);
                if (bp == NULL) {
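                        /*
                         * Sleep until new requests arrive; the timeout
                         * ensures we periodically recheck the
                         * GV_PLEX_THREAD_DIE flag.
                         */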
                        msleep(p, &p->bqueue_mtx, PRIBIO, "-", hz / 10);
                        continue;
                }
                mtx_unlock(&p->bqueue_mtx);

                /* A completed request. */
                if (bp->bio_cflags & GV_BIO_DONE) {
                        if (bp->bio_cflags & GV_BIO_SYNCREQ ||
                            bp->bio_cflags & GV_BIO_REBUILD) {
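                                /*
                                 * Track the synchronization progress;
                                 * once the whole subdisk has been
                                 * written, bring it up.
                                 */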
                                s = bp->bio_to->private;
                                if (bp->bio_error == 0)
                                        s->initialized += bp->bio_length;
                                if (s->initialized >= s->size) {
                                        g_topology_lock();
                                        gv_set_sd_state(s, GV_SD_UP,
                                            GV_SETSTATE_CONFIG);
                                        g_topology_unlock();
                                        s->initialized = 0;
                                }
                        }

                        if (bp->bio_cflags & GV_BIO_SYNCREQ)
                                g_std_done(bp);
                        else
                                gv_plex_completed_request(p, bp);
                /*
                 * A sub-request that was held back because it interfered
                 * with another sub-request.
                 */
                } else if (bp->bio_cflags & GV_BIO_ONHOLD) {
                        /* Is it still locked out? */
                        if (gv_stripe_active(p, bp)) {
                                /* Park the bio on the waiting queue. */
                                mtx_lock(&p->bqueue_mtx);
                                bioq_disksort(p->wqueue, bp);
                                mtx_unlock(&p->bqueue_mtx);
                        } else {
                                bp->bio_cflags &= ~GV_BIO_ONHOLD;
                                g_io_request(bp, bp->bio_caller2);
                        }

                /* A normal request to this plex. */
                } else
                        gv_plex_normal_request(p, bp);

                mtx_lock(&p->bqueue_mtx);
        }
        mtx_unlock(&p->bqueue_mtx);
        p->flags |= GV_PLEX_THREAD_DEAD;
        wakeup(p);

        kthread_exit(ENXIO);
}

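/*
 * Handle the parity data of a normal RAID5 write: first XOR the data of
 * the pending sub-request into the parity buffer and issue that
 * sub-request; on its completion, issue the parity write itself.
 * Returns nonzero once there is nothing left to do.
 */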
static int
gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
        struct bio *cbp, *pbp;
        int finished, i;

        finished = 1;

        if (wp->waiting != NULL) {
                pbp = wp->waiting;
                wp->waiting = NULL;
                cbp = wp->parity;
                for (i = 0; i < wp->length; i++)
                        cbp->bio_data[i] ^= pbp->bio_data[i];
                g_io_request(pbp, pbp->bio_caller2);
                finished = 0;

        } else if (wp->parity != NULL) {
                cbp = wp->parity;
                wp->parity = NULL;
                g_io_request(cbp, cbp->bio_caller2);
                finished = 0;
        }

        return (finished);
}

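/*
 * Check the parity of a RAID5 stripe: once the pending parity read has
 * completed, compare it byte by byte with the parity computed from the
 * data sub-requests.  On a mismatch the parent bio is flagged with
 * EAGAIN and, if GV_BIO_PARITY is set, the corrected parity is written
 * back.  Returns nonzero when the check has finished.
 */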
static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
        struct bio *pbp;
        int err, finished, i;

        err = 0;
        finished = 1;

        if (wp->waiting != NULL) {
                pbp = wp->waiting;
                wp->waiting = NULL;
                g_io_request(pbp, pbp->bio_caller2);
                finished = 0;

        } else if (wp->parity != NULL) {
                pbp = wp->parity;
                wp->parity = NULL;

                /* Check if the parity is correct. */
                for (i = 0; i < wp->length; i++) {
                        if (bp->bio_data[i] != pbp->bio_data[i]) {
                                err = 1;
                                break;
                        }
                }

                /* The parity is not correct... */
                if (err) {
                        bp->bio_parent->bio_error = EAGAIN;

                        /* ... but we rebuild it. */
                        if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) {
                                g_io_request(pbp, pbp->bio_caller2);
                                finished = 0;
                        }
                }

                /*
                 * Clean up the BIO we would have used for rebuilding the
                 * parity.
                 */
                if (finished) {
                        bp->bio_parent->bio_inbed++;
                        g_destroy_bio(pbp);
                }
        }

        return (finished);
}

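/*
 * Handle a cloned bio that has returned from a subdisk: do the RAID5
 * XOR and parity bookkeeping if needed, and deliver the original
 * request once all of its sub-requests have come in.
 */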
void
gv_plex_completed_request(struct gv_plex *p, struct bio *bp)
{
        struct bio *cbp, *pbp;
        struct gv_bioq *bq, *bq2;
        struct gv_raid5_packet *wp;
        int i;

        wp = bp->bio_driver1;

        switch (bp->bio_parent->bio_cmd) {
        case BIO_READ:
                if (wp == NULL)
                        break;

                TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
                        if (bq->bp == bp) {
                                TAILQ_REMOVE(&wp->bits, bq, queue);
                                g_free(bq);
                                for (i = 0; i < wp->length; i++)
                                        wp->data[i] ^= bp->bio_data[i];
                                break;
                        }
                }
                if (TAILQ_EMPTY(&wp->bits)) {
                        bp->bio_parent->bio_completed += wp->length;
                        if (wp->lockbase != -1) {
                                TAILQ_REMOVE(&p->packets, wp, list);
                                /* Bring the waiting bios back into the game. */
                                mtx_lock(&p->bqueue_mtx);
                                pbp = bioq_takefirst(p->wqueue);
                                while (pbp != NULL) {
                                        bioq_disksort(p->bqueue, pbp);
                                        pbp = bioq_takefirst(p->wqueue);
                                }
                                mtx_unlock(&p->bqueue_mtx);
                        }
                        g_free(wp);
                }

                break;

        case BIO_WRITE:
                if (wp == NULL)
                        break;

                /* Check if we need to handle parity data. */
                TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
                        if (bq->bp == bp) {
                                TAILQ_REMOVE(&wp->bits, bq, queue);
                                g_free(bq);
                                cbp = wp->parity;
                                if (cbp != NULL) {
                                        for (i = 0; i < wp->length; i++)
                                                cbp->bio_data[i] ^=
                                                    bp->bio_data[i];
                                }
                                break;
                        }
                }

                /* Handle parity data. */
                if (TAILQ_EMPTY(&wp->bits)) {
                        if (bp->bio_parent->bio_cflags & GV_BIO_CHECK)
                                i = gv_check_parity(p, bp, wp);
                        else
                                i = gv_normal_parity(p, bp, wp);

                        /* All of our sub-requests have finished. */
                        if (i) {
                                bp->bio_parent->bio_completed += wp->length;
                                TAILQ_REMOVE(&p->packets, wp, list);
                                /* Bring the waiting bios back into the game. */
                                mtx_lock(&p->bqueue_mtx);
                                pbp = bioq_takefirst(p->wqueue);
                                while (pbp != NULL) {
                                        bioq_disksort(p->bqueue, pbp);
                                        pbp = bioq_takefirst(p->wqueue);
                                }
                                mtx_unlock(&p->bqueue_mtx);
                                g_free(wp);
                        }
                }

                break;
        }

        pbp = bp->bio_parent;
        if (pbp->bio_error == 0)
                pbp->bio_error = bp->bio_error;

        /* When the original request is finished, we deliver it. */
        pbp->bio_inbed++;
        if (pbp->bio_inbed == pbp->bio_children)
                g_io_deliver(pbp, pbp->bio_error);

        /* Clean up what we allocated. */
        if (bp->bio_cflags & GV_BIO_MALLOC)
                g_free(bp->bio_data);
        g_destroy_bio(bp);
}

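/*
 * Handle a request that came in from the provider of this plex: split
 * it along subdisk or stripe boundaries, build the sub-requests and
 * fire them off.
 */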
void
gv_plex_normal_request(struct gv_plex *p, struct bio *bp)
{
        struct bio *cbp, *pbp;
        struct gv_bioq *bq, *bq2;
        struct gv_raid5_packet *wp, *wp2;
        caddr_t addr;
        off_t bcount, boff;
        int err;

        bcount = bp->bio_length;
        addr = bp->bio_data;
        boff = bp->bio_offset;

        /* Walk over the whole length of the request; we might split it up. */
        while (bcount > 0) {
                wp = NULL;

                /*
                 * RAID5 plexes need special treatment, as a single write
                 * request involves several read/write sub-requests.
                 */
                if (p->org == GV_PLEX_RAID5) {
                        wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
                        wp->bio = bp;
                        TAILQ_INIT(&wp->bits);

                        if (bp->bio_cflags & GV_BIO_REBUILD)
                                err = gv_rebuild_raid5(p, wp, bp, addr,
                                    boff, bcount);
                        else if (bp->bio_cflags & GV_BIO_CHECK)
                                err = gv_check_raid5(p, wp, bp, addr,
                                    boff, bcount);
                        else
                                err = gv_build_raid5_req(p, wp, bp, addr,
                                    boff, bcount);

                        /*
                         * Building the sub-request failed; we probably
                         * need to clean up a lot.
                         */
                        if (err) {
                                printf("GEOM_VINUM: plex request failed for ");
                                g_print_bio(bp);
                                printf("\n");
                                TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
                                        TAILQ_REMOVE(&wp->bits, bq, queue);
                                        g_free(bq);
                                }
                                if (wp->waiting != NULL) {
                                        if (wp->waiting->bio_cflags &
                                            GV_BIO_MALLOC)
                                                g_free(wp->waiting->bio_data);
                                        g_destroy_bio(wp->waiting);
                                }
                                if (wp->parity != NULL) {
                                        if (wp->parity->bio_cflags &
                                            GV_BIO_MALLOC)
                                                g_free(wp->parity->bio_data);
                                        g_destroy_bio(wp->parity);
                                }
                                g_free(wp);

                                TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
                                        if (wp->bio == bp) {
                                                TAILQ_REMOVE(&p->packets, wp,
                                                    list);
                                                TAILQ_FOREACH_SAFE(bq,
                                                    &wp->bits, queue, bq2) {
                                                        TAILQ_REMOVE(&wp->bits,
                                                            bq, queue);
                                                        g_free(bq);
                                                }
                                                g_free(wp);
                                        }
                                }

                                cbp = bp->bio_driver1;
                                while (cbp != NULL) {
                                        pbp = cbp->bio_caller1;
                                        if (cbp->bio_cflags & GV_BIO_MALLOC)
                                                g_free(cbp->bio_data);
                                        g_destroy_bio(cbp);
                                        cbp = pbp;
                                }

                                g_io_deliver(bp, err);
                                return;
                        }

                        if (TAILQ_EMPTY(&wp->bits))
                                g_free(wp);
                        else if (wp->lockbase != -1)
                                TAILQ_INSERT_TAIL(&p->packets, wp, list);

                /*
                 * Requests to concatenated and striped plexes go straight
                 * through.
                 */
                } else {
                        err = gv_plexbuffer(p, bp, addr, boff, bcount);

                        /* Building the sub-request failed. */
                        if (err) {
                                printf("GEOM_VINUM: plex request failed for ");
                                g_print_bio(bp);
                                printf("\n");
                                cbp = bp->bio_driver1;
                                while (cbp != NULL) {
                                        pbp = cbp->bio_caller1;
                                        g_destroy_bio(cbp);
                                        cbp = pbp;
                                }
                                g_io_deliver(bp, err);
                                return;
                        }
                }

                /* Abuse bio_caller1 as a linked list of sub-requests. */
                pbp = bp->bio_driver1;
                while (pbp->bio_caller1 != NULL)
                        pbp = pbp->bio_caller1;
                bcount -= pbp->bio_length;
                addr += pbp->bio_length;
                boff += pbp->bio_length;
        }

        /* Fire off all sub-requests. */
        pbp = bp->bio_driver1;
        while (pbp != NULL) {
                /*
                 * RAID5 sub-requests need to be issued in the correct
                 * order; otherwise we trip over the parity, as it might
                 * be overwritten by another sub-request.
                 */
                if (pbp->bio_driver1 != NULL &&
                    gv_stripe_active(p, pbp)) {
                        /* Park the bio on the waiting queue. */
                        pbp->bio_cflags |= GV_BIO_ONHOLD;
                        mtx_lock(&p->bqueue_mtx);
                        bioq_disksort(p->wqueue, pbp);
                        mtx_unlock(&p->bqueue_mtx);
                } else
                        g_io_request(pbp, pbp->bio_caller2);
                pbp = pbp->bio_caller1;
        }
}

static int
gv_plex_access(struct g_provider *pp, int dr, int dw, int de)
{
        struct gv_plex *p;
        struct g_geom *gp;
        struct g_consumer *cp, *cp2;
        int error;

        gp = pp->geom;
        p = gp->softc;
        KASSERT(p != NULL, ("NULL p"));

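        /*
         * A write to a RAID5 plex also involves reading back data and
         * parity, so pair every write access change with the matching
         * read access change.
         */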
        if (p->org == GV_PLEX_RAID5) {
                if (dw > 0 && dr == 0)
                        dr = 1;
                else if (dw < 0 && dr == 0)
                        dr = -1;
        }

        LIST_FOREACH(cp, &gp->consumer, consumer) {
                error = g_access(cp, dr, dw, de);
                if (error) {
                        LIST_FOREACH(cp2, &gp->consumer, consumer) {
                                if (cp == cp2)
                                        break;
                                g_access(cp2, -dr, -dw, -de);
                        }
                        return (error);
                }
        }
        return (0);
}

static struct g_geom *
gv_plex_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
        struct g_geom *gp;
        struct g_consumer *cp, *cp2;
        struct g_provider *pp2;
        struct gv_plex *p;
        struct gv_sd *s;
        struct gv_softc *sc;
        int error;

        g_trace(G_T_TOPOLOGY, "gv_plex_taste(%s, %s)", mp->name, pp->name);
        g_topology_assert();

        /* We only want to attach to subdisks. */
        if (strcmp(pp->geom->class->name, "VINUMDRIVE"))
                return (NULL);

        /* Find the VINUM class and its associated geom. */
        gp = find_vinum_geom();
        if (gp == NULL)
                return (NULL);
        sc = gp->softc;
        KASSERT(sc != NULL, ("gv_plex_taste: NULL sc"));

        /* Find out which subdisk the offered provider corresponds to. */
        s = pp->private;
        KASSERT(s != NULL, ("gv_plex_taste: NULL s"));

        /* Now find the plex this subdisk belongs to. */
        p = gv_find_plex(sc, s->plex);
        if (p == NULL) {
                printf("gv_plex_taste: NULL p for '%s'\n", s->name);
                return (NULL);
        }

        /*
         * Add this subdisk to this plex.  Since we trust the on-disk
         * configuration, we don't check the given value (should we?).
         * XXX: shouldn't be done here
         */
        gv_sd_to_plex(p, s, 0);

        /* Now check if there's already a geom for this plex. */
        gp = p->geom;

        /* Yes, there is already a geom, so we just add the consumer. */
        if (gp != NULL) {
                cp2 = LIST_FIRST(&gp->consumer);
                /* Need to attach a new consumer to this subdisk. */
                cp = g_new_consumer(gp);
                error = g_attach(cp, pp);
                if (error) {
                        printf("geom_vinum: couldn't attach consumer to %s\n",
                            pp->name);
                        g_destroy_consumer(cp);
                        return (NULL);
                }
                /* Adjust the access counts of the new consumer. */
                if ((cp2 != NULL) && (cp2->acr || cp2->acw || cp2->ace)) {
                        error = g_access(cp, cp2->acr, cp2->acw, cp2->ace);
                        if (error) {
                                printf("geom_vinum: couldn't set access counts"
                                    " for consumer on %s\n", pp->name);
                                g_detach(cp);
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                }
                s->consumer = cp;

                /* Adjust the size of the providers this plex has. */
                LIST_FOREACH(pp2, &gp->provider, provider)
                        pp2->mediasize = p->size;

                /* Update the size of the volume this plex is attached to. */
                if (p->vol_sc != NULL)
                        gv_update_vol_size(p->vol_sc, p->size);

                /*
                 * If necessary, create the bio queues, the queue mutex
                 * and a worker thread.
                 */
                if (p->bqueue == NULL) {
                        p->bqueue = g_malloc(sizeof(struct bio_queue_head),
                            M_WAITOK | M_ZERO);
                        bioq_init(p->bqueue);
                }
                if (p->wqueue == NULL) {
                        p->wqueue = g_malloc(sizeof(struct bio_queue_head),
                            M_WAITOK | M_ZERO);
                        bioq_init(p->wqueue);
                }
                if (mtx_initialized(&p->bqueue_mtx) == 0)
                        mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
                if (!(p->flags & GV_PLEX_THREAD_ACTIVE)) {
                        kthread_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
                            p->name);
                        p->flags |= GV_PLEX_THREAD_ACTIVE;
                }

                return (NULL);

        /* We need to create a new geom. */
        } else {
                gp = g_new_geomf(mp, "%s", p->name);
                gp->start = gv_plex_start;
                gp->orphan = gv_plex_orphan;
                gp->access = gv_plex_access;
                gp->softc = p;
                p->geom = gp;

                TAILQ_INIT(&p->packets);
                p->bqueue = g_malloc(sizeof(struct bio_queue_head),
                    M_WAITOK | M_ZERO);
                bioq_init(p->bqueue);
                p->wqueue = g_malloc(sizeof(struct bio_queue_head),
                    M_WAITOK | M_ZERO);
                bioq_init(p->wqueue);
                mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
                kthread_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
                    p->name);
                p->flags |= GV_PLEX_THREAD_ACTIVE;

                /* Attach a consumer to this provider. */
                cp = g_new_consumer(gp);
                g_attach(cp, pp);
                s->consumer = cp;

                /* Create a provider for the outside world. */
                pp2 = g_new_providerf(gp, "gvinum/plex/%s", p->name);
                pp2->mediasize = p->size;
                pp2->sectorsize = pp->sectorsize;
                p->provider = pp2;
                g_error_provider(pp2, 0);
                return (gp);
        }
}

static int
gv_plex_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp)
{
        struct gv_plex *p;

        g_trace(G_T_TOPOLOGY, "gv_plex_destroy_geom: %s", gp->name);
        g_topology_assert();

        p = gp->softc;

        KASSERT(p != NULL, ("gv_plex_destroy_geom: null p of '%s'", gp->name));

        /*
         * If this is a RAID5 plex, check if its worker thread is still
         * active and signal it to self-destruct.
         */
        gv_kill_plex_thread(p);
        /* g_free(sc); */
        g_wither_geom(gp, ENXIO);
        return (0);
}

#define VINUMPLEX_CLASS_NAME "VINUMPLEX"

static struct g_class g_vinum_plex_class = {
        .name = VINUMPLEX_CLASS_NAME,
        .version = G_VERSION,
        .taste = gv_plex_taste,
        .destroy_geom = gv_plex_destroy_geom,
};

DECLARE_GEOM_CLASS(g_vinum_plex_class, g_vinum_plex);