sys/geom/vinum/geom_vinum_plex.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
   3  *
   4  * Copyright (c) 2004, 2007 Lukas Ertl
   5  * Copyright (c) 2007, 2009 Ulf Lilleengen
   6  * All rights reserved.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  */
  29
  30 #include <sys/cdefs.h>
  31 __FBSDID("$FreeBSD$");
  32
  33 #include <sys/param.h>
  34 #include <sys/bio.h>
  35 #include <sys/lock.h>
  36 #include <sys/malloc.h>
  37 #include <sys/systm.h>
  38
  39 #include <geom/geom.h>
  40 #include <geom/geom_dbg.h>
  41 #include <geom/vinum/geom_vinum_var.h>
  42 #include <geom/vinum/geom_vinum_raid5.h>
  43 #include <geom/vinum/geom_vinum.h>
  44
  45 static int      gv_check_parity(struct gv_plex *, struct bio *,
  46                     struct gv_raid5_packet *);
  47 static int      gv_normal_parity(struct gv_plex *, struct bio *,
  48                     struct gv_raid5_packet *);
  49 static void     gv_plex_flush(struct gv_plex *);
  50 static int      gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *,
  51                     int *, int);
  52 static int      gv_plex_normal_request(struct gv_plex *, struct bio *, off_t,
  53                     off_t,  caddr_t);
  54 static void     gv_post_bio(struct gv_softc *, struct bio *);
  55
  56 void
  57 gv_plex_start(struct gv_plex *p, struct bio *bp)
  58 {
  59         struct bio *cbp;
  60         struct gv_sd *s;
  61         struct gv_raid5_packet *wp;
  62         caddr_t addr;
  63         off_t bcount, boff, len;
  64
  65         bcount = bp->bio_length;
  66         addr = bp->bio_data;
  67         boff = bp->bio_offset;
  68
  69         /* Walk over the whole length of the request, we might split it up. */
  70         while (bcount > 0) {
  71                 wp = NULL;
  72
  73                 /*
  74                  * RAID5 plexes need special treatment, as a single request
  75                  * might involve several read/write sub-requests.
  76                  */
  77                 if (p->org == GV_PLEX_RAID5) {
  78                         wp = gv_raid5_start(p, bp, addr, boff, bcount);
  79                         if (wp == NULL)
  80                                 return;
  81
  82                         len = wp->length;
  83
  84                         if (TAILQ_EMPTY(&wp->bits))
  85                                 g_free(wp);
  86                         else if (wp->lockbase != -1)
  87                                 TAILQ_INSERT_TAIL(&p->packets, wp, list);
  88
  89                 /*
  90                  * Requests to concatenated and striped plexes go straight
  91                  * through.
  92                  */
  93                 } else {
  94                         len = gv_plex_normal_request(p, bp, boff, bcount, addr);
  95                 }
  96                 if (len < 0)
  97                         return;
  98
  99                 bcount -= len;
 100                 addr += len;
 101                 boff += len;
 102         }
 103
 104         /*
 105          * Fire off all sub-requests.  We get the correct consumer (== drive)
 106          * to send each request to via the subdisk that was stored in
 107          * cbp->bio_caller1.
 108          */
 109         cbp = bioq_takefirst(p->bqueue);
 110         while (cbp != NULL) {
 111                 /*
 112                  * RAID5 sub-requests need to come in correct order, otherwise
 113                  * we trip over the parity, as it might be overwritten by
 114                  * another sub-request.  We abuse cbp->bio_caller2 to mark
 115                  * potential overlap situations.
 116                  */
 117                 if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) {
 118                         /* Park the bio on the waiting queue. */
 119                         cbp->bio_pflags |= GV_BIO_ONHOLD;
 120                         bioq_disksort(p->wqueue, cbp);
 121                 } else {
 122                         s = cbp->bio_caller1;
 123                         g_io_request(cbp, s->drive_sc->consumer);
 124                 }
 125                 cbp = bioq_takefirst(p->bqueue);
 126         }
 127 }
 128
 129 static int
 130 gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
 131     off_t *real_len, int *sdno, int growing)
 132 {
 133         struct gv_sd *s;
 134         int i, sdcount;
 135         off_t len_left, stripeend, stripeno, stripestart;
 136
 137         switch (p->org) {
 138         case GV_PLEX_CONCAT:
 139                 /*
 140                  * Find the subdisk where this request starts.  The subdisks in
 141                  * this list must be ordered by plex_offset.
 142                  */
 143                 i = 0;
 144                 LIST_FOREACH(s, &p->subdisks, in_plex) {
 145                         if (s->plex_offset <= boff &&
 146                             s->plex_offset + s->size > boff) {
 147                                 *sdno = i;
 148                                 break;
 149                         }
 150                         i++;
 151                 }
 152                 if (s == NULL || s->drive_sc == NULL)
 153                         return (GV_ERR_NOTFOUND);
 154
 155                 /* Calculate corresponding offsets on disk. */
 156                 *real_off = boff - s->plex_offset;
 157                 len_left = s->size - (*real_off);
 158                 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
 159                 *real_len = (bcount > len_left) ? len_left : bcount;
 160                 break;
 161
 162         case GV_PLEX_STRIPED:
 163                 /* The number of the stripe where the request starts. */
 164                 stripeno = boff / p->stripesize;
 165                 KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0"));
 166
 167                 /* Take growing subdisks into account when calculating. */
 168                 sdcount = gv_sdcount(p, (boff >= p->synced));
 169
 170                 if (!(boff + bcount <= p->synced) &&
 171                     (p->flags & GV_PLEX_GROWING) &&
 172                     !growing)
 173                         return (GV_ERR_ISBUSY);
 174                 *sdno = stripeno % sdcount;
 175
 176                 KASSERT(sdno >= 0, ("gv_plex_offset: sdno < 0"));
 177                 stripestart = (stripeno / sdcount) *
 178                     p->stripesize;
 179                 KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0"));
 180                 stripeend = stripestart + p->stripesize;
 181                 *real_off = boff - (stripeno * p->stripesize) +
 182                     stripestart;
 183                 len_left = stripeend - *real_off;
 184                 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
 185
 186                 *real_len = (bcount <= len_left) ? bcount : len_left;
 187                 break;
 188
 189         default:
 190                 return (GV_ERR_PLEXORG);
 191         }
 192         return (0);
 193 }
 194
 195 /*
 196  * Prepare a normal plex request.
 197  */
 198 static int
 199 gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
 200     off_t bcount,  caddr_t addr)
 201 {
 202         struct gv_sd *s;
 203         struct bio *cbp;
 204         off_t real_len, real_off;
 205         int i, err, sdno;
 206
 207         s = NULL;
 208         sdno = -1;
 209         real_len = real_off = 0;
 210
 211         err = ENXIO;
 212
 213         if (p == NULL || LIST_EMPTY(&p->subdisks))
 214                 goto bad;
 215
 216         err = gv_plex_offset(p, boff, bcount, &real_off,
 217             &real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW));
 218         /* If the request was blocked, put it into wait. */
 219         if (err == GV_ERR_ISBUSY) {
 220                 bioq_disksort(p->rqueue, bp);
 221                 return (-1); /* "Fail", and delay request. */
 222         }
 223         if (err) {
 224                 err = ENXIO;
 225                 goto bad;
 226         }
 227         err = ENXIO;
 228
 229         /* Find the right subdisk. */
 230         i = 0;
 231         LIST_FOREACH(s, &p->subdisks, in_plex) {
 232                 if (i == sdno)
 233                         break;
 234                 i++;
 235         }
 236
 237         /* Subdisk not found. */
 238         if (s == NULL || s->drive_sc == NULL)
 239                 goto bad;
 240
 241         /* Now check if we can handle the request on this subdisk. */
 242         switch (s->state) {
 243         case GV_SD_UP:
 244                 /* If the subdisk is up, just continue. */
 245                 break;
 246         case GV_SD_DOWN:
 247                 if (bp->bio_pflags & GV_BIO_INTERNAL)
 248                         G_VINUM_DEBUG(0, "subdisk must be in the stale state in"
 249                             " order to perform administrative requests");
 250                 goto bad;
 251         case GV_SD_STALE:
 252                 if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) {
 253                         G_VINUM_DEBUG(0, "subdisk stale, unable to perform "
 254                             "regular requests");
 255                         goto bad;
 256                 }
 257
 258                 G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
 259                 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
 260                 break;
 261         case GV_SD_INITIALIZING:
 262                 if (bp->bio_cmd == BIO_READ)
 263                         goto bad;
 264                 break;
 265         default:
 266                 /* All other subdisk states mean it's not accessible. */
 267                 goto bad;
 268         }
 269
 270         /* Clone the bio and adjust the offsets and sizes. */
 271         cbp = g_clone_bio(bp);
 272         if (cbp == NULL) {
 273                 err = ENOMEM;
 274                 goto bad;
 275         }
 276         cbp->bio_offset = real_off + s->drive_offset;
 277         cbp->bio_length = real_len;
 278         cbp->bio_data = addr;
 279         cbp->bio_done = gv_done;
 280         cbp->bio_caller1 = s;
 281         s->drive_sc->active++;
 282
 283         /* Store the sub-requests now and let others issue them. */
 284         bioq_insert_tail(p->bqueue, cbp);
 285         return (real_len);
 286 bad:
 287         G_VINUM_LOGREQ(0, bp, "plex request failed.");
 288         /* Building the sub-request failed. If internal BIO, do not deliver. */
 289         if (bp->bio_pflags & GV_BIO_INTERNAL) {
 290                 if (bp->bio_pflags & GV_BIO_MALLOC)
 291                         g_free(bp->bio_data);
 292                 g_destroy_bio(bp);
 293                 p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
 294                     GV_PLEX_GROWING);
 295                 return (-1);
 296         }
 297         g_io_deliver(bp, err);
 298         return (-1);
 299 }
 300
 301 /*
 302  * Handle a completed request to a striped or concatenated plex.
 303  */
 304 void
 305 gv_plex_normal_done(struct gv_plex *p, struct bio *bp)
 306 {
 307         struct bio *pbp;
 308
 309         pbp = bp->bio_parent;
 310         if (pbp->bio_error == 0)
 311                 pbp->bio_error = bp->bio_error;
 312         g_destroy_bio(bp);
 313         pbp->bio_inbed++;
 314         if (pbp->bio_children == pbp->bio_inbed) {
 315                 /* Just set it to length since multiple plexes will
 316                  * screw things up. */
 317                 pbp->bio_completed = pbp->bio_length;
 318                 if (pbp->bio_pflags & GV_BIO_SYNCREQ)
 319                         gv_sync_complete(p, pbp);
 320                 else if (pbp->bio_pflags & GV_BIO_GROW)
 321                         gv_grow_complete(p, pbp);
 322                 else
 323                         g_io_deliver(pbp, pbp->bio_error);
 324         }
 325 }
 326
 327 /*
 328  * Handle a completed request to a RAID-5 plex.
 329  */
 330 void
 331 gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
 332 {
 333         struct gv_softc *sc;
 334         struct bio *cbp, *pbp;
 335         struct gv_bioq *bq, *bq2;
 336         struct gv_raid5_packet *wp;
 337         off_t completed;
 338         int i;
 339
 340         completed = 0;
 341         sc = p->vinumconf;
 342         wp = bp->bio_caller2;
 343
 344         switch (bp->bio_parent->bio_cmd) {
 345         case BIO_READ:
 346                 if (wp == NULL) {
 347                         completed = bp->bio_completed;
 348                         break;
 349                 }
 350
 351                 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
 352                         if (bq->bp != bp)
 353                                 continue;
 354                         TAILQ_REMOVE(&wp->bits, bq, queue);
 355                         g_free(bq);
 356                         for (i = 0; i < wp->length; i++)
 357                                 wp->data[i] ^= bp->bio_data[i];
 358                         break;
 359                 }
 360                 if (TAILQ_EMPTY(&wp->bits)) {
 361                         completed = wp->length;
 362                         if (wp->lockbase != -1) {
 363                                 TAILQ_REMOVE(&p->packets, wp, list);
 364                                 /* Bring the waiting bios back into the game. */
 365                                 pbp = bioq_takefirst(p->wqueue);
 366                                 while (pbp != NULL) {
 367                                         gv_post_bio(sc, pbp);
 368                                         pbp = bioq_takefirst(p->wqueue);
 369                                 }
 370                         }
 371                         g_free(wp);
 372                 }
 373
 374                 break;
 375
 376         case BIO_WRITE:
 377                 /* XXX can this ever happen? */
 378                 if (wp == NULL) {
 379                         completed = bp->bio_completed;
 380                         break;
 381                 }
 382
 383                 /* Check if we need to handle parity data. */
 384                 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
 385                         if (bq->bp != bp)
 386                                 continue;
 387                         TAILQ_REMOVE(&wp->bits, bq, queue);
 388                         g_free(bq);
 389                         cbp = wp->parity;
 390                         if (cbp != NULL) {
 391                                 for (i = 0; i < wp->length; i++)
 392                                         cbp->bio_data[i] ^= bp->bio_data[i];
 393                         }
 394                         break;
 395                 }
 396
 397                 /* Handle parity data. */
 398                 if (TAILQ_EMPTY(&wp->bits)) {
 399                         if (bp->bio_parent->bio_pflags & GV_BIO_CHECK)
 400                                 i = gv_check_parity(p, bp, wp);
 401                         else
 402                                 i = gv_normal_parity(p, bp, wp);
 403
 404                         /* All of our sub-requests have finished. */
 405                         if (i) {
 406                                 completed = wp->length;
 407                                 TAILQ_REMOVE(&p->packets, wp, list);
 408                                 /* Bring the waiting bios back into the game. */
 409                                 pbp = bioq_takefirst(p->wqueue);
 410                                 while (pbp != NULL) {
 411                                         gv_post_bio(sc, pbp);
 412                                         pbp = bioq_takefirst(p->wqueue);
 413                                 }
 414                                 g_free(wp);
 415                         }
 416                 }
 417
 418                 break;
 419         }
 420
 421         pbp = bp->bio_parent;
 422         if (pbp->bio_error == 0)
 423                 pbp->bio_error = bp->bio_error;
 424         pbp->bio_completed += completed;
 425
 426         /* When the original request is finished, we deliver it. */
 427         pbp->bio_inbed++;
 428         if (pbp->bio_inbed == pbp->bio_children) {
 429                 /* Hand it over for checking or delivery. */
 430                 if (pbp->bio_cmd == BIO_WRITE &&
 431                     (pbp->bio_pflags & GV_BIO_CHECK)) {
 432                         gv_parity_complete(p, pbp);
 433                 } else if (pbp->bio_cmd == BIO_WRITE &&
 434                     (pbp->bio_pflags & GV_BIO_REBUILD)) {
 435                         gv_rebuild_complete(p, pbp);
 436                 } else if (pbp->bio_pflags & GV_BIO_INIT) {
 437                         gv_init_complete(p, pbp);
 438                 } else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
 439                         gv_sync_complete(p, pbp);
 440                 } else if (pbp->bio_pflags & GV_BIO_GROW) {
 441                         gv_grow_complete(p, pbp);
 442                 } else {
 443                         g_io_deliver(pbp, pbp->bio_error);
 444                 }
 445         }
 446
 447         /* Clean up what we allocated. */
 448         if (bp->bio_cflags & GV_BIO_MALLOC)
 449                 g_free(bp->bio_data);
 450         g_destroy_bio(bp);
 451 }
 452
 453 static int
 454 gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
 455 {
 456         struct bio *pbp;
 457         struct gv_sd *s;
 458         int err, finished, i;
 459
 460         err = 0;
 461         finished = 1;
 462
 463         if (wp->waiting != NULL) {
 464                 pbp = wp->waiting;
 465                 wp->waiting = NULL;
 466                 s = pbp->bio_caller1;
 467                 g_io_request(pbp, s->drive_sc->consumer);
 468                 finished = 0;
 469
 470         } else if (wp->parity != NULL) {
 471                 pbp = wp->parity;
 472                 wp->parity = NULL;
 473
 474                 /* Check if the parity is correct. */
 475                 for (i = 0; i < wp->length; i++) {
 476                         if (bp->bio_data[i] != pbp->bio_data[i]) {
 477                                 err = 1;
 478                                 break;
 479                         }
 480                 }
 481
 482                 /* The parity is not correct... */
 483                 if (err) {
 484                         bp->bio_parent->bio_error = EAGAIN;
 485
 486                         /* ... but we rebuild it. */
 487                         if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) {
 488                                 s = pbp->bio_caller1;
 489                                 g_io_request(pbp, s->drive_sc->consumer);
 490                                 finished = 0;
 491                         }
 492                 }
 493
 494                 /*
 495                  * Clean up the BIO we would have used for rebuilding the
 496                  * parity.
 497                  */
 498                 if (finished) {
 499                         bp->bio_parent->bio_inbed++;
 500                         g_destroy_bio(pbp);
 501                 }
 502         }
 503
 504         return (finished);
 505 }
 506
 507 static int
 508 gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
 509 {
 510         struct bio *cbp, *pbp;
 511         struct gv_sd *s;
 512         int finished, i;
 513
 514         finished = 1;
 515
 516         if (wp->waiting != NULL) {
 517                 pbp = wp->waiting;
 518                 wp->waiting = NULL;
 519                 cbp = wp->parity;
 520                 for (i = 0; i < wp->length; i++)
 521                         cbp->bio_data[i] ^= pbp->bio_data[i];
 522                 s = pbp->bio_caller1;
 523                 g_io_request(pbp, s->drive_sc->consumer);
 524                 finished = 0;
 525
 526         } else if (wp->parity != NULL) {
 527                 cbp = wp->parity;
 528                 wp->parity = NULL;
 529                 s = cbp->bio_caller1;
 530                 g_io_request(cbp, s->drive_sc->consumer);
 531                 finished = 0;
 532         }
 533
 534         return (finished);
 535 }
 536
 537 /* Flush the queue with delayed requests. */
 538 static void
 539 gv_plex_flush(struct gv_plex *p)
 540 {
 541         struct gv_softc *sc;
 542         struct bio *bp;
 543
 544         sc = p->vinumconf;
 545         bp = bioq_takefirst(p->rqueue);
 546         while (bp != NULL) {
 547                 gv_plex_start(p, bp);
 548                 bp = bioq_takefirst(p->rqueue);
 549         }
 550 }
 551
 552 static void
 553 gv_post_bio(struct gv_softc *sc, struct bio *bp)
 554 {
 555
 556         KASSERT(sc != NULL, ("NULL sc"));
 557         KASSERT(bp != NULL, ("NULL bp"));
 558         mtx_lock(&sc->bqueue_mtx);
 559         bioq_disksort(sc->bqueue_down, bp);
 560         wakeup(sc);
 561         mtx_unlock(&sc->bqueue_mtx);
 562 }
 563
 564 int
 565 gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
 566     off_t length, int type, caddr_t data)
 567 {
 568         struct gv_softc *sc;
 569         struct bio *bp;
 570
 571         KASSERT(from != NULL, ("NULL from"));
 572         KASSERT(to != NULL, ("NULL to"));
 573         sc = from->vinumconf;
 574         KASSERT(sc != NULL, ("NULL sc"));
 575
 576         bp = g_new_bio();
 577         if (bp == NULL) {
 578                 G_VINUM_DEBUG(0, "sync from '%s' failed at offset "
 579                     " %jd; out of memory", from->name, offset);
 580                 return (ENOMEM);
 581         }
 582         bp->bio_length = length;
 583         bp->bio_done = NULL;
 584         bp->bio_pflags |= GV_BIO_SYNCREQ;
 585         bp->bio_offset = offset;
 586         bp->bio_caller1 = from;
 587         bp->bio_caller2 = to;
 588         bp->bio_cmd = type;
 589         if (data == NULL)
 590                 data = g_malloc(length, M_WAITOK);
 591         bp->bio_pflags |= GV_BIO_MALLOC; /* Free on the next run. */
 592         bp->bio_data = data;
 593
 594         /* Send down next. */
 595         gv_post_bio(sc, bp);
 596         //gv_plex_start(from, bp);
 597         return (0);
 598 }
 599
 600 /*
 601  * Handle a finished plex sync bio.
 602  */
 603 int
 604 gv_sync_complete(struct gv_plex *to, struct bio *bp)
 605 {
 606         struct gv_plex *from, *p;
 607         struct gv_sd *s;
 608         struct gv_volume *v;
 609         struct gv_softc *sc;
 610         off_t offset;
 611         int err;
 612
 613         g_topology_assert_not();
 614
 615         err = 0;
 616         KASSERT(to != NULL, ("NULL to"));
 617         KASSERT(bp != NULL, ("NULL bp"));
 618         from = bp->bio_caller2;
 619         KASSERT(from != NULL, ("NULL from"));
 620         v = to->vol_sc;
 621         KASSERT(v != NULL, ("NULL v"));
 622         sc = v->vinumconf;
 623         KASSERT(sc != NULL, ("NULL sc"));
 624
 625         /* If it was a read, write it. */
 626         if (bp->bio_cmd == BIO_READ) {
 627                 err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length,
 628                     BIO_WRITE, bp->bio_data);
 629         /* If it was a write, read the next one. */
 630         } else if (bp->bio_cmd == BIO_WRITE) {
 631                 if (bp->bio_pflags & GV_BIO_MALLOC)
 632                         g_free(bp->bio_data);
 633                 to->synced += bp->bio_length;
 634                 /* If we're finished, clean up. */
 635                 if (bp->bio_offset + bp->bio_length >= from->size) {
 636                         G_VINUM_DEBUG(1, "syncing of %s from %s completed",
 637                             to->name, from->name);
 638                         /* Update our state. */
 639                         LIST_FOREACH(s, &to->subdisks, in_plex)
 640                                 gv_set_sd_state(s, GV_SD_UP, 0);
 641                         gv_update_plex_state(to);
 642                         to->flags &= ~GV_PLEX_SYNCING;
 643                         to->synced = 0;
 644                         gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
 645                 } else {
 646                         offset = bp->bio_offset + bp->bio_length;
 647                         err = gv_sync_request(from, to, offset,
 648                             MIN(bp->bio_length, from->size - offset),
 649                             BIO_READ, NULL);
 650                 }
 651         }
 652         g_destroy_bio(bp);
 653         /* Clean up if there was an error. */
 654         if (err) {
 655                 to->flags &= ~GV_PLEX_SYNCING;
 656                 G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err);
 657         }
 658
 659         /* Check if all plexes are synced, and lower refcounts. */
 660         g_topology_lock();
 661         LIST_FOREACH(p, &v->plexes, in_volume) {
 662                 if (p->flags & GV_PLEX_SYNCING) {
 663                         g_topology_unlock();
 664                         return (-1);
 665                 }
 666         }
 667         /* If we came here, all plexes are synced, and we're free. */
 668         gv_access(v->provider, -1, -1, 0);
 669         g_topology_unlock();
 670         G_VINUM_DEBUG(1, "plex sync completed");
 671         gv_volume_flush(v);
 672         return (0);
 673 }
 674
 675 /*
 676  * Create a new bio struct for the next grow request.
 677  */
 678 int
 679 gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type,
 680     caddr_t data)
 681 {
 682         struct gv_softc *sc;
 683         struct bio *bp;
 684
 685         KASSERT(p != NULL, ("gv_grow_request: NULL p"));
 686         sc = p->vinumconf;
 687         KASSERT(sc != NULL, ("gv_grow_request: NULL sc"));
 688
 689         bp = g_new_bio();
 690         if (bp == NULL) {
 691                 G_VINUM_DEBUG(0, "grow of %s failed creating bio: "
 692                     "out of memory", p->name);
 693                 return (ENOMEM);
 694         }
 695
 696         bp->bio_cmd = type;
 697         bp->bio_done = NULL;
 698         bp->bio_error = 0;
 699         bp->bio_caller1 = p;
 700         bp->bio_offset = offset;
 701         bp->bio_length = length;
 702         bp->bio_pflags |= GV_BIO_GROW;
 703         if (data == NULL)
 704                 data = g_malloc(length, M_WAITOK);
 705         bp->bio_pflags |= GV_BIO_MALLOC;
 706         bp->bio_data = data;
 707
 708         gv_post_bio(sc, bp);
 709         //gv_plex_start(p, bp);
 710         return (0);
 711 }
 712
 713 /*
 714  * Finish handling of a bio to a growing plex.
 715  */
 716 void
 717 gv_grow_complete(struct gv_plex *p, struct bio *bp)
 718 {
 719         struct gv_softc *sc;
 720         struct gv_sd *s;
 721         struct gv_volume *v;
 722         off_t origsize, offset;
 723         int sdcount, err;
 724
 725         v = p->vol_sc;
 726         KASSERT(v != NULL, ("gv_grow_complete: NULL v"));
 727         sc = v->vinumconf;
 728         KASSERT(sc != NULL, ("gv_grow_complete: NULL sc"));
 729         err = 0;
 730
 731         /* If it was a read, write it. */
 732         if (bp->bio_cmd == BIO_READ) {
 733                 p->synced += bp->bio_length;
 734                 err = gv_grow_request(p, bp->bio_offset, bp->bio_length,
 735                     BIO_WRITE, bp->bio_data);
 736         /* If it was a write, read next. */
 737         } else if (bp->bio_cmd == BIO_WRITE) {
 738                 if (bp->bio_pflags & GV_BIO_MALLOC)
 739                         g_free(bp->bio_data);
 740
 741                 /* Find the real size of the plex. */
 742                 sdcount = gv_sdcount(p, 1);
 743                 s = LIST_FIRST(&p->subdisks);
 744                 KASSERT(s != NULL, ("NULL s"));
 745                 origsize = (s->size * (sdcount - 1));
 746                 if (bp->bio_offset + bp->bio_length >= origsize) {
 747                         G_VINUM_DEBUG(1, "growing of %s completed", p->name);
 748                         p->flags &= ~GV_PLEX_GROWING;
 749                         LIST_FOREACH(s, &p->subdisks, in_plex) {
 750                                 s->flags &= ~GV_SD_GROW;
 751                                 gv_set_sd_state(s, GV_SD_UP, 0);
 752                         }
 753                         p->size = gv_plex_size(p);
 754                         gv_update_vol_size(v, gv_vol_size(v));
 755                         gv_set_plex_state(p, GV_PLEX_UP, 0);
 756                         g_topology_lock();
 757                         gv_access(v->provider, -1, -1, 0);
 758                         g_topology_unlock();
 759                         p->synced = 0;
 760                         gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
 761                         /* Issue delayed requests. */
 762                         gv_plex_flush(p);
 763                 } else {
 764                         offset = bp->bio_offset + bp->bio_length;
 765                         err = gv_grow_request(p, offset,
 766                            MIN(bp->bio_length, origsize - offset),
 767                            BIO_READ, NULL);
 768                 }
 769         }
 770         g_destroy_bio(bp);
 771
 772         if (err) {
 773                 p->flags &= ~GV_PLEX_GROWING;
 774                 G_VINUM_DEBUG(0, "error growing plex: error code %d", err);
 775         }
 776 }
 777
 778 /*
 779  * Create an initialization BIO and send it off to the consumer. Assume that
 780  * we're given initialization data as parameter.
 781  */
 782 void
 783 gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length)
 784 {
 785         struct gv_drive *d;
 786         struct g_consumer *cp;
 787         struct bio *bp, *cbp;
 788
 789         KASSERT(s != NULL, ("gv_init_request: NULL s"));
 790         d = s->drive_sc;
 791         KASSERT(d != NULL, ("gv_init_request: NULL d"));
 792         cp = d->consumer;
 793         KASSERT(cp != NULL, ("gv_init_request: NULL cp"));
 794
 795         bp = g_new_bio();
 796         if (bp == NULL) {
 797                 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
 798                     " (drive offset %jd); out of memory", s->name,
 799                     (intmax_t)s->initialized, (intmax_t)start);
 800                 return; /* XXX: Error codes. */
 801         }
 802         bp->bio_cmd = BIO_WRITE;
 803         bp->bio_data = data;
 804         bp->bio_done = NULL;
 805         bp->bio_error = 0;
 806         bp->bio_length = length;
 807         bp->bio_pflags |= GV_BIO_INIT;
 808         bp->bio_offset = start;
 809         bp->bio_caller1 = s;
 810
 811         /* Then ofcourse, we have to clone it. */
 812         cbp = g_clone_bio(bp);
 813         if (cbp == NULL) {
 814                 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
 815                     " (drive offset %jd); out of memory", s->name,
 816                     (intmax_t)s->initialized, (intmax_t)start);
 817                 return; /* XXX: Error codes. */
 818         }
 819         cbp->bio_done = gv_done;
 820         cbp->bio_caller1 = s;
 821         d->active++;
 822         /* Send it off to the consumer. */
 823         g_io_request(cbp, cp);
 824 }
 825
 826 /*
 827  * Handle a finished initialization BIO.
 828  */
 829 void
 830 gv_init_complete(struct gv_plex *p, struct bio *bp)
 831 {
 832         struct gv_softc *sc;
 833         struct gv_drive *d;
 834         struct g_consumer *cp;
 835         struct gv_sd *s;
 836         off_t start, length;
 837         caddr_t data;
 838         int error;
 839
 840         s = bp->bio_caller1;
 841         start = bp->bio_offset;
 842         length = bp->bio_length;
 843         error = bp->bio_error;
 844         data = bp->bio_data;
 845
 846         KASSERT(s != NULL, ("gv_init_complete: NULL s"));
 847         d = s->drive_sc;
 848         KASSERT(d != NULL, ("gv_init_complete: NULL d"));
 849         cp = d->consumer;
 850         KASSERT(cp != NULL, ("gv_init_complete: NULL cp"));
 851         sc = p->vinumconf;
 852         KASSERT(sc != NULL, ("gv_init_complete: NULL sc"));
 853
 854         g_destroy_bio(bp);
 855
 856         /*
 857          * First we need to find out if it was okay, and abort if it's not.
 858          * Then we need to free previous buffers, find out the correct subdisk,
 859          * as well as getting the correct starting point and length of the BIO.
 860          */
 861         if (start >= s->drive_offset + s->size) {
 862                 /* Free the data we initialized. */
 863                 if (data != NULL)
 864                         g_free(data);
 865                 g_topology_assert_not();
 866                 g_topology_lock();
 867                 g_access(cp, 0, -1, 0);
 868                 g_topology_unlock();
 869                 if (error) {
 870                         gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE |
 871                             GV_SETSTATE_CONFIG);
 872                 } else {
 873                         gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
 874                         s->initialized = 0;
 875                         gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
 876                         G_VINUM_DEBUG(1, "subdisk '%s' init: finished "
 877                             "successfully", s->name);
 878                 }
 879                 return;
 880         }
 881         s->initialized += length;
 882         start += length;
 883         gv_init_request(s, start, data, length);
 884 }
 885
 886 /*
 887  * Create a new bio struct for the next parity rebuild. Used both by internal
 888  * rebuild of degraded plexes as well as user initiated rebuilds/checks.
 889  */
 890 void
 891 gv_parity_request(struct gv_plex *p, int flags, off_t offset)
 892 {
 893         struct gv_softc *sc;
 894         struct bio *bp;
 895
 896         KASSERT(p != NULL, ("gv_parity_request: NULL p"));
 897         sc = p->vinumconf;
 898         KASSERT(sc != NULL, ("gv_parity_request: NULL sc"));
 899
 900         bp = g_new_bio();
 901         if (bp == NULL) {
 902                 G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: "
 903                     "out of memory", p->name);
 904                 return;
 905         }
 906
 907         bp->bio_cmd = BIO_WRITE;
 908         bp->bio_done = NULL;
 909         bp->bio_error = 0;
 910         bp->bio_length = p->stripesize;
 911         bp->bio_caller1 = p;
 912
 913         /*
 914          * Check if it's a rebuild of a degraded plex or a user request of
 915          * parity rebuild.
 916          */
 917         if (flags & GV_BIO_REBUILD)
 918                 bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK);
 919         else if (flags & GV_BIO_CHECK)
 920                 bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
 921         else {
 922                 G_VINUM_DEBUG(0, "invalid flags given in rebuild");
 923                 return;
 924         }
 925
 926         bp->bio_pflags = flags;
 927         bp->bio_pflags |= GV_BIO_MALLOC;
 928
 929         /* We still have more parity to build. */
 930         bp->bio_offset = offset;
 931         gv_post_bio(sc, bp);
 932         //gv_plex_start(p, bp); /* Send it down to the plex. */
 933 }
 934
 935 /*
 936  * Handle a finished parity write.
 937  */
 938 void
 939 gv_parity_complete(struct gv_plex *p, struct bio *bp)
 940 {
 941         struct gv_softc *sc;
 942         int error, flags;
 943
 944         error = bp->bio_error;
 945         flags = bp->bio_pflags;
 946         flags &= ~GV_BIO_MALLOC;
 947
 948         sc = p->vinumconf;
 949         KASSERT(sc != NULL, ("gv_parity_complete: NULL sc"));
 950
 951         /* Clean up what we allocated. */
 952         if (bp->bio_pflags & GV_BIO_MALLOC)
 953                 g_free(bp->bio_data);
 954         g_destroy_bio(bp);
 955
 956         if (error == EAGAIN) {
 957                 G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx",
 958                     (intmax_t)p->synced);
 959         }
 960
 961         /* Any error is fatal, except EAGAIN when we're rebuilding. */
 962         if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) {
 963                 /* Make sure we don't have the lock. */
 964                 g_topology_assert_not();
 965                 g_topology_lock();
 966                 gv_access(p->vol_sc->provider, -1, -1, 0);
 967                 g_topology_unlock();
 968                 G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx "
 969                     "errno %d", p->name, (intmax_t)p->synced, error);
 970                 return;
 971         } else {
 972                 p->synced += p->stripesize;
 973         }
 974
 975         if (p->synced >= p->size) {
 976                 /* Make sure we don't have the lock. */
 977                 g_topology_assert_not();
 978                 g_topology_lock();
 979                 gv_access(p->vol_sc->provider, -1, -1, 0);
 980                 g_topology_unlock();
 981                 /* We're finished. */
 982                 G_VINUM_DEBUG(1, "parity operation on %s finished", p->name);
 983                 p->synced = 0;
 984                 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
 985                 return;
 986         }
 987
 988         /* Send down next. It will determine if we need to itself. */
 989         gv_parity_request(p, flags, p->synced);
 990 }
 991
 992 /*
 993  * Handle a finished plex rebuild bio.
 994  */
 995 void
 996 gv_rebuild_complete(struct gv_plex *p, struct bio *bp)
 997 {
 998         struct gv_softc *sc;
 999         struct gv_sd *s;
1000         int error, flags;
1001         off_t offset;
1002
1003         error = bp->bio_error;
1004         flags = bp->bio_pflags;
1005         offset = bp->bio_offset;
1006         flags &= ~GV_BIO_MALLOC;
1007         sc = p->vinumconf;
1008         KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc"));
1009
1010         /* Clean up what we allocated. */
1011         if (bp->bio_pflags & GV_BIO_MALLOC)
1012                 g_free(bp->bio_data);
1013         g_destroy_bio(bp);
1014
1015         if (error) {
1016                 g_topology_assert_not();
1017                 g_topology_lock();
1018                 gv_access(p->vol_sc->provider, -1, -1, 0);
1019                 g_topology_unlock();
1020
1021                 G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d",
1022                     p->name, (intmax_t)offset, error);
1023                 p->flags &= ~GV_PLEX_REBUILDING;
1024                 p->synced = 0;
1025                 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
1026                 return;
1027         }
1028
1029         offset += (p->stripesize * (gv_sdcount(p, 1) - 1));
1030         if (offset >= p->size) {
1031                 /* We're finished. */
1032                 g_topology_assert_not();
1033                 g_topology_lock();
1034                 gv_access(p->vol_sc->provider, -1, -1, 0);
1035                 g_topology_unlock();
1036
1037                 G_VINUM_DEBUG(1, "rebuild of %s finished", p->name);
1038                 gv_save_config(p->vinumconf);
1039                 p->flags &= ~GV_PLEX_REBUILDING;
1040                 p->synced = 0;
1041                 /* Try to up all subdisks. */
1042                 LIST_FOREACH(s, &p->subdisks, in_plex)
1043                         gv_update_sd_state(s);
1044                 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
1045                 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
1046                 return;
1047         }
1048
1049         /* Send down next. It will determine if we need to itself. */
1050         gv_parity_request(p, flags, offset);
1051 }