sys/geom/vinum/geom_vinum_raid5.c

   1 /*-
   2  * Copyright (c) 2004, 2007 Lukas Ertl
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  */
  26
  27 #include <sys/cdefs.h>
  28 __FBSDID("$FreeBSD$");
  29
  30 #include <sys/param.h>
  31 #include <sys/bio.h>
  32 #include <sys/lock.h>
  33 #include <sys/malloc.h>
  34 #include <sys/systm.h>
  35
  36 #include <geom/geom.h>
  37 #include <geom/vinum/geom_vinum_var.h>
  38 #include <geom/vinum/geom_vinum_raid5.h>
  39 #include <geom/vinum/geom_vinum.h>
  40
  41 static int              gv_raid5_offset(struct gv_plex *, off_t, off_t,
  42                             off_t *, off_t *, int *, int *, int);
  43 static struct bio *     gv_raid5_clone_bio(struct bio *, struct gv_sd *,
  44                             struct gv_raid5_packet *, caddr_t, int);
  45 static int      gv_raid5_request(struct gv_plex *, struct gv_raid5_packet *,
  46                     struct bio *, caddr_t, off_t, off_t, int *);
  47 static int      gv_raid5_check(struct gv_plex *, struct gv_raid5_packet *,
  48                     struct bio *, caddr_t, off_t, off_t);
  49 static int      gv_raid5_rebuild(struct gv_plex *, struct gv_raid5_packet *,
  50                     struct bio *, caddr_t, off_t, off_t);
  51
  52 struct gv_raid5_packet *
  53 gv_raid5_start(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff,
  54     off_t bcount)
  55 {
  56         struct bio *cbp;
  57         struct gv_raid5_packet *wp, *wp2;
  58         struct gv_bioq *bq, *bq2;
  59         int err, delay;
  60
  61         delay = 0;
  62         wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
  63         wp->bio = bp;
  64         wp->waiting = NULL;
  65         wp->parity = NULL;
  66         TAILQ_INIT(&wp->bits);
  67
  68         if (bp->bio_pflags & GV_BIO_REBUILD)
  69                 err = gv_raid5_rebuild(p, wp, bp, addr, boff, bcount);
  70         else if (bp->bio_pflags & GV_BIO_CHECK)
  71                 err = gv_raid5_check(p, wp, bp, addr, boff, bcount);
  72         else
  73                 err = gv_raid5_request(p, wp, bp, addr, boff, bcount, &delay);
  74
  75         /* Means we have a delayed request. */
  76         if (delay) {
  77                 g_free(wp);
  78                 return (NULL);
  79         }
  80
  81         /*
  82          * Building the sub-request failed, we probably need to clean up a lot.
  83          */
  84         if (err) {
  85                 G_VINUM_LOGREQ(0, bp, "raid5 plex request failed.");
  86                 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
  87                         TAILQ_REMOVE(&wp->bits, bq, queue);
  88                         g_free(bq);
  89                 }
  90                 if (wp->waiting != NULL) {
  91                         if (wp->waiting->bio_cflags & GV_BIO_MALLOC)
  92                                 g_free(wp->waiting->bio_data);
  93                         g_destroy_bio(wp->waiting);
  94                 }
  95                 if (wp->parity != NULL) {
  96                         if (wp->parity->bio_cflags & GV_BIO_MALLOC)
  97                                 g_free(wp->parity->bio_data);
  98                         g_destroy_bio(wp->parity);
  99                 }
 100                 g_free(wp);
 101
 102                 TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
 103                         if (wp->bio != bp)
 104                                 continue;
 105
 106                         TAILQ_REMOVE(&p->packets, wp, list);
 107                         TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
 108                                 TAILQ_REMOVE(&wp->bits, bq, queue);
 109                                 g_free(bq);
 110                         }
 111                         g_free(wp);
 112                 }
 113
 114                 cbp = bioq_takefirst(p->bqueue);
 115                 while (cbp != NULL) {
 116                         if (cbp->bio_cflags & GV_BIO_MALLOC)
 117                                 g_free(cbp->bio_data);
 118                         g_destroy_bio(cbp);
 119                         cbp = bioq_takefirst(p->bqueue);
 120                 }
 121
 122                 /* If internal, stop and reset state. */
 123                 if (bp->bio_pflags & GV_BIO_INTERNAL) {
 124                         if (bp->bio_pflags & GV_BIO_MALLOC)
 125                                 g_free(bp->bio_data);
 126                         g_destroy_bio(bp);
 127                         /* Reset flags. */
 128                         p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
 129                             GV_PLEX_GROWING);
 130                         return (NULL);
 131                 }
 132                 g_io_deliver(bp, err);
 133                 return (NULL);
 134         }
 135
 136         return (wp);
 137 }
 138
 139 /*
 140  * Check if the stripe that the work packet wants is already being used by
 141  * some other work packet.
 142  */
 143 int
 144 gv_stripe_active(struct gv_plex *p, struct bio *bp)
 145 {
 146         struct gv_raid5_packet *wp, *owp;
 147         int overlap;
 148
 149         wp = bp->bio_caller2;
 150         if (wp->lockbase == -1)
 151                 return (0);
 152
 153         overlap = 0;
 154         TAILQ_FOREACH(owp, &p->packets, list) {
 155                 if (owp == wp)
 156                         break;
 157                 if ((wp->lockbase >= owp->lockbase) &&
 158                     (wp->lockbase <= owp->lockbase + owp->length)) {
 159                         overlap++;
 160                         break;
 161                 }
 162                 if ((wp->lockbase <= owp->lockbase) &&
 163                     (wp->lockbase + wp->length >= owp->lockbase)) {
 164                         overlap++;
 165                         break;
 166                 }
 167         }
 168
 169         return (overlap);
 170 }
 171
 172 static int
 173 gv_raid5_check(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
 174     caddr_t addr, off_t boff, off_t bcount)
 175 {
 176         struct gv_sd *parity, *s;
 177         struct gv_bioq *bq;
 178         struct bio *cbp;
 179         int i, psdno;
 180         off_t real_len, real_off;
 181
 182         if (p == NULL || LIST_EMPTY(&p->subdisks))
 183                 return (ENXIO);
 184
 185         gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, &psdno, 1);
 186
 187         /* Find the right subdisk. */
 188         parity = NULL;
 189         i = 0;
 190         LIST_FOREACH(s, &p->subdisks, in_plex) {
 191                 if (i == psdno) {
 192                         parity = s;
 193                         break;
 194                 }
 195                 i++;
 196         }
 197
 198         /* Parity stripe not found. */
 199         if (parity == NULL)
 200                 return (ENXIO);
 201
 202         if (parity->state != GV_SD_UP)
 203                 return (ENXIO);
 204
 205         wp->length = real_len;
 206         wp->data = addr;
 207         wp->lockbase = real_off;
 208
 209         /* Read all subdisks. */
 210         LIST_FOREACH(s, &p->subdisks, in_plex) {
 211                 /* Skip the parity subdisk. */
 212                 if (s == parity)
 213                         continue;
 214                 /* Skip growing subdisks. */
 215                 if (s->flags & GV_SD_GROW)
 216                         continue;
 217
 218                 cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
 219                 if (cbp == NULL)
 220                         return (ENOMEM);
 221                 cbp->bio_cmd = BIO_READ;
 222
 223                 bioq_insert_tail(p->bqueue, cbp);
 224
 225                 bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
 226                 bq->bp = cbp;
 227                 TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
 228         }
 229
 230         /* Read the parity data. */
 231         cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
 232         if (cbp == NULL)
 233                 return (ENOMEM);
 234         cbp->bio_cmd = BIO_READ;
 235         wp->waiting = cbp;
 236
 237         /*
 238          * In case we want to rebuild the parity, create an extra BIO to write
 239          * it out.  It also acts as buffer for the XOR operations.
 240          */
 241         cbp = gv_raid5_clone_bio(bp, parity, wp, addr, 1);
 242         if (cbp == NULL)
 243                 return (ENOMEM);
 244         wp->parity = cbp;
 245
 246         return (0);
 247 }
 248
 249 /* Rebuild a degraded RAID5 plex. */
 250 static int
 251 gv_raid5_rebuild(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
 252     caddr_t addr, off_t boff, off_t bcount)
 253 {
 254         struct gv_sd *broken, *s;
 255         struct gv_bioq *bq;
 256         struct bio *cbp;
 257         off_t real_len, real_off;
 258
 259         if (p == NULL || LIST_EMPTY(&p->subdisks))
 260                 return (ENXIO);
 261
 262         gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, NULL, 1);
 263
 264         /* Find the right subdisk. */
 265         broken = NULL;
 266         LIST_FOREACH(s, &p->subdisks, in_plex) {
 267                 if (s->state != GV_SD_UP)
 268                         broken = s;
 269         }
 270
 271         /* Broken stripe not found. */
 272         if (broken == NULL)
 273                 return (ENXIO);
 274
 275         switch (broken->state) {
 276         case GV_SD_UP:
 277                 return (EINVAL);
 278
 279         case GV_SD_STALE:
 280                 if (!(bp->bio_pflags & GV_BIO_REBUILD))
 281                         return (ENXIO);
 282
 283                 G_VINUM_DEBUG(1, "sd %s is reviving", broken->name);
 284                 gv_set_sd_state(broken, GV_SD_REVIVING, GV_SETSTATE_FORCE);
 285                 /* Set this bit now, but should be set at end. */
 286                 broken->flags |= GV_SD_CANGOUP;
 287                 break;
 288
 289         case GV_SD_REVIVING:
 290                 break;
 291
 292         default:
 293                 /* All other subdisk states mean it's not accessible. */
 294                 return (ENXIO);
 295         }
 296
 297         wp->length = real_len;
 298         wp->data = addr;
 299         wp->lockbase = real_off;
 300
 301         KASSERT(wp->length >= 0, ("gv_rebuild_raid5: wp->length < 0"));
 302
 303         /* Read all subdisks. */
 304         LIST_FOREACH(s, &p->subdisks, in_plex) {
 305                 /* Skip the broken subdisk. */
 306                 if (s == broken)
 307                         continue;
 308
 309                 /* Skip growing subdisks. */
 310                 if (s->flags & GV_SD_GROW)
 311                         continue;
 312
 313                 cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
 314                 if (cbp == NULL)
 315                         return (ENOMEM);
 316                 cbp->bio_cmd = BIO_READ;
 317
 318                 bioq_insert_tail(p->bqueue, cbp);
 319
 320                 bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
 321                 bq->bp = cbp;
 322                 TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
 323         }
 324
 325         /* Write the parity data. */
 326         cbp = gv_raid5_clone_bio(bp, broken, wp, NULL, 1);
 327         if (cbp == NULL)
 328                 return (ENOMEM);
 329         wp->parity = cbp;
 330
 331         p->synced = boff;
 332
 333         /* Post notification that we're finished. */
 334         return (0);
 335 }
 336
 337 /* Build a request group to perform (part of) a RAID5 request. */
 338 static int
 339 gv_raid5_request(struct gv_plex *p, struct gv_raid5_packet *wp,
 340     struct bio *bp, caddr_t addr, off_t boff, off_t bcount, int *delay)
 341 {
 342         struct g_geom *gp;
 343         struct gv_sd *broken, *original, *parity, *s;
 344         struct gv_bioq *bq;
 345         struct bio *cbp;
 346         int i, psdno, sdno, type, grow;
 347         off_t real_len, real_off;
 348
 349         gp = bp->bio_to->geom;
 350
 351         if (p == NULL || LIST_EMPTY(&p->subdisks))
 352                 return (ENXIO);
 353
 354         /* We are optimistic and assume that this request will be OK. */
 355 #define REQ_TYPE_NORMAL         0
 356 #define REQ_TYPE_DEGRADED       1
 357 #define REQ_TYPE_NOPARITY       2
 358
 359         type = REQ_TYPE_NORMAL;
 360         original = parity = broken = NULL;
 361
 362         /* XXX: The resize won't crash with rebuild or sync, but we should still
 363          * be aware of it. Also this should perhaps be done on rebuild/check as
 364          * well?
 365          */
 366         /* If we're over, we must use the old. */
 367         if (boff >= p->synced) {
 368                 grow = 1;
 369         /* Or if over the resized offset, we use all drives. */
 370         } else if (boff + bcount <= p->synced) {
 371                 grow = 0;
 372         /* Else, we're in the middle, and must wait a bit. */
 373         } else {
 374                 bioq_disksort(p->rqueue, bp);
 375                 *delay = 1;
 376                 return (0);
 377         }
 378         gv_raid5_offset(p, boff, bcount, &real_off, &real_len,
 379             &sdno, &psdno, grow);
 380
 381         /* Find the right subdisks. */
 382         i = 0;
 383         LIST_FOREACH(s, &p->subdisks, in_plex) {
 384                 if (i == sdno)
 385                         original = s;
 386                 if (i == psdno)
 387                         parity = s;
 388                 if (s->state != GV_SD_UP)
 389                         broken = s;
 390                 i++;
 391         }
 392
 393         if ((original == NULL) || (parity == NULL))
 394                 return (ENXIO);
 395
 396         /* Our data stripe is missing. */
 397         if (original->state != GV_SD_UP)
 398                 type = REQ_TYPE_DEGRADED;
 399
 400         /* If synchronizing request, just write it if disks are stale. */
 401         if (original->state == GV_SD_STALE && parity->state == GV_SD_STALE &&
 402             bp->bio_pflags & GV_BIO_SYNCREQ && bp->bio_cmd == BIO_WRITE) {
 403                 type = REQ_TYPE_NORMAL;
 404         /* Our parity stripe is missing. */
 405         } else if (parity->state != GV_SD_UP) {
 406                 /* We cannot take another failure if we're already degraded. */
 407                 if (type != REQ_TYPE_NORMAL)
 408                         return (ENXIO);
 409                 else
 410                         type = REQ_TYPE_NOPARITY;
 411         }
 412
 413         wp->length = real_len;
 414         wp->data = addr;
 415         wp->lockbase = real_off;
 416
 417         KASSERT(wp->length >= 0, ("gv_build_raid5_request: wp->length < 0"));
 418
 419         if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len < p->synced))
 420                 type = REQ_TYPE_NORMAL;
 421
 422         if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len >= p->synced)) {
 423                 bioq_disksort(p->rqueue, bp);
 424                 *delay = 1;
 425                 return (0);
 426         }
 427
 428         switch (bp->bio_cmd) {
 429         case BIO_READ:
 430                 /*
 431                  * For a degraded read we need to read in all stripes except
 432                  * the broken one plus the parity stripe and then recalculate
 433                  * the desired data.
 434                  */
 435                 if (type == REQ_TYPE_DEGRADED) {
 436                         bzero(wp->data, wp->length);
 437                         LIST_FOREACH(s, &p->subdisks, in_plex) {
 438                                 /* Skip the broken subdisk. */
 439                                 if (s == broken)
 440                                         continue;
 441                                 /* Skip growing if within offset. */
 442                                 if (grow && s->flags & GV_SD_GROW)
 443                                         continue;
 444                                 cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
 445                                 if (cbp == NULL)
 446                                         return (ENOMEM);
 447
 448                                 bioq_insert_tail(p->bqueue, cbp);
 449
 450                                 bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
 451                                 bq->bp = cbp;
 452                                 TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
 453                         }
 454
 455                 /* A normal read can be fulfilled with the original subdisk. */
 456                 } else {
 457                         cbp = gv_raid5_clone_bio(bp, original, wp, addr, 0);
 458                         if (cbp == NULL)
 459                                 return (ENOMEM);
 460
 461                         bioq_insert_tail(p->bqueue, cbp);
 462                 }
 463                 wp->lockbase = -1;
 464
 465                 break;
 466
 467         case BIO_WRITE:
 468                 /*
 469                  * A degraded write means we cannot write to the original data
 470                  * subdisk.  Thus we need to read in all valid stripes,
 471                  * recalculate the parity from the original data, and then
 472                  * write the parity stripe back out.
 473                  */
 474                 if (type == REQ_TYPE_DEGRADED) {
 475                         /* Read all subdisks. */
 476                         LIST_FOREACH(s, &p->subdisks, in_plex) {
 477                                 /* Skip the broken and the parity subdisk. */
 478                                 if ((s == broken) || (s == parity))
 479                                         continue;
 480                                 /* Skip growing if within offset. */
 481                                 if (grow && s->flags & GV_SD_GROW)
 482                                         continue;
 483
 484                                 cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
 485                                 if (cbp == NULL)
 486                                         return (ENOMEM);
 487                                 cbp->bio_cmd = BIO_READ;
 488
 489                                 bioq_insert_tail(p->bqueue, cbp);
 490
 491                                 bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
 492                                 bq->bp = cbp;
 493                                 TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
 494                         }
 495
 496                         /* Write the parity data. */
 497                         cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
 498                         if (cbp == NULL)
 499                                 return (ENOMEM);
 500                         bcopy(addr, cbp->bio_data, wp->length);
 501                         wp->parity = cbp;
 502
 503                 /*
 504                  * When the parity stripe is missing we just write out the data.
 505                  */
 506                 } else if (type == REQ_TYPE_NOPARITY) {
 507                         cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1);
 508                         if (cbp == NULL)
 509                                 return (ENOMEM);
 510
 511                         bioq_insert_tail(p->bqueue, cbp);
 512
 513                         bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
 514                         bq->bp = cbp;
 515                         TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
 516
 517                 /*
 518                  * A normal write request goes to the original subdisk, then we
 519                  * read in all other stripes, recalculate the parity and write
 520                  * out the parity again.
 521                  */
 522                 } else {
 523                         /* Read old parity. */
 524                         cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
 525                         if (cbp == NULL)
 526                                 return (ENOMEM);
 527                         cbp->bio_cmd = BIO_READ;
 528
 529                         bioq_insert_tail(p->bqueue, cbp);
 530
 531                         bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
 532                         bq->bp = cbp;
 533                         TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
 534
 535                         /* Read old data. */
 536                         cbp = gv_raid5_clone_bio(bp, original, wp, NULL, 1);
 537                         if (cbp == NULL)
 538                                 return (ENOMEM);
 539                         cbp->bio_cmd = BIO_READ;
 540
 541                         bioq_insert_tail(p->bqueue, cbp);
 542
 543                         bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
 544                         bq->bp = cbp;
 545                         TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
 546
 547                         /* Write new data. */
 548                         cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1);
 549                         if (cbp == NULL)
 550                                 return (ENOMEM);
 551
 552                         /*
 553                          * We must not write the new data until the old data
 554                          * was read, so hold this BIO back until we're ready
 555                          * for it.
 556                          */
 557                         wp->waiting = cbp;
 558
 559                         /* The final bio for the parity. */
 560                         cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
 561                         if (cbp == NULL)
 562                                 return (ENOMEM);
 563
 564                         /* Remember that this is the BIO for the parity data. */
 565                         wp->parity = cbp;
 566                 }
 567                 break;
 568
 569         default:
 570                 return (EINVAL);
 571         }
 572
 573         return (0);
 574 }
 575
 576 /*
 577  * Calculate the offsets in the various subdisks for a RAID5 request. Also take
 578  * care of new subdisks in an expanded RAID5 array.
 579  * XXX: This assumes that the new subdisks are inserted after the others (which
 580  * is okay as long as plex_offset is larger). If subdisks are inserted into the
 581  * plexlist before, we get problems.
 582  */
 583 static int
 584 gv_raid5_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
 585     off_t *real_len, int *sdno, int *psdno, int growing)
 586 {
 587         struct gv_sd *s;
 588         int sd, psd, sdcount;
 589         off_t len_left, stripeend, stripeoff, stripestart;
 590
 591         sdcount = p->sdcount;
 592         if (growing) {
 593                 LIST_FOREACH(s, &p->subdisks, in_plex) {
 594                         if (s->flags & GV_SD_GROW)
 595                                 sdcount--;
 596                 }
 597         }
 598
 599         /* The number of the subdisk containing the parity stripe. */
 600         psd = sdcount - 1 - ( boff / (p->stripesize * (sdcount - 1))) %
 601             sdcount;
 602         KASSERT(psdno >= 0, ("gv_raid5_offset: psdno < 0"));
 603
 604         /* Offset of the start address from the start of the stripe. */
 605         stripeoff = boff % (p->stripesize * (sdcount - 1));
 606         KASSERT(stripeoff >= 0, ("gv_raid5_offset: stripeoff < 0"));
 607
 608         /* The number of the subdisk where the stripe resides. */
 609         sd = stripeoff / p->stripesize;
 610         KASSERT(sdno >= 0, ("gv_raid5_offset: sdno < 0"));
 611
 612         /* At or past parity subdisk. */
 613         if (sd >= psd)
 614                 sd++;
 615
 616         /* The offset of the stripe on this subdisk. */
 617         stripestart = (boff - stripeoff) / (sdcount - 1);
 618         KASSERT(stripestart >= 0, ("gv_raid5_offset: stripestart < 0"));
 619
 620         stripeoff %= p->stripesize;
 621
 622         /* The offset of the request on this subdisk. */
 623         *real_off = stripestart + stripeoff;
 624
 625         stripeend = stripestart + p->stripesize;
 626         len_left = stripeend - *real_off;
 627         KASSERT(len_left >= 0, ("gv_raid5_offset: len_left < 0"));
 628
 629         *real_len = (bcount <= len_left) ? bcount : len_left;
 630
 631         if (sdno != NULL)
 632                 *sdno = sd;
 633         if (psdno != NULL)
 634                 *psdno = psd;
 635
 636         return (0);
 637 }
 638
 639 static struct bio *
 640 gv_raid5_clone_bio(struct bio *bp, struct gv_sd *s, struct gv_raid5_packet *wp,
 641     caddr_t addr, int use_wp)
 642 {
 643         struct bio *cbp;
 644
 645         cbp = g_clone_bio(bp);
 646         if (cbp == NULL)
 647                 return (NULL);
 648         if (addr == NULL) {
 649                 cbp->bio_data = g_malloc(wp->length, M_WAITOK | M_ZERO);
 650                 cbp->bio_cflags |= GV_BIO_MALLOC;
 651         } else
 652                 cbp->bio_data = addr;
 653         cbp->bio_offset = wp->lockbase + s->drive_offset;
 654         cbp->bio_length = wp->length;
 655         cbp->bio_done = gv_done;
 656         cbp->bio_caller1 = s;
 657         if (use_wp)
 658                 cbp->bio_caller2 = wp;
 659
 660         return (cbp);
 661 }