sbin/fsck_ffs/fsutil.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-3-Clause
   3  *
   4  * Copyright (c) 1980, 1986, 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  * 3. Neither the name of the University nor the names of its contributors
  16  *    may be used to endorse or promote products derived from this software
  17  *    without specific prior written permission.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  29  * SUCH DAMAGE.
  30  */
  31
  32 #if 0
  33 #ifndef lint
  34 static const char sccsid[] = "@(#)utilities.c   8.6 (Berkeley) 5/19/95";
  35 #endif /* not lint */
  36 #endif
  37 #include <sys/cdefs.h>
  38 #include <sys/param.h>
  39 #include <sys/time.h>
  40 #include <sys/types.h>
  41 #include <sys/sysctl.h>
  42 #include <sys/disk.h>
  43 #include <sys/disklabel.h>
  44 #include <sys/ioctl.h>
  45 #include <sys/stat.h>
  46
  47 #include <ufs/ufs/dinode.h>
  48 #include <ufs/ufs/dir.h>
  49 #include <ufs/ffs/fs.h>
  50
  51 #include <err.h>
  52 #include <errno.h>
  53 #include <string.h>
  54 #include <ctype.h>
  55 #include <fstab.h>
  56 #include <stdint.h>
  57 #include <stdio.h>
  58 #include <stdlib.h>
  59 #include <time.h>
  60 #include <unistd.h>
  61 #include <libufs.h>
  62
  63 #include "fsck.h"
  64
  65 int             sujrecovery = 0;
  66
  67 static struct bufarea *allocbuf(const char *);
  68 static void cg_write(struct bufarea *);
  69 static void slowio_start(void);
  70 static void slowio_end(void);
  71 static void printIOstats(void);
  72
  73 static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */
  74 static struct timespec startpass, finishpass;
  75 struct timeval slowio_starttime;
  76 int slowio_delay_usec = 10000;  /* Initial IO delay for background fsck */
  77 int slowio_pollcnt;
  78 static struct bufarea cgblk;    /* backup buffer for cylinder group blocks */
  79 static struct bufarea failedbuf; /* returned by failed getdatablk() */
  80 static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */
  81 static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */
  82 static struct bufhash freebufs; /* unused buffers */
  83 static int numbufs;             /* size of buffer cache */
  84 static int cachelookups;        /* number of cache lookups */
  85 static int cachereads;          /* number of cache reads */
  86 static int flushtries;          /* number of tries to reclaim memory */
  87
  88 char *buftype[BT_NUMBUFTYPES] = BT_NAMES;
  89
  90 void
  91 fsutilinit(void)
  92 {
  93         diskreads = totaldiskreads = totalreads = 0;
  94         bzero(&startpass, sizeof(struct timespec));
  95         bzero(&finishpass, sizeof(struct timespec));
  96         bzero(&slowio_starttime, sizeof(struct timeval));
  97         slowio_delay_usec = 10000;
  98         slowio_pollcnt = 0;
  99         flushtries = 0;
 100 }
 101
 102 int
 103 ftypeok(union dinode *dp)
 104 {
 105         switch (DIP(dp, di_mode) & IFMT) {
 106
 107         case IFDIR:
 108         case IFREG:
 109         case IFBLK:
 110         case IFCHR:
 111         case IFLNK:
 112         case IFSOCK:
 113         case IFIFO:
 114                 return (1);
 115
 116         default:
 117                 if (debug)
 118                         printf("bad file type 0%o\n", DIP(dp, di_mode));
 119                 return (0);
 120         }
 121 }
 122
 123 int
 124 reply(const char *question)
 125 {
 126         int persevere;
 127         char c;
 128
 129         if (preen)
 130                 pfatal("INTERNAL ERROR: GOT TO reply()");
 131         persevere = strcmp(question, "CONTINUE") == 0 ||
 132                 strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0;
 133         printf("\n");
 134         if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) {
 135                 printf("%s? no\n\n", question);
 136                 resolved = 0;
 137                 return (0);
 138         }
 139         if (yflag || (persevere && nflag)) {
 140                 printf("%s? yes\n\n", question);
 141                 return (1);
 142         }
 143         do      {
 144                 printf("%s? [yn] ", question);
 145                 (void) fflush(stdout);
 146                 c = getc(stdin);
 147                 while (c != '\n' && getc(stdin) != '\n') {
 148                         if (feof(stdin)) {
 149                                 resolved = 0;
 150                                 return (0);
 151                         }
 152                 }
 153         } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N');
 154         printf("\n");
 155         if (c == 'y' || c == 'Y')
 156                 return (1);
 157         resolved = 0;
 158         return (0);
 159 }
 160
 161 /*
 162  * Look up state information for an inode.
 163  */
 164 struct inostat *
 165 inoinfo(ino_t inum)
 166 {
 167         static struct inostat unallocated = { USTATE, 0, 0, 0 };
 168         struct inostatlist *ilp;
 169         int iloff;
 170
 171         if (inum >= maxino)
 172                 errx(EEXIT, "inoinfo: inumber %ju out of range",
 173                     (uintmax_t)inum);
 174         ilp = &inostathead[inum / sblock.fs_ipg];
 175         iloff = inum % sblock.fs_ipg;
 176         if (iloff >= ilp->il_numalloced)
 177                 return (&unallocated);
 178         return (&ilp->il_stat[iloff]);
 179 }
 180
 181 /*
 182  * Malloc buffers and set up cache.
 183  */
 184 void
 185 bufinit(void)
 186 {
 187         int i;
 188
 189         initbarea(&failedbuf, BT_UNKNOWN);
 190         failedbuf.b_errs = -1;
 191         failedbuf.b_un.b_buf = NULL;
 192         if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL)
 193                 errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize);
 194         initbarea(&cgblk, BT_CYLGRP);
 195         numbufs = cachelookups = cachereads = 0;
 196         TAILQ_INIT(&bufqueuehd);
 197         LIST_INIT(&freebufs);
 198         for (i = 0; i < HASHSIZE; i++)
 199                 LIST_INIT(&bufhashhd[i]);
 200         for (i = 0; i < BT_NUMBUFTYPES; i++) {
 201                 readtime[i].tv_sec = totalreadtime[i].tv_sec = 0;
 202                 readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0;
 203                 readcnt[i] = totalreadcnt[i] = 0;
 204         }
 205 }
 206
 207 static struct bufarea *
 208 allocbuf(const char *failreason)
 209 {
 210         struct bufarea *bp;
 211         char *bufp;
 212
 213         bp = (struct bufarea *)Malloc(sizeof(struct bufarea));
 214         bufp = Malloc((unsigned int)sblock.fs_bsize);
 215         if (bp == NULL || bufp == NULL) {
 216                 errx(EEXIT, "%s", failreason);
 217                 /* NOTREACHED */
 218         }
 219         numbufs++;
 220         bp->b_un.b_buf = bufp;
 221         TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
 222         initbarea(bp, BT_UNKNOWN);
 223         return (bp);
 224 }
 225
 226 /*
 227  * Manage cylinder group buffers.
 228  *
 229  * Use getblk() here rather than cgget() because the cylinder group
 230  * may be corrupted but we want it anyway so we can fix it.
 231  */
 232 static struct bufarea *cgbufs;  /* header for cylinder group cache */
 233 static int flushtries;          /* number of tries to reclaim memory */
 234
 235 struct bufarea *
 236 cglookup(int cg)
 237 {
 238         struct bufarea *cgbp;
 239         struct cg *cgp;
 240
 241         if ((unsigned) cg >= sblock.fs_ncg)
 242                 errx(EEXIT, "cglookup: out of range cylinder group %d", cg);
 243         if (cgbufs == NULL) {
 244                 cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea));
 245                 if (cgbufs == NULL)
 246                         errx(EEXIT, "Cannot allocate cylinder group buffers");
 247         }
 248         cgbp = &cgbufs[cg];
 249         if (cgbp->b_un.b_cg != NULL)
 250                 return (cgbp);
 251         cgp = NULL;
 252         if (flushtries == 0)
 253                 cgp = Malloc((unsigned int)sblock.fs_cgsize);
 254         if (cgp == NULL) {
 255                 if (sujrecovery)
 256                         errx(EEXIT,"Ran out of memory during journal recovery");
 257                 flush(fswritefd, &cgblk);
 258                 getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
 259                 return (&cgblk);
 260         }
 261         cgbp->b_un.b_cg = cgp;
 262         initbarea(cgbp, BT_CYLGRP);
 263         getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize);
 264         return (cgbp);
 265 }
 266
 267 /*
 268  * Mark a cylinder group buffer as dirty.
 269  * Update its check-hash if they are enabled.
 270  */
 271 void
 272 cgdirty(struct bufarea *cgbp)
 273 {
 274         struct cg *cg;
 275
 276         cg = cgbp->b_un.b_cg;
 277         if ((sblock.fs_metackhash & CK_CYLGRP) != 0) {
 278                 cg->cg_ckhash = 0;
 279                 cg->cg_ckhash =
 280                     calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize);
 281         }
 282         dirty(cgbp);
 283 }
 284
 285 /*
 286  * Attempt to flush a cylinder group cache entry.
 287  * Return whether the flush was successful.
 288  */
 289 int
 290 flushentry(void)
 291 {
 292         struct bufarea *cgbp;
 293
 294         if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL)
 295                 return (0);
 296         cgbp = &cgbufs[flushtries++];
 297         if (cgbp->b_un.b_cg == NULL)
 298                 return (0);
 299         flush(fswritefd, cgbp);
 300         free(cgbp->b_un.b_buf);
 301         cgbp->b_un.b_buf = NULL;
 302         return (1);
 303 }
 304
 305 /*
 306  * Manage a cache of filesystem disk blocks.
 307  */
 308 struct bufarea *
 309 getdatablk(ufs2_daddr_t blkno, long size, int type)
 310 {
 311         struct bufarea *bp;
 312         struct bufhash *bhdp;
 313
 314         cachelookups++;
 315         /*
 316          * If out of range, return empty buffer with b_err == -1
 317          *
 318          * Skip check for inodes because chkrange() considers
 319          * metadata areas invalid to write data.
 320          */
 321         if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
 322                 failedbuf.b_refcnt++;
 323                 return (&failedbuf);
 324         }
 325         bhdp = &bufhashhd[HASH(blkno)];
 326         LIST_FOREACH(bp, bhdp, b_hash)
 327                 if (bp->b_bno == fsbtodb(&sblock, blkno)) {
 328                         if (debug && bp->b_size != size) {
 329                                 prtbuf(bp, "getdatablk: size mismatch");
 330                                 pfatal("getdatablk: b_size %d != size %ld\n",
 331                                     bp->b_size, size);
 332                         }
 333                         TAILQ_REMOVE(&bufqueuehd, bp, b_list);
 334                         goto foundit;
 335                 }
 336         /*
 337          * Move long-term busy buffer back to the front of the LRU so we
 338          * do not endless inspect them for recycling.
 339          */
 340         bp = TAILQ_LAST(&bufqueuehd, bufqueue);
 341         if (bp != NULL && bp->b_refcnt != 0) {
 342                 TAILQ_REMOVE(&bufqueuehd, bp, b_list);
 343                 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
 344         }
 345         /*
 346          * Allocate up to the minimum number of buffers before
 347          * considering recycling any of them.
 348          */
 349         if (size > sblock.fs_bsize)
 350                 errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
 351                     sblock.fs_bsize);
 352         if ((bp = LIST_FIRST(&freebufs)) != NULL) {
 353                 LIST_REMOVE(bp, b_hash);
 354         } else if (numbufs < MINBUFS) {
 355                 bp = allocbuf("cannot create minimal buffer pool");
 356         } else if (sujrecovery) {
 357                 /*
 358                  * SUJ recovery does not want anything written until it
 359                  * has successfully completed (so it can fail back to
 360                  * full fsck). Thus, we can only recycle clean buffers.
 361                  */
 362                 TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
 363                         if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0)
 364                                 break;
 365                 if (bp == NULL)
 366                         bp = allocbuf("Ran out of memory during "
 367                             "journal recovery");
 368                 else
 369                         LIST_REMOVE(bp, b_hash);
 370         } else {
 371                 /*
 372                  * Recycle oldest non-busy buffer.
 373                  */
 374                 TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
 375                         if (bp->b_refcnt == 0)
 376                                 break;
 377                 if (bp == NULL)
 378                         bp = allocbuf("Ran out of memory for buffers");
 379                 else
 380                         LIST_REMOVE(bp, b_hash);
 381         }
 382         TAILQ_REMOVE(&bufqueuehd, bp, b_list);
 383         flush(fswritefd, bp);
 384         bp->b_type = type;
 385         LIST_INSERT_HEAD(bhdp, bp, b_hash);
 386         getblk(bp, blkno, size);
 387         cachereads++;
 388         /* fall through */
 389 foundit:
 390         TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
 391         if (debug && bp->b_type != type) {
 392                 printf("getdatablk: buffer type changed to %s",
 393                     BT_BUFTYPE(type));
 394                 prtbuf(bp, "");
 395         }
 396         if (bp->b_errs == 0)
 397                 bp->b_refcnt++;
 398         return (bp);
 399 }
 400
 401 void
 402 getblk(struct bufarea *bp, ufs2_daddr_t blk, long size)
 403 {
 404         ufs2_daddr_t dblk;
 405         struct timespec start, finish;
 406
 407         dblk = fsbtodb(&sblock, blk);
 408         if (bp->b_bno == dblk) {
 409                 totalreads++;
 410         } else {
 411                 if (debug) {
 412                         readcnt[bp->b_type]++;
 413                         clock_gettime(CLOCK_REALTIME_PRECISE, &start);
 414                 }
 415                 bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
 416                 if (debug) {
 417                         clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
 418                         timespecsub(&finish, &start, &finish);
 419                         timespecadd(&readtime[bp->b_type], &finish,
 420                             &readtime[bp->b_type]);
 421                 }
 422                 bp->b_bno = dblk;
 423                 bp->b_size = size;
 424         }
 425 }
 426
 427 void
 428 brelse(struct bufarea *bp)
 429 {
 430
 431         if (bp->b_refcnt <= 0)
 432                 prtbuf(bp, "brelse: buffer with negative reference count");
 433         bp->b_refcnt--;
 434 }
 435
 436 void
 437 binval(struct bufarea *bp)
 438 {
 439
 440         bp->b_flags &= ~B_DIRTY;
 441         LIST_REMOVE(bp, b_hash);
 442         LIST_INSERT_HEAD(&freebufs, bp, b_hash);
 443 }
 444
 445 void
 446 flush(int fd, struct bufarea *bp)
 447 {
 448         struct inode ip;
 449
 450         if ((bp->b_flags & B_DIRTY) == 0)
 451                 return;
 452         bp->b_flags &= ~B_DIRTY;
 453         if (fswritefd < 0) {
 454                 pfatal("WRITING IN READ_ONLY MODE.\n");
 455                 return;
 456         }
 457         if (bp->b_errs != 0)
 458                 pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n",
 459                     (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ",
 460                     (long long)bp->b_bno);
 461         bp->b_errs = 0;
 462         /*
 463          * Write using the appropriate function.
 464          */
 465         switch (bp->b_type) {
 466         case BT_SUPERBLK:
 467                 if (bp != &sblk)
 468                         pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
 469                             bp, &sblk);
 470                 /*
 471                  * Superblocks are always pre-copied so we do not need
 472                  * to check them for copy-on-write.
 473                  */
 474                 if (sbput(fd, bp->b_un.b_fs, 0) == 0)
 475                         fsmodified = 1;
 476                 break;
 477         case BT_CYLGRP:
 478                 /*
 479                  * Cylinder groups are always pre-copied so we do not
 480                  * need to check them for copy-on-write.
 481                  */
 482                 if (sujrecovery)
 483                         cg_write(bp);
 484                 if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
 485                         fsmodified = 1;
 486                 break;
 487         case BT_INODES:
 488                 if (debug && sblock.fs_magic == FS_UFS2_MAGIC) {
 489                         struct ufs2_dinode *dp = bp->b_un.b_dinode2;
 490                         int i;
 491
 492                         for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) {
 493                                 if (ffs_verify_dinode_ckhash(&sblock, dp) == 0)
 494                                         continue;
 495                                 pwarn("flush: INODE CHECK-HASH FAILED");
 496                                 ip.i_bp = bp;
 497                                 ip.i_dp = (union dinode *)dp;
 498                                 ip.i_number = bp->b_index + (i / sizeof(*dp));
 499                                 prtinode(&ip);
 500                                 if (preen || reply("FIX") != 0) {
 501                                         if (preen)
 502                                                 printf(" (FIXED)\n");
 503                                         ffs_update_dinode_ckhash(&sblock, dp);
 504                                         inodirty(&ip);
 505                                 }
 506                         }
 507                 }
 508                 /* FALLTHROUGH */
 509         default:
 510                 copyonwrite(&sblock, bp, std_checkblkavail);
 511                 blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
 512                 break;
 513         }
 514 }
 515
 516 /*
 517  * If there are any snapshots, ensure that all the blocks that they
 518  * care about have been copied, then release the snapshot inodes.
 519  * These operations need to be done before we rebuild the cylinder
 520  * groups so that any block allocations are properly recorded.
 521  * Since all the cylinder group maps have already been copied in
 522  * the snapshots, no further snapshot copies will need to be done.
 523  */
 524 void
 525 snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long))
 526 {
 527         struct bufarea *bp;
 528         int cnt;
 529
 530         if (snapcnt > 0) {
 531                 if (debug)
 532                         printf("Check for snapshot copies\n");
 533                 TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
 534                         if ((bp->b_flags & B_DIRTY) != 0)
 535                                 copyonwrite(&sblock, bp, checkblkavail);
 536                 for (cnt = 0; cnt < snapcnt; cnt++)
 537                         irelse(&snaplist[cnt]);
 538                 snapcnt = 0;
 539         }
 540 }
 541
 542 /*
 543  * Journaled soft updates does not maintain cylinder group summary
 544  * information during cleanup, so this routine recalculates the summary
 545  * information and updates the superblock summary in preparation for
 546  * writing out the cylinder group.
 547  */
 548 static void
 549 cg_write(struct bufarea *bp)
 550 {
 551         ufs1_daddr_t fragno, cgbno, maxbno;
 552         u_int8_t *blksfree;
 553         struct csum *csp;
 554         struct cg *cgp;
 555         int blk;
 556         int i;
 557
 558         /*
 559          * Fix the frag and cluster summary.
 560          */
 561         cgp = bp->b_un.b_cg;
 562         cgp->cg_cs.cs_nbfree = 0;
 563         cgp->cg_cs.cs_nffree = 0;
 564         bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum));
 565         maxbno = fragstoblks(&sblock, sblock.fs_fpg);
 566         if (sblock.fs_contigsumsize > 0) {
 567                 for (i = 1; i <= sblock.fs_contigsumsize; i++)
 568                         cg_clustersum(cgp)[i] = 0;
 569                 bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT));
 570         }
 571         blksfree = cg_blksfree(cgp);
 572         for (cgbno = 0; cgbno < maxbno; cgbno++) {
 573                 if (ffs_isfreeblock(&sblock, blksfree, cgbno))
 574                         continue;
 575                 if (ffs_isblock(&sblock, blksfree, cgbno)) {
 576                         ffs_clusteracct(&sblock, cgp, cgbno, 1);
 577                         cgp->cg_cs.cs_nbfree++;
 578                         continue;
 579                 }
 580                 fragno = blkstofrags(&sblock, cgbno);
 581                 blk = blkmap(&sblock, blksfree, fragno);
 582                 ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1);
 583                 for (i = 0; i < sblock.fs_frag; i++)
 584                         if (isset(blksfree, fragno + i))
 585                                 cgp->cg_cs.cs_nffree++;
 586         }
 587         /*
 588          * Update the superblock cg summary from our now correct values
 589          * before writing the block.
 590          */
 591         csp = &sblock.fs_cs(&sblock, cgp->cg_cgx);
 592         sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir;
 593         sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree;
 594         sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree;
 595         sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree;
 596         sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs;
 597 }
 598
 599 void
 600 rwerror(const char *mesg, ufs2_daddr_t blk)
 601 {
 602
 603         if (bkgrdcheck)
 604                 exit(EEXIT);
 605         if (preen == 0)
 606                 printf("\n");
 607         pfatal("CANNOT %s: %ld", mesg, (long)blk);
 608         if (reply("CONTINUE") == 0)
 609                 exit(EEXIT);
 610 }
 611
 612 void
 613 ckfini(int markclean)
 614 {
 615         struct bufarea *bp, *nbp;
 616         int ofsmodified, cnt, cg;
 617
 618         if (bkgrdflag) {
 619                 unlink(snapname);
 620                 if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) {
 621                         cmd.value = FS_UNCLEAN;
 622                         cmd.size = markclean ? -1 : 1;
 623                         if (sysctlbyname("vfs.ffs.setflags", 0, 0,
 624                             &cmd, sizeof cmd) == -1)
 625                                 pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n");
 626                         if (!preen) {
 627                                 printf("\n***** FILE SYSTEM MARKED %s *****\n",
 628                                     markclean ? "CLEAN" : "DIRTY");
 629                                 if (!markclean)
 630                                         rerun = 1;
 631                         }
 632                 } else if (!preen && !markclean) {
 633                         printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
 634                         rerun = 1;
 635                 }
 636                 bkgrdflag = 0;
 637         }
 638         if (debug && cachelookups > 0)
 639                 printf("cache with %d buffers missed %d of %d (%d%%)\n",
 640                     numbufs, cachereads, cachelookups,
 641                     (int)(cachereads * 100 / cachelookups));
 642         if (fswritefd < 0) {
 643                 (void)close(fsreadfd);
 644                 return;
 645         }
 646
 647         /*
 648          * To remain idempotent with partial truncations the buffers
 649          * must be flushed in this order:
 650          *  1) cylinder groups (bitmaps)
 651          *  2) indirect, directory, external attribute, and data blocks
 652          *  3) inode blocks
 653          *  4) superblock
 654          * This ordering preserves access to the modified pointers
 655          * until they are freed.
 656          */
 657         /* Step 1: cylinder groups */
 658         if (debug)
 659                 printf("Flush Cylinder groups\n");
 660         if (cgbufs != NULL) {
 661                 for (cnt = 0; cnt < sblock.fs_ncg; cnt++) {
 662                         if (cgbufs[cnt].b_un.b_cg == NULL)
 663                                 continue;
 664                         flush(fswritefd, &cgbufs[cnt]);
 665                         free(cgbufs[cnt].b_un.b_cg);
 666                 }
 667                 free(cgbufs);
 668                 cgbufs = NULL;
 669         }
 670         flush(fswritefd, &cgblk);
 671         free(cgblk.b_un.b_buf);
 672         cgblk.b_un.b_buf = NULL;
 673         cnt = 0;
 674         /* Step 2: indirect, directory, external attribute, and data blocks */
 675         if (debug)
 676                 printf("Flush indirect, directory, external attribute, "
 677                     "and data blocks\n");
 678         if (pdirbp != NULL) {
 679                 brelse(pdirbp);
 680                 pdirbp = NULL;
 681         }
 682         TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
 683                 switch (bp->b_type) {
 684                 /* These should not be in the buffer cache list */
 685                 case BT_UNKNOWN:
 686                 case BT_SUPERBLK:
 687                 case BT_CYLGRP:
 688                 default:
 689                         prtbuf(bp,"ckfini: improper buffer type on cache list");
 690                         continue;
 691                 /* These are the ones to flush in this step */
 692                 case BT_LEVEL1:
 693                 case BT_LEVEL2:
 694                 case BT_LEVEL3:
 695                 case BT_EXTATTR:
 696                 case BT_DIRDATA:
 697                 case BT_DATA:
 698                         break;
 699                 /* These are the ones to flush in the next step */
 700                 case BT_INODES:
 701                         continue;
 702                 }
 703                 if (debug && bp->b_refcnt != 0)
 704                         prtbuf(bp, "ckfini: clearing in-use buffer");
 705                 TAILQ_REMOVE(&bufqueuehd, bp, b_list);
 706                 LIST_REMOVE(bp, b_hash);
 707                 cnt++;
 708                 flush(fswritefd, bp);
 709                 free(bp->b_un.b_buf);
 710                 free((char *)bp);
 711         }
 712         /* Step 3: inode blocks */
 713         if (debug)
 714                 printf("Flush inode blocks\n");
 715         if (icachebp != NULL) {
 716                 brelse(icachebp);
 717                 icachebp = NULL;
 718         }
 719         TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
 720                 if (debug && bp->b_refcnt != 0)
 721                         prtbuf(bp, "ckfini: clearing in-use buffer");
 722                 TAILQ_REMOVE(&bufqueuehd, bp, b_list);
 723                 LIST_REMOVE(bp, b_hash);
 724                 cnt++;
 725                 flush(fswritefd, bp);
 726                 free(bp->b_un.b_buf);
 727                 free((char *)bp);
 728         }
 729         if (numbufs != cnt)
 730                 errx(EEXIT, "panic: lost %d buffers", numbufs - cnt);
 731         /* Step 4: superblock */
 732         if (debug)
 733                 printf("Flush the superblock\n");
 734         flush(fswritefd, &sblk);
 735         if (havesb && cursnapshot == 0 &&
 736             sblk.b_bno != sblock.fs_sblockloc / dev_bsize) {
 737                 if (preen || reply("UPDATE STANDARD SUPERBLOCK")) {
 738                         /* Change write destination to standard superblock */
 739                         sblock.fs_sblockactualloc = sblock.fs_sblockloc;
 740                         sblk.b_bno = sblock.fs_sblockloc / dev_bsize;
 741                         sbdirty();
 742                         flush(fswritefd, &sblk);
 743                 } else {
 744                         markclean = 0;
 745                 }
 746         }
 747         if (cursnapshot == 0 && sblock.fs_clean != markclean) {
 748                 if ((sblock.fs_clean = markclean) != 0) {
 749                         sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK);
 750                         sblock.fs_pendingblocks = 0;
 751                         sblock.fs_pendinginodes = 0;
 752                 }
 753                 sbdirty();
 754                 ofsmodified = fsmodified;
 755                 flush(fswritefd, &sblk);
 756                 fsmodified = ofsmodified;
 757                 if (!preen) {
 758                         printf("\n***** FILE SYSTEM MARKED %s *****\n",
 759                             markclean ? "CLEAN" : "DIRTY");
 760                         if (!markclean)
 761                                 rerun = 1;
 762                 }
 763         } else if (!preen) {
 764                 if (markclean) {
 765                         printf("\n***** FILE SYSTEM IS CLEAN *****\n");
 766                 } else {
 767                         printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
 768                         rerun = 1;
 769                 }
 770         }
 771         /*
 772          * Free allocated tracking structures.
 773          */
 774         if (blockmap != NULL)
 775                 free(blockmap);
 776         blockmap = NULL;
 777         if (inostathead != NULL) {
 778                 for (cg = 0; cg < sblock.fs_ncg; cg++)
 779                         if (inostathead[cg].il_stat != NULL)
 780                                 free((char *)inostathead[cg].il_stat);
 781                 free(inostathead);
 782         }
 783         inostathead = NULL;
 784         inocleanup();
 785         finalIOstats();
 786         (void)close(fsreadfd);
 787         (void)close(fswritefd);
 788 }
 789
 790 /*
 791  * Print out I/O statistics.
 792  */
 793 void
 794 IOstats(char *what)
 795 {
 796         int i;
 797
 798         if (debug == 0)
 799                 return;
 800         if (diskreads == 0) {
 801                 printf("%s: no I/O\n\n", what);
 802                 return;
 803         }
 804         if (startpass.tv_sec == 0)
 805                 startpass = startprog;
 806         printf("%s: I/O statistics\n", what);
 807         printIOstats();
 808         totaldiskreads += diskreads;
 809         diskreads = 0;
 810         for (i = 0; i < BT_NUMBUFTYPES; i++) {
 811                 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
 812                 totalreadcnt[i] += readcnt[i];
 813                 readtime[i].tv_sec = readtime[i].tv_nsec = 0;
 814                 readcnt[i] = 0;
 815         }
 816         clock_gettime(CLOCK_REALTIME_PRECISE, &startpass);
 817 }
 818
 819 void
 820 finalIOstats(void)
 821 {
 822         int i;
 823
 824         if (debug == 0)
 825                 return;
 826         printf("Final I/O statistics\n");
 827         totaldiskreads += diskreads;
 828         diskreads = totaldiskreads;
 829         startpass = startprog;
 830         for (i = 0; i < BT_NUMBUFTYPES; i++) {
 831                 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
 832                 totalreadcnt[i] += readcnt[i];
 833                 readtime[i] = totalreadtime[i];
 834                 readcnt[i] = totalreadcnt[i];
 835         }
 836         printIOstats();
 837 }
 838
 839 static void printIOstats(void)
 840 {
 841         long long msec, totalmsec;
 842         int i;
 843
 844         clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass);
 845         timespecsub(&finishpass, &startpass, &finishpass);
 846         printf("Running time: %jd.%03ld sec\n",
 847                 (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000);
 848         printf("buffer reads by type:\n");
 849         for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++)
 850                 totalmsec += readtime[i].tv_sec * 1000 +
 851                     readtime[i].tv_nsec / 1000000;
 852         if (totalmsec == 0)
 853                 totalmsec = 1;
 854         for (i = 0; i < BT_NUMBUFTYPES; i++) {
 855                 if (readcnt[i] == 0)
 856                         continue;
 857                 msec =
 858                     readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000;
 859                 printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n",
 860                     buftype[i], readcnt[i], readcnt[i] * 100 / diskreads,
 861                     (readcnt[i] * 1000 / diskreads) % 10,
 862                     (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec / 1000000,
 863                     msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10);
 864         }
 865         printf("\n");
 866 }
 867
 868 int
 869 blread(int fd, char *buf, ufs2_daddr_t blk, long size)
 870 {
 871         char *cp;
 872         int i, errs;
 873         off_t offset;
 874
 875         offset = blk;
 876         offset *= dev_bsize;
 877         if (bkgrdflag)
 878                 slowio_start();
 879         totalreads++;
 880         diskreads++;
 881         if (pread(fd, buf, (int)size, offset) == size) {
 882                 if (bkgrdflag)
 883                         slowio_end();
 884                 return (0);
 885         }
 886
 887         /*
 888          * This is handled specially here instead of in rwerror because
 889          * rwerror is used for all sorts of errors, not just true read/write
 890          * errors.  It should be refactored and fixed.
 891          */
 892         if (surrender) {
 893                 pfatal("CANNOT READ_BLK: %ld", (long)blk);
 894                 errx(EEXIT, "ABORTING DUE TO READ ERRORS");
 895         } else
 896                 rwerror("READ BLK", blk);
 897
 898         errs = 0;
 899         memset(buf, 0, (size_t)size);
 900         printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:");
 901         for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) {
 902                 if (pread(fd, cp, (int)secsize, offset + i) != secsize) {
 903                         if (secsize != dev_bsize && dev_bsize != 1)
 904                                 printf(" %jd (%jd),",
 905                                     (intmax_t)(blk * dev_bsize + i) / secsize,
 906                                     (intmax_t)blk + i / dev_bsize);
 907                         else
 908                                 printf(" %jd,", (intmax_t)blk + i / dev_bsize);
 909                         errs++;
 910                 }
 911         }
 912         printf("\n");
 913         if (errs)
 914                 resolved = 0;
 915         return (errs);
 916 }
 917
 918 void
 919 blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size)
 920 {
 921         int i;
 922         char *cp;
 923         off_t offset;
 924
 925         if (fd < 0)
 926                 return;
 927         offset = blk;
 928         offset *= dev_bsize;
 929         if (pwrite(fd, buf, size, offset) == size) {
 930                 fsmodified = 1;
 931                 return;
 932         }
 933         resolved = 0;
 934         rwerror("WRITE BLK", blk);
 935         printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
 936         for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize)
 937                 if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize)
 938                         printf(" %jd,", (intmax_t)blk + i / dev_bsize);
 939         printf("\n");
 940         return;
 941 }
 942
 943 void
 944 blerase(int fd, ufs2_daddr_t blk, long size)
 945 {
 946         off_t ioarg[2];
 947
 948         if (fd < 0)
 949                 return;
 950         ioarg[0] = blk * dev_bsize;
 951         ioarg[1] = size;
 952         ioctl(fd, DIOCGDELETE, ioarg);
 953         /* we don't really care if we succeed or not */
 954         return;
 955 }
 956
 957 /*
 958  * Fill a contiguous region with all-zeroes.  Note ZEROBUFSIZE is by
 959  * definition a multiple of dev_bsize.
 960  */
 961 void
 962 blzero(int fd, ufs2_daddr_t blk, long size)
 963 {
 964         static char *zero;
 965         off_t offset, len;
 966
 967         if (fd < 0)
 968                 return;
 969         if (zero == NULL) {
 970                 zero = calloc(ZEROBUFSIZE, 1);
 971                 if (zero == NULL)
 972                         errx(EEXIT, "cannot allocate buffer pool");
 973         }
 974         offset = blk * dev_bsize;
 975         if (lseek(fd, offset, 0) < 0)
 976                 rwerror("SEEK BLK", blk);
 977         while (size > 0) {
 978                 len = MIN(ZEROBUFSIZE, size);
 979                 if (write(fd, zero, len) != len)
 980                         rwerror("WRITE BLK", blk);
 981                 blk += len / dev_bsize;
 982                 size -= len;
 983         }
 984 }
 985
 986 /*
 987  * Verify cylinder group's magic number and other parameters.  If the
 988  * test fails, offer an option to rebuild the whole cylinder group.
 989  *
 990  * Return 1 if the cylinder group is good or return 0 if it is bad.
 991  */
 992 #undef CHK
 993 #define CHK(lhs, op, rhs, fmt)                                          \
 994         if (lhs op rhs) {                                               \
 995                 pwarn("UFS%d cylinder group %d failed: "                \
 996                     "%s (" #fmt ") %s %s (" #fmt ")\n",                 \
 997                     sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg,       \
 998                     #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs);     \
 999                 error = 1;                                              \
1000         }
1001 int
1002 check_cgmagic(int cg, struct bufarea *cgbp)
1003 {
1004         struct cg *cgp = cgbp->b_un.b_cg;
1005         uint32_t cghash, calchash;
1006         static int prevfailcg = -1;
1007         long start;
1008         int error;
1009
1010         /*
1011          * Extended cylinder group checks.
1012          */
1013         calchash = cgp->cg_ckhash;
1014         if ((sblock.fs_metackhash & CK_CYLGRP) != 0 &&
1015             (ckhashadd & CK_CYLGRP) == 0) {
1016                 cghash = cgp->cg_ckhash;
1017                 cgp->cg_ckhash = 0;
1018                 calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
1019                 cgp->cg_ckhash = cghash;
1020         }
1021         error = 0;
1022         CHK(cgp->cg_ckhash, !=, calchash, "%jd");
1023         CHK(cg_chkmagic(cgp), ==, 0, "%jd");
1024         CHK(cgp->cg_cgx, !=, cg, "%jd");
1025         CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd");
1026         if (sblock.fs_magic == FS_UFS1_MAGIC) {
1027                 CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
1028                 CHK(cgp->cg_old_ncyl, >, sblock.fs_old_cpg, "%jd");
1029         } else if (sblock.fs_magic == FS_UFS2_MAGIC) {
1030                 CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd");
1031                 CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd");
1032         }
1033         if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) {
1034                 CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd");
1035         } else {
1036                 CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg),
1037                     "%jd");
1038         }
1039         start = sizeof(*cgp);
1040         if (sblock.fs_magic == FS_UFS2_MAGIC) {
1041                 CHK(cgp->cg_iusedoff, !=, start, "%jd");
1042         } else if (sblock.fs_magic == FS_UFS1_MAGIC) {
1043                 CHK(cgp->cg_niblk, !=, 0, "%jd");
1044                 CHK(cgp->cg_initediblk, !=, 0, "%jd");
1045                 CHK(cgp->cg_old_ncyl, !=, sblock.fs_old_cpg, "%jd");
1046                 CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
1047                 CHK(cgp->cg_old_btotoff, !=, start, "%jd");
1048                 CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff +
1049                     sblock.fs_old_cpg * sizeof(int32_t), "%jd");
1050                 CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff +
1051                     sblock.fs_old_cpg * sizeof(u_int16_t), "%jd");
1052         }
1053         CHK(cgp->cg_freeoff, !=,
1054             cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd");
1055         if (sblock.fs_contigsumsize == 0) {
1056                 CHK(cgp->cg_nextfreeoff, !=,
1057                     cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd");
1058         } else {
1059                 CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag,
1060                     "%jd");
1061                 CHK(cgp->cg_clustersumoff, !=,
1062                     roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT),
1063                     sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd");
1064                 CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff +
1065                     (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd");
1066                 CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff +
1067                     howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT),
1068                     "%jd");
1069         }
1070         if (error == 0)
1071                 return (1);
1072         if (prevfailcg == cg)
1073                 return (0);
1074         prevfailcg = cg;
1075         pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg);
1076         printf("\n");
1077         return (0);
1078 }
1079
1080 void
1081 rebuild_cg(int cg, struct bufarea *cgbp)
1082 {
1083         struct cg *cgp = cgbp->b_un.b_cg;
1084         long start;
1085
1086         /*
1087          * Zero out the cylinder group and then initialize critical fields.
1088          * Bit maps and summaries will be recalculated by later passes.
1089          */
1090         memset(cgp, 0, (size_t)sblock.fs_cgsize);
1091         cgp->cg_magic = CG_MAGIC;
1092         cgp->cg_cgx = cg;
1093         cgp->cg_niblk = sblock.fs_ipg;
1094         cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock));
1095         if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size)
1096                 cgp->cg_ndblk = sblock.fs_fpg;
1097         else
1098                 cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg);
1099         start = sizeof(*cgp);
1100         if (sblock.fs_magic == FS_UFS2_MAGIC) {
1101                 cgp->cg_iusedoff = start;
1102         } else if (sblock.fs_magic == FS_UFS1_MAGIC) {
1103                 cgp->cg_niblk = 0;
1104                 cgp->cg_initediblk = 0;
1105                 cgp->cg_old_ncyl = sblock.fs_old_cpg;
1106                 cgp->cg_old_niblk = sblock.fs_ipg;
1107                 cgp->cg_old_btotoff = start;
1108                 cgp->cg_old_boff = cgp->cg_old_btotoff +
1109                     sblock.fs_old_cpg * sizeof(int32_t);
1110                 cgp->cg_iusedoff = cgp->cg_old_boff +
1111                     sblock.fs_old_cpg * sizeof(u_int16_t);
1112         }
1113         cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT);
1114         cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT);
1115         if (sblock.fs_contigsumsize > 0) {
1116                 cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag;
1117                 cgp->cg_clustersumoff =
1118                     roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t));
1119                 cgp->cg_clustersumoff -= sizeof(u_int32_t);
1120                 cgp->cg_clusteroff = cgp->cg_clustersumoff +
1121                     (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t);
1122                 cgp->cg_nextfreeoff = cgp->cg_clusteroff +
1123                     howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT);
1124         }
1125         cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
1126         cgdirty(cgbp);
1127 }
1128
1129 /*
1130  * allocate a data block with the specified number of fragments
1131  */
1132 ufs2_daddr_t
1133 allocblk(long startcg, long frags,
1134     ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
1135 {
1136         ufs2_daddr_t blkno, newblk;
1137
1138         if (sujrecovery && checkblkavail == std_checkblkavail) {
1139                 pfatal("allocblk: std_checkblkavail used for SUJ recovery\n");
1140                 return (0);
1141         }
1142         if (frags <= 0 || frags > sblock.fs_frag)
1143                 return (0);
1144         for (blkno = MAX(cgdata(&sblock, startcg), 0);
1145              blkno < maxfsblock - sblock.fs_frag;
1146              blkno += sblock.fs_frag) {
1147                 if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
1148                         continue;
1149                 if (newblk > 0)
1150                         return (newblk);
1151                 if (newblk < 0)
1152                         blkno = -newblk;
1153         }
1154         for (blkno = MAX(cgdata(&sblock, 0), 0);
1155              blkno < cgbase(&sblock, startcg) - sblock.fs_frag;
1156              blkno += sblock.fs_frag) {
1157                 if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
1158                         continue;
1159                 if (newblk > 0)
1160                         return (newblk);
1161                 if (newblk < 0)
1162                         blkno = -newblk;
1163         }
1164         return (0);
1165 }
1166
1167 ufs2_daddr_t
1168 std_checkblkavail(ufs2_daddr_t blkno, long frags)
1169 {
1170         struct bufarea *cgbp;
1171         struct cg *cgp;
1172         ufs2_daddr_t j, k, baseblk;
1173         long cg;
1174
1175         if ((u_int64_t)blkno > sblock.fs_size)
1176                 return (0);
1177         for (j = 0; j <= sblock.fs_frag - frags; j++) {
1178                 if (testbmap(blkno + j))
1179                         continue;
1180                 for (k = 1; k < frags; k++)
1181                         if (testbmap(blkno + j + k))
1182                                 break;
1183                 if (k < frags) {
1184                         j += k;
1185                         continue;
1186                 }
1187                 cg = dtog(&sblock, blkno + j);
1188                 cgbp = cglookup(cg);
1189                 cgp = cgbp->b_un.b_cg;
1190                 if (!check_cgmagic(cg, cgbp))
1191                         return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
1192                 baseblk = dtogd(&sblock, blkno + j);
1193                 for (k = 0; k < frags; k++) {
1194                         setbmap(blkno + j + k);
1195                         clrbit(cg_blksfree(cgp), baseblk + k);
1196                 }
1197                 n_blks += frags;
1198                 if (frags == sblock.fs_frag)
1199                         cgp->cg_cs.cs_nbfree--;
1200                 else
1201                         cgp->cg_cs.cs_nffree -= frags;
1202                 cgdirty(cgbp);
1203                 return (blkno + j);
1204         }
1205         return (0);
1206 }
1207
1208 /*
1209  * Check whether a file size is within the limits for the filesystem.
1210  * Return 1 when valid and 0 when too big.
1211  *
1212  * This should match the file size limit in ffs_mountfs().
1213  */
1214 int
1215 chkfilesize(mode_t mode, u_int64_t filesize)
1216 {
1217         u_int64_t kernmaxfilesize;
1218
1219         if (sblock.fs_magic == FS_UFS1_MAGIC)
1220                 kernmaxfilesize = (off_t)0x40000000 * sblock.fs_bsize - 1;
1221         else
1222                 kernmaxfilesize = sblock.fs_maxfilesize;
1223         if (filesize > kernmaxfilesize ||
1224             filesize > sblock.fs_maxfilesize ||
1225             (mode == IFDIR && filesize > MAXDIRSIZE)) {
1226                 if (debug)
1227                         printf("bad file size %ju:", (uintmax_t)filesize);
1228                 return (0);
1229         }
1230         return (1);
1231 }
1232
1233 /*
1234  * Slow down IO so as to leave some disk bandwidth for other processes
1235  */
1236 void
1237 slowio_start()
1238 {
1239
1240         /* Delay one in every 8 operations */
1241         slowio_pollcnt = (slowio_pollcnt + 1) & 7;
1242         if (slowio_pollcnt == 0) {
1243                 gettimeofday(&slowio_starttime, NULL);
1244         }
1245 }
1246
1247 void
1248 slowio_end()
1249 {
1250         struct timeval tv;
1251         int delay_usec;
1252
1253         if (slowio_pollcnt != 0)
1254                 return;
1255
1256         /* Update the slowdown interval. */
1257         gettimeofday(&tv, NULL);
1258         delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 +
1259             (tv.tv_usec - slowio_starttime.tv_usec);
1260         if (delay_usec < 64)
1261                 delay_usec = 64;
1262         if (delay_usec > 2500000)
1263                 delay_usec = 2500000;
1264         slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6;
1265         /* delay by 8 times the average IO delay */
1266         if (slowio_delay_usec > 64)
1267                 usleep(slowio_delay_usec * 8);
1268 }
1269
1270 /*
1271  * Find a pathname
1272  */
1273 void
1274 getpathname(char *namebuf, ino_t curdir, ino_t ino)
1275 {
1276         int len;
1277         char *cp;
1278         struct inode ip;
1279         struct inodesc idesc;
1280         static int busy = 0;
1281
1282         if (curdir == ino && ino == UFS_ROOTINO) {
1283                 (void)strcpy(namebuf, "/");
1284                 return;
1285         }
1286         if (busy || !INO_IS_DVALID(curdir)) {
1287                 (void)strcpy(namebuf, "?");
1288                 return;
1289         }
1290         busy = 1;
1291         memset(&idesc, 0, sizeof(struct inodesc));
1292         idesc.id_type = DATA;
1293         idesc.id_fix = IGNORE;
1294         cp = &namebuf[MAXPATHLEN - 1];
1295         *cp = '\0';
1296         if (curdir != ino) {
1297                 idesc.id_parent = curdir;
1298                 goto namelookup;
1299         }
1300         while (ino != UFS_ROOTINO) {
1301                 idesc.id_number = ino;
1302                 idesc.id_func = findino;
1303                 idesc.id_name = strdup("..");
1304                 ginode(ino, &ip);
1305                 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) {
1306                         irelse(&ip);
1307                         free(idesc.id_name);
1308                         break;
1309                 }
1310                 irelse(&ip);
1311                 free(idesc.id_name);
1312         namelookup:
1313                 idesc.id_number = idesc.id_parent;
1314                 idesc.id_parent = ino;
1315                 idesc.id_func = findname;
1316                 idesc.id_name = namebuf;
1317                 ginode(idesc.id_number, &ip);
1318                 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) {
1319                         irelse(&ip);
1320                         break;
1321                 }
1322                 irelse(&ip);
1323                 len = strlen(namebuf);
1324                 cp -= len;
1325                 memmove(cp, namebuf, (size_t)len);
1326                 *--cp = '/';
1327                 if (cp < &namebuf[UFS_MAXNAMLEN])
1328                         break;
1329                 ino = idesc.id_number;
1330         }
1331         busy = 0;
1332         if (ino != UFS_ROOTINO)
1333                 *--cp = '?';
1334         memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp));
1335 }
1336
1337 void
1338 catch(int sig __unused)
1339 {
1340
1341         ckfini(0);
1342         exit(12);
1343 }
1344
1345 /*
1346  * When preening, allow a single quit to signal
1347  * a special exit after file system checks complete
1348  * so that reboot sequence may be interrupted.
1349  */
1350 void
1351 catchquit(int sig __unused)
1352 {
1353         printf("returning to single-user after file system check\n");
1354         returntosingle = 1;
1355         (void)signal(SIGQUIT, SIG_DFL);
1356 }
1357
1358 /*
1359  * determine whether an inode should be fixed.
1360  */
1361 int
1362 dofix(struct inodesc *idesc, const char *msg)
1363 {
1364
1365         switch (idesc->id_fix) {
1366
1367         case DONTKNOW:
1368                 if (idesc->id_type == DATA)
1369                         direrror(idesc->id_number, msg);
1370                 else
1371                         pwarn("%s", msg);
1372                 if (preen) {
1373                         printf(" (SALVAGED)\n");
1374                         idesc->id_fix = FIX;
1375                         return (ALTERED);
1376                 }
1377                 if (reply("SALVAGE") == 0) {
1378                         idesc->id_fix = NOFIX;
1379                         return (0);
1380                 }
1381                 idesc->id_fix = FIX;
1382                 return (ALTERED);
1383
1384         case FIX:
1385                 return (ALTERED);
1386
1387         case NOFIX:
1388         case IGNORE:
1389                 return (0);
1390
1391         default:
1392                 errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix);
1393         }
1394         /* NOTREACHED */
1395         return (0);
1396 }
1397
1398 #include <stdarg.h>
1399
1400 /*
1401  * Print details about a buffer.
1402  */
1403 void
1404 prtbuf(struct bufarea *bp, const char *fmt, ...)
1405 {
1406         va_list ap;
1407         va_start(ap, fmt);
1408         if (preen)
1409                 (void)fprintf(stdout, "%s: ", cdevname);
1410         (void)vfprintf(stdout, fmt, ap);
1411         va_end(ap);
1412         printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
1413             "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno,
1414             bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean",
1415             (intmax_t) bp->b_index);
1416 }
1417
1418 /*
1419  * An unexpected inconsistency occurred.
1420  * Die if preening or file system is running with soft dependency protocol,
1421  * otherwise just print message and continue.
1422  */
1423 void
1424 pfatal(const char *fmt, ...)
1425 {
1426         va_list ap;
1427         va_start(ap, fmt);
1428         if (!preen) {
1429                 (void)vfprintf(stdout, fmt, ap);
1430                 va_end(ap);
1431                 if (usedsoftdep)
1432                         (void)fprintf(stdout,
1433                             "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n");
1434                 /*
1435                  * Force foreground fsck to clean up inconsistency.
1436                  */
1437                 if (bkgrdflag) {
1438                         cmd.value = FS_NEEDSFSCK;
1439                         cmd.size = 1;
1440                         if (sysctlbyname("vfs.ffs.setflags", 0, 0,
1441                             &cmd, sizeof cmd) == -1)
1442                                 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n");
1443                         fprintf(stdout, "CANNOT RUN IN BACKGROUND\n");
1444                         ckfini(0);
1445                         exit(EEXIT);
1446                 }
1447                 return;
1448         }
1449         if (cdevname == NULL)
1450                 cdevname = strdup("fsck");
1451         (void)fprintf(stdout, "%s: ", cdevname);
1452         (void)vfprintf(stdout, fmt, ap);
1453         (void)fprintf(stdout,
1454             "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n",
1455             cdevname, usedsoftdep ? " SOFT UPDATE " : " ");
1456         /*
1457          * Force foreground fsck to clean up inconsistency.
1458          */
1459         if (bkgrdflag) {
1460                 cmd.value = FS_NEEDSFSCK;
1461                 cmd.size = 1;
1462                 if (sysctlbyname("vfs.ffs.setflags", 0, 0,
1463                     &cmd, sizeof cmd) == -1)
1464                         pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n");
1465         }
1466         ckfini(0);
1467         exit(EEXIT);
1468 }
1469
1470 /*
1471  * Pwarn just prints a message when not preening or running soft dependency
1472  * protocol, or a warning (preceded by filename) when preening.
1473  */
1474 void
1475 pwarn(const char *fmt, ...)
1476 {
1477         va_list ap;
1478         va_start(ap, fmt);
1479         if (preen)
1480                 (void)fprintf(stdout, "%s: ", cdevname);
1481         (void)vfprintf(stdout, fmt, ap);
1482         va_end(ap);
1483 }
1484
1485 /*
1486  * Stub for routines from kernel.
1487  */
1488 void
1489 panic(const char *fmt, ...)
1490 {
1491         va_list ap;
1492         va_start(ap, fmt);
1493         pfatal("INTERNAL INCONSISTENCY:");
1494         (void)vfprintf(stdout, fmt, ap);
1495         va_end(ap);
1496         exit(EEXIT);
1497 }