sbin/fsck_ffs/inode.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-3-Clause
   3  *
   4  * Copyright (c) 1980, 1986, 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  * 3. Neither the name of the University nor the names of its contributors
  16  *    may be used to endorse or promote products derived from this software
  17  *    without specific prior written permission.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  29  * SUCH DAMAGE.
  30  */
  31
  32 #if 0
  33 #ifndef lint
  34 static const char sccsid[] = "@(#)inode.c       8.8 (Berkeley) 4/28/95";
  35 #endif /* not lint */
  36 #endif
  37 #include <sys/cdefs.h>
  38 __FBSDID("$FreeBSD$");
  39
  40 #include <sys/param.h>
  41 #include <sys/stat.h>
  42 #include <sys/stdint.h>
  43 #include <sys/sysctl.h>
  44
  45 #include <ufs/ufs/dinode.h>
  46 #include <ufs/ufs/dir.h>
  47 #include <ufs/ffs/fs.h>
  48
  49 #include <err.h>
  50 #include <pwd.h>
  51 #include <string.h>
  52 #include <time.h>
  53 #include <libufs.h>
  54
  55 #include "fsck.h"
  56
/*
 * Buffer holding the block of on-disk inodes most recently loaded by
 * ginode().  It is NULL until ginode() loads a block, and is reset to
 * NULL when such a load reports an error.
 */
struct bufarea *icachebp;       /* inode cache buffer */

/* Helpers private to this file. */
static int iblock(struct inodesc *, off_t isize, int type);
static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t,
    struct bufarea **);
static int snapclean(struct inodesc *idesc);
static void chkcopyonwrite(struct fs *, ufs2_daddr_t,
    ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long));
  65
/*
 * Walk every block referenced by the inode dp and hand each one to the
 * per-block routine: idesc->id_func for most traversals, dirscan() when
 * idesc->id_type is DATA (directory contents).
 *
 * The traversal state in idesc (id_lbn, id_level, id_numfrags, id_blkno,
 * ...) is reset here and updated as the walk proceeds.
 *
 * Returns KEEPON when the walk completes (or when the inode has no blocks
 * to walk: block/character devices and symlinks short enough to be stored
 * in the inode), STOP on a bad size or an unrepaired hole in a directory's
 * direct blocks, or the per-block routine's result once it includes STOP.
 */
int
ckinode(union dinode *dp, struct inodesc *idesc)
{
        off_t remsize, sizepb;
        int i, offset, ret;
        struct inode ip;
        union dinode dino;
        ufs2_daddr_t ndb;
        mode_t mode;
        char pathbuf[MAXPATHLEN + 1];

        if (idesc->id_fix != IGNORE)
                idesc->id_fix = DONTKNOW;
        idesc->id_dp = dp;
        idesc->id_lbn = -1;
        idesc->id_lballoc = -1;
        idesc->id_level = 0;
        idesc->id_entryno = 0;
        idesc->id_filesize = DIP(dp, di_size);
        mode = DIP(dp, di_mode) & IFMT;
        /* Devices and in-inode symlinks have no data blocks to visit. */
        if (mode == IFBLK || mode == IFCHR || (mode == IFLNK &&
            DIP(dp, di_size) < (unsigned)sblock.fs_maxsymlinklen))
                return (KEEPON);
        /*
         * Work from a private copy of the dinode.
         * NOTE(review): presumably so that changes the per-block routines
         * make to the on-disk inode cannot disturb this walk -- confirm.
         */
        if (sblock.fs_magic == FS_UFS1_MAGIC)
                dino.dp1 = dp->dp1;
        else
                dino.dp2 = dp->dp2;
        if (DIP(&dino, di_size) < 0) {
                pfatal("NEGATIVE INODE SIZE %jd\n", DIP(&dino, di_size));
                return (STOP);
        }
        /* ndb counts down the file system blocks still covered by di_size. */
        ndb = howmany(DIP(&dino, di_size), sblock.fs_bsize);
        for (i = 0; i < UFS_NDADDR; i++) {
                idesc->id_lbn++;
                /*
                 * The last block of the file may be a partial block; give
                 * the callback the number of fragments it really occupies.
                 */
                if (--ndb == 0 &&
                    (offset = blkoff(&sblock, DIP(&dino, di_size))) != 0)
                        idesc->id_numfrags =
                                numfrags(&sblock, fragroundup(&sblock, offset));
                else
                        idesc->id_numfrags = sblock.fs_frag;
                if (DIP(&dino, di_db[i]) == 0) {
                        /* ndb >= 0 means block i lies within di_size. */
                        if (idesc->id_type == DATA && ndb >= 0) {
                                /* An empty block in a directory XXX */
                                getpathname(pathbuf, idesc->id_number,
                                                idesc->id_number);
                                pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
                                        pathbuf);
                                if (reply("ADJUST LENGTH") == 1) {
                                        /* Truncate the directory at the hole. */
                                        ginode(idesc->id_number, &ip);
                                        DIP_SET(ip.i_dp, di_size,
                                            i * sblock.fs_bsize);
                                        printf(
                                            "YOU MUST RERUN FSCK AFTERWARDS\n");
                                        rerun = 1;
                                        inodirty(&ip);
                                        irelse(&ip);
                                }
                                return (STOP);
                        }
                        continue;
                }
                idesc->id_blkno = DIP(&dino, di_db[i]);
                if (idesc->id_type != DATA)
                        ret = (*idesc->id_func)(idesc);
                else
                        ret = dirscan(idesc);
                if (ret & STOP)
                        return (ret);
        }
        idesc->id_numfrags = sblock.fs_frag;
        /* Bytes of the file that lie beyond the direct blocks. */
        remsize = DIP(&dino, di_size) - sblock.fs_bsize * UFS_NDADDR;
        sizepb = sblock.fs_bsize;
        for (i = 0; i < UFS_NIADDR; i++) {
                /* sizepb becomes the bytes mapped by indirect tree i. */
                sizepb *= NINDIR(&sblock);
                idesc->id_level = i + 1;
                if (DIP(&dino, di_ib[i])) {
                        idesc->id_blkno = DIP(&dino, di_ib[i]);
                        ret = iblock(idesc, remsize, BT_LEVEL1 + i);
                        if (ret & STOP)
                                return (ret);
                } else if (remsize > 0) {
                        /* Missing tree inside the file: skip its lbn range. */
                        idesc->id_lbn += sizepb / sblock.fs_bsize;
                        if (idesc->id_type == DATA) {
                                /* An empty block in a directory XXX */
                                getpathname(pathbuf, idesc->id_number,
                                                idesc->id_number);
                                pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
                                        pathbuf);
                                if (reply("ADJUST LENGTH") == 1) {
                                        /* Drop the part of the size not backed by blocks. */
                                        ginode(idesc->id_number, &ip);
                                        DIP_SET(ip.i_dp, di_size,
                                            DIP(ip.i_dp, di_size) - remsize);
                                        remsize = 0;
                                        printf(
                                            "YOU MUST RERUN FSCK AFTERWARDS\n");
                                        rerun = 1;
                                        inodirty(&ip);
                                        irelse(&ip);
                                        break;
                                }
                        }
                }
                remsize -= sizepb;
        }
        return (KEEPON);
}
 172
 173 static int
 174 iblock(struct inodesc *idesc, off_t isize, int type)
 175 {
 176         struct inode ip;
 177         struct bufarea *bp;
 178         int i, n, (*func)(struct inodesc *), nif;
 179         off_t sizepb;
 180         char buf[BUFSIZ];
 181         char pathbuf[MAXPATHLEN + 1];
 182
 183         if (idesc->id_type != DATA) {
 184                 func = idesc->id_func;
 185                 if (((n = (*func)(idesc)) & KEEPON) == 0)
 186                         return (n);
 187         } else
 188                 func = dirscan;
 189         bp = getdatablk(idesc->id_blkno, sblock.fs_bsize, type);
 190         if (bp->b_errs != 0) {
 191                 brelse(bp);
 192                 return (SKIP);
 193         }
 194         idesc->id_bp = bp;
 195         idesc->id_level--;
 196         for (sizepb = sblock.fs_bsize, i = 0; i < idesc->id_level; i++)
 197                 sizepb *= NINDIR(&sblock);
 198         if (howmany(isize, sizepb) > NINDIR(&sblock))
 199                 nif = NINDIR(&sblock);
 200         else
 201                 nif = howmany(isize, sizepb);
 202         if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) {
 203                 for (i = nif; i < NINDIR(&sblock); i++) {
 204                         if (IBLK(bp, i) == 0)
 205                                 continue;
 206                         (void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
 207                             (u_long)idesc->id_number);
 208                         if (preen) {
 209                                 pfatal("%s", buf);
 210                         } else if (dofix(idesc, buf)) {
 211                                 IBLK_SET(bp, i, 0);
 212                                 dirty(bp);
 213                         }
 214                 }
 215                 flush(fswritefd, bp);
 216         }
 217         for (i = 0; i < nif; i++) {
 218                 if (IBLK(bp, i)) {
 219                         idesc->id_blkno = IBLK(bp, i);
 220                         bp->b_index = i;
 221                         if (idesc->id_level == 0) {
 222                                 idesc->id_lbn++;
 223                                 n = (*func)(idesc);
 224                         } else {
 225                                 n = iblock(idesc, isize, type - 1);
 226                                 idesc->id_level++;
 227                         }
 228                         if (n & STOP) {
 229                                 brelse(bp);
 230                                 return (n);
 231                         }
 232                 } else {
 233                         idesc->id_lbn += sizepb / sblock.fs_bsize;
 234                         if (idesc->id_type == DATA && isize > 0) {
 235                                 /* An empty block in a directory XXX */
 236                                 getpathname(pathbuf, idesc->id_number,
 237                                                 idesc->id_number);
 238                                 pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
 239                                         pathbuf);
 240                                 if (reply("ADJUST LENGTH") == 1) {
 241                                         ginode(idesc->id_number, &ip);
 242                                         DIP_SET(ip.i_dp, di_size,
 243                                             DIP(ip.i_dp, di_size) - isize);
 244                                         isize = 0;
 245                                         printf(
 246                                             "YOU MUST RERUN FSCK AFTERWARDS\n");
 247                                         rerun = 1;
 248                                         inodirty(&ip);
 249                                         brelse(bp);
 250                                         return(STOP);
 251                                 }
 252                         }
 253                 }
 254                 isize -= sizepb;
 255         }
 256         brelse(bp);
 257         return (KEEPON);
 258 }
 259
 260 /*
 261  * Finds the disk block address at the specified lbn within the inode
 262  * specified by dp.  This follows the whole tree and honors di_size and
 263  * di_extsize so it is a true test of reachability.  The lbn may be
 264  * negative if an extattr or indirect block is requested.
 265  */
 266 ufs2_daddr_t
 267 ino_blkatoff(union dinode *dp, ino_t ino, ufs_lbn_t lbn, int *frags,
 268     struct bufarea **bpp)
 269 {
 270         ufs_lbn_t tmpval;
 271         ufs_lbn_t cur;
 272         ufs_lbn_t next;
 273         int i;
 274
 275         *frags = 0;
 276         if (bpp != NULL)
 277                 *bpp = NULL;
 278         /*
 279          * Handle extattr blocks first.
 280          */
 281         if (lbn < 0 && lbn >= -UFS_NXADDR) {
 282                 lbn = -1 - lbn;
 283                 if (lbn > lblkno(&sblock, dp->dp2.di_extsize - 1))
 284                         return (0);
 285                 *frags = numfrags(&sblock,
 286                     sblksize(&sblock, dp->dp2.di_extsize, lbn));
 287                 return (dp->dp2.di_extb[lbn]);
 288         }
 289         /*
 290          * Now direct and indirect.
 291          */
 292         if (DIP(dp, di_mode) == IFLNK &&
 293             DIP(dp, di_size) < sblock.fs_maxsymlinklen)
 294                 return (0);
 295         if (lbn >= 0 && lbn < UFS_NDADDR) {
 296                 *frags = numfrags(&sblock,
 297                     sblksize(&sblock, DIP(dp, di_size), lbn));
 298                 return (DIP(dp, di_db[lbn]));
 299         }
 300         *frags = sblock.fs_frag;
 301
 302         for (i = 0, tmpval = NINDIR(&sblock), cur = UFS_NDADDR; i < UFS_NIADDR;
 303             i++, tmpval *= NINDIR(&sblock), cur = next) {
 304                 next = cur + tmpval;
 305                 if (lbn == -cur - i)
 306                         return (DIP(dp, di_ib[i]));
 307                 /*
 308                  * Determine whether the lbn in question is within this tree.
 309                  */
 310                 if (lbn < 0 && -lbn >= next)
 311                         continue;
 312                 if (lbn > 0 && lbn >= next)
 313                         continue;
 314                 if (DIP(dp, di_ib[i]) == 0)
 315                         return (0);
 316                 return (indir_blkatoff(DIP(dp, di_ib[i]), ino, -cur - i, lbn,
 317                     bpp));
 318         }
 319         pfatal("lbn %jd not in ino %ju\n", lbn, (uintmax_t)ino);
 320         return (0);
 321 }
 322
 323 /*
 324  * Fetch an indirect block to find the block at a given lbn.  The lbn
 325  * may be negative to fetch a specific indirect block pointer or positive
 326  * to fetch a specific block.
 327  */
 328 static ufs2_daddr_t
 329 indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn,
 330     struct bufarea **bpp)
 331 {
 332         struct bufarea *bp;
 333         ufs_lbn_t lbnadd;
 334         ufs_lbn_t base;
 335         int i, level;
 336
 337         level = lbn_level(cur);
 338         if (level == -1)
 339                 pfatal("Invalid indir lbn %jd in ino %ju\n",
 340                     lbn, (uintmax_t)ino);
 341         if (level == 0 && lbn < 0)
 342                 pfatal("Invalid lbn %jd in ino %ju\n",
 343                     lbn, (uintmax_t)ino);
 344         lbnadd = 1;
 345         base = -(cur + level);
 346         for (i = level; i > 0; i--)
 347                 lbnadd *= NINDIR(&sblock);
 348         if (lbn > 0)
 349                 i = (lbn - base) / lbnadd;
 350         else
 351                 i = (-lbn - base) / lbnadd;
 352         if (i < 0 || i >= NINDIR(&sblock)) {
 353                 pfatal("Invalid indirect index %d produced by lbn %jd "
 354                     "in ino %ju\n", i, lbn, (uintmax_t)ino);
 355                 return (0);
 356         }
 357         if (level == 0)
 358                 cur = base + (i * lbnadd);
 359         else
 360                 cur = -(base + (i * lbnadd)) - (level - 1);
 361         bp = getdatablk(blk, sblock.fs_bsize, BT_LEVEL1 + level);
 362         if (bp->b_errs != 0)
 363                 return (0);
 364         blk = IBLK(bp, i);
 365         bp->b_index = i;
 366         if (cur == lbn || blk == 0) {
 367                 if (bpp != NULL)
 368                         *bpp = bp;
 369                 else
 370                         brelse(bp);
 371                 return (blk);
 372         }
 373         brelse(bp);
 374         if (level == 0)
 375                 pfatal("Invalid lbn %jd at level 0 for ino %ju\n", lbn,
 376                     (uintmax_t)ino);
 377         return (indir_blkatoff(blk, ino, cur, lbn, bpp));
 378 }
 379
 380 /*
 381  * Check that a block in a legal block number.
 382  * Return 0 if in range, 1 if out of range.
 383  */
 384 int
 385 chkrange(ufs2_daddr_t blk, int cnt)
 386 {
 387         int c;
 388
 389         if (cnt <= 0 || blk <= 0 || blk >= maxfsblock ||
 390             cnt > maxfsblock - blk) {
 391                 if (debug)
 392                         printf("out of range: blk %ld, offset %i, size %d\n",
 393                             (long)blk, (int)fragnum(&sblock, blk), cnt);
 394                 return (1);
 395         }
 396         if (cnt > sblock.fs_frag ||
 397             fragnum(&sblock, blk) + cnt > sblock.fs_frag) {
 398                 if (debug)
 399                         printf("bad size: blk %ld, offset %i, size %d\n",
 400                             (long)blk, (int)fragnum(&sblock, blk), cnt);
 401                 return (1);
 402         }
 403         c = dtog(&sblock, blk);
 404         if (blk < cgdmin(&sblock, c)) {
 405                 if ((blk + cnt) > cgsblock(&sblock, c)) {
 406                         if (debug) {
 407                                 printf("blk %ld < cgdmin %ld;",
 408                                     (long)blk, (long)cgdmin(&sblock, c));
 409                                 printf(" blk + cnt %ld > cgsbase %ld\n",
 410                                     (long)(blk + cnt),
 411                                     (long)cgsblock(&sblock, c));
 412                         }
 413                         return (1);
 414                 }
 415         } else {
 416                 if ((blk + cnt) > cgbase(&sblock, c+1)) {
 417                         if (debug)  {
 418                                 printf("blk %ld >= cgdmin %ld;",
 419                                     (long)blk, (long)cgdmin(&sblock, c));
 420                                 printf(" blk + cnt %ld > sblock.fs_fpg %ld\n",
 421                                     (long)(blk + cnt), (long)sblock.fs_fpg);
 422                         }
 423                         return (1);
 424                 }
 425         }
 426         return (0);
 427 }
 428
 429 /*
 430  * General purpose interface for reading inodes.
 431  *
 432  * firstinum and lastinum track contents of getnextino() cache (below).
 433  */
 434 static ino_t firstinum, lastinum;
 435 static struct bufarea inobuf;
 436
 437 void
 438 ginode(ino_t inumber, struct inode *ip)
 439 {
 440         ufs2_daddr_t iblk;
 441         struct ufs2_dinode *dp;
 442
 443         if (inumber < UFS_ROOTINO || inumber >= maxino)
 444                 errx(EEXIT, "bad inode number %ju to ginode",
 445                     (uintmax_t)inumber);
 446         ip->i_number = inumber;
 447         if (inumber >= firstinum && inumber < lastinum) {
 448                 /* contents in getnextino() cache */
 449                 ip->i_bp = &inobuf;
 450                 inobuf.b_refcnt++;
 451                 inobuf.b_index = firstinum;
 452         } else if (icachebp != NULL &&
 453             inumber >= icachebp->b_index &&
 454             inumber < icachebp->b_index + INOPB(&sblock)) {
 455                 /* take an additional reference for the returned inode */
 456                 icachebp->b_refcnt++;
 457                 ip->i_bp = icachebp;
 458         } else {
 459                 iblk = ino_to_fsba(&sblock, inumber);
 460                 /* release our cache-hold reference on old icachebp */
 461                 if (icachebp != NULL)
 462                         brelse(icachebp);
 463                 icachebp = getdatablk(iblk, sblock.fs_bsize, BT_INODES);
 464                 if (icachebp->b_errs != 0) {
 465                         icachebp = NULL;
 466                         ip->i_bp = NULL;
 467                         ip->i_dp = &zino;
 468                         return;
 469                 }
 470                 /* take a cache-hold reference on new icachebp */
 471                 icachebp->b_refcnt++;
 472                 icachebp->b_index = rounddown(inumber, INOPB(&sblock));
 473                 ip->i_bp = icachebp;
 474         }
 475         if (sblock.fs_magic == FS_UFS1_MAGIC) {
 476                 ip->i_dp = (union dinode *)
 477                     &ip->i_bp->b_un.b_dinode1[inumber - ip->i_bp->b_index];
 478                 return;
 479         }
 480         ip->i_dp = (union dinode *)
 481             &ip->i_bp->b_un.b_dinode2[inumber - ip->i_bp->b_index];
 482         dp = (struct ufs2_dinode *)ip->i_dp;
 483         /* Do not check hash of inodes being created */
 484         if (dp->di_mode != 0 && ffs_verify_dinode_ckhash(&sblock, dp)) {
 485                 pwarn("INODE CHECK-HASH FAILED");
 486                 prtinode(ip);
 487                 if (preen || reply("FIX") != 0) {
 488                         if (preen)
 489                                 printf(" (FIXED)\n");
 490                         ffs_update_dinode_ckhash(&sblock, dp);
 491                         inodirty(ip);
 492                 }
 493         }
 494 }
 495
 496 /*
 497  * Release a held inode.
 498  */
 499 void
 500 irelse(struct inode *ip)
 501 {
 502
 503         /* Check for failed inode read */
 504         if (ip->i_bp == NULL)
 505                 return;
 506         if (debug && sblock.fs_magic == FS_UFS2_MAGIC &&
 507             ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp)) {
 508                 pwarn("irelse: releasing inode with bad check-hash");
 509                 prtinode(ip);
 510         }
 511         if (ip->i_bp->b_refcnt <= 0)
 512                 pfatal("irelse: releasing unreferenced ino %ju\n",
 513                     (uintmax_t) ip->i_number);
 514         brelse(ip->i_bp);
 515 }
 516
/*
 * Special purpose version of ginode used to optimize first pass
 * over all the inodes in numerical order.
 *
 * nextinum is the inode number the next call must ask for and
 * lastvalidinum the last one that may be asked for.  readcount counts
 * buffer refills; every readpercg-th refill reads partialcnt inodes
 * (partialsize bytes), all others read fullcnt inodes (inobufsize bytes).
 * These values are set up by setinodebuf().
 */
static ino_t nextinum, lastvalidinum;
static long readcount, readpercg, fullcnt, inobufsize, partialcnt, partialsize;

/*
 * Return a pointer to the on-disk inode inumber inside inobuf, refilling
 * the buffer from disk when inumber moves past its contents.  Calls must
 * be made with consecutive inode numbers; anything else exits through
 * errx().
 *
 * The inode's check-hash is recomputed when CK_INODE is set in ckhashadd,
 * and a failed check-hash is reported and optionally fixed.
 *
 * When rebuiltcg is non-zero, the first inode of each buffer is also
 * checked for plausibility and NULL is returned if it looks like garbage.
 */
union dinode *
getnextinode(ino_t inumber, int rebuiltcg)
{
        int j;
        long size;
        mode_t mode;
        ufs2_daddr_t ndb, blk;
        union dinode *dp;
        struct inode ip;
        static caddr_t nextinop;        /* next unread inode in inobuf */

        if (inumber != nextinum++ || inumber > lastvalidinum)
                errx(EEXIT, "bad inode number %ju to nextinode",
                    (uintmax_t)inumber);
        if (inumber >= lastinum) {
                /* Buffer exhausted: read the next run of inodes. */
                readcount++;
                firstinum = lastinum;
                blk = ino_to_fsba(&sblock, lastinum);
                if (readcount % readpercg == 0) {
                        size = partialsize;
                        lastinum += partialcnt;
                } else {
                        size = inobufsize;
                        lastinum += fullcnt;
                }
                /*
                 * Flush old contents in case they have been updated.
                 * If getblk encounters an error, it will already have zeroed
                 * out the buffer, so we do not need to do so here.
                 */
                if (inobuf.b_refcnt != 0)
                        pfatal("Non-zero getnextinode() ref count %d\n",
                            inobuf.b_refcnt);
                flush(fswritefd, &inobuf);
                getblk(&inobuf, blk, size);
                nextinop = inobuf.b_un.b_buf;
        }
        dp = (union dinode *)nextinop;
        /* Advance past this inode for the next call. */
        if (sblock.fs_magic == FS_UFS1_MAGIC)
                nextinop += sizeof(struct ufs1_dinode);
        else
                nextinop += sizeof(struct ufs2_dinode);
        /* Inode check-hashes are being added: compute this one now. */
        if ((ckhashadd & CK_INODE) != 0) {
                ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp);
                dirty(&inobuf);
        }
        if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp) != 0) {
                pwarn("INODE CHECK-HASH FAILED");
                /* Temporary inode wrapper so prtinode() can describe dp. */
                ip.i_bp = NULL;
                ip.i_dp = dp;
                ip.i_number = inumber;
                prtinode(&ip);
                if (preen || reply("FIX") != 0) {
                        if (preen)
                                printf(" (FIXED)\n");
                        ffs_update_dinode_ckhash(&sblock,
                            (struct ufs2_dinode *)dp);
                        dirty(&inobuf);
                }
        }
        /* Plausibility check, applied only to the first inode of a buffer. */
        if (rebuiltcg && (char *)dp == inobuf.b_un.b_buf) {
                /*
                 * Try to determine if we have reached the end of the
                 * allocated inodes.
                 */
                mode = DIP(dp, di_mode) & IFMT;
                if (mode == 0) {
                        /* An unused inode must not reference any blocks. */
                        if (memcmp(dp->dp2.di_db, zino.dp2.di_db,
                                UFS_NDADDR * sizeof(ufs2_daddr_t)) ||
                              memcmp(dp->dp2.di_ib, zino.dp2.di_ib,
                                UFS_NIADDR * sizeof(ufs2_daddr_t)) ||
                              dp->dp2.di_mode || dp->dp2.di_size)
                                return (NULL);
                        return (dp);
                }
                if (!ftypeok(dp))
                        return (NULL);
                /* ndb: number of blocks implied by the inode's size. */
                ndb = howmany(DIP(dp, di_size), sblock.fs_bsize);
                if (ndb < 0)
                        return (NULL);
                if (mode == IFBLK || mode == IFCHR)
                        ndb++;
                if (mode == IFLNK) {
                        /*
                         * Fake ndb value so direct/indirect block checks below
                         * will detect any garbage after symlink string.
                         */
                        if (DIP(dp, di_size) < (off_t)sblock.fs_maxsymlinklen) {
                                ndb = howmany(DIP(dp, di_size),
                                    sizeof(ufs2_daddr_t));
                                if (ndb > UFS_NDADDR) {
                                        j = ndb - UFS_NDADDR;
                                        for (ndb = 1; j > 1; j--)
                                                ndb *= NINDIR(&sblock);
                                        ndb += UFS_NDADDR;
                                }
                        }
                }
                /* Direct pointers beyond the size must be zero. */
                for (j = ndb; ndb < UFS_NDADDR && j < UFS_NDADDR; j++)
                        if (DIP(dp, di_db[j]) != 0)
                                return (NULL);
                /* j becomes the number of indirect levels the size needs. */
                for (j = 0, ndb -= UFS_NDADDR; ndb > 0; j++)
                        ndb /= NINDIR(&sblock);
                /* Indirect pointers for unneeded levels must be zero. */
                for (; j < UFS_NIADDR; j++)
                        if (DIP(dp, di_ib[j]) != 0)
                                return (NULL);
        }
        return (dp);
}
 633
 634 void
 635 setinodebuf(int cg, ino_t inosused)
 636 {
 637         ino_t inum;
 638
 639         inum = cg * sblock.fs_ipg;
 640         lastvalidinum = inum + inosused - 1;
 641         nextinum = inum;
 642         lastinum = inum;
 643         readcount = 0;
 644         /* Flush old contents in case they have been updated */
 645         flush(fswritefd, &inobuf);
 646         inobuf.b_bno = 0;
 647         if (inobuf.b_un.b_buf == NULL) {
 648                 inobufsize = blkroundup(&sblock,
 649                     MAX(INOBUFSIZE, sblock.fs_bsize));
 650                 initbarea(&inobuf, BT_INODES);
 651                 if ((inobuf.b_un.b_buf = Malloc((unsigned)inobufsize)) == NULL)
 652                         errx(EEXIT, "cannot allocate space for inode buffer");
 653         }
 654         fullcnt = inobufsize / ((sblock.fs_magic == FS_UFS1_MAGIC) ?
 655             sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode));
 656         readpercg = inosused / fullcnt;
 657         partialcnt = inosused % fullcnt;
 658         partialsize = fragroundup(&sblock,
 659             partialcnt * ((sblock.fs_magic == FS_UFS1_MAGIC) ?
 660             sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
 661         if (partialcnt != 0) {
 662                 readpercg++;
 663         } else {
 664                 partialcnt = fullcnt;
 665                 partialsize = inobufsize;
 666         }
 667 }
 668
 669 int
 670 freeblock(struct inodesc *idesc)
 671 {
 672         struct dups *dlp;
 673         struct bufarea *cgbp;
 674         struct cg *cgp;
 675         ufs2_daddr_t blkno;
 676         long size, nfrags;
 677
 678         blkno = idesc->id_blkno;
 679         if (idesc->id_type == SNAP) {
 680                 pfatal("clearing a snapshot dinode\n");
 681                 return (STOP);
 682         }
 683         size = lfragtosize(&sblock, idesc->id_numfrags);
 684         if (snapblkfree(&sblock, blkno, size, idesc->id_number,
 685             std_checkblkavail))
 686                 return (KEEPON);
 687         for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
 688                 if (chkrange(blkno, 1)) {
 689                         return (SKIP);
 690                 } else if (testbmap(blkno)) {
 691                         for (dlp = duplist; dlp; dlp = dlp->next) {
 692                                 if (dlp->dup != blkno)
 693                                         continue;
 694                                 dlp->dup = duplist->dup;
 695                                 dlp = duplist;
 696                                 duplist = duplist->next;
 697                                 free((char *)dlp);
 698                                 break;
 699                         }
 700                         if (dlp == NULL) {
 701                                 clrbmap(blkno);
 702                                 n_blks--;
 703                         }
 704                 }
 705         }
 706         /*
 707          * If all successfully returned, account for them.
 708          */
 709         if (nfrags == 0) {
 710                 cgbp = cglookup(dtog(&sblock, idesc->id_blkno));
 711                 cgp = cgbp->b_un.b_cg;
 712                 if (idesc->id_numfrags == sblock.fs_frag)
 713                         cgp->cg_cs.cs_nbfree++;
 714                 else
 715                         cgp->cg_cs.cs_nffree += idesc->id_numfrags;
 716                 cgdirty(cgbp);
 717         }
 718         return (KEEPON);
 719 }
 720
 721 /*
 722  * Prepare a snapshot file for being removed.
 723  */
 724 void
 725 snapremove(ino_t inum)
 726 {
 727         struct inodesc idesc;
 728         struct inode ip;
 729         int i;
 730
 731         for (i = 0; i < snapcnt; i++)
 732                 if (snaplist[i].i_number == inum)
 733                         break;
 734         if (i == snapcnt)
 735                 ginode(inum, &ip);
 736         else
 737                 ip = snaplist[i];
 738         if ((DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) {
 739                 printf("snapremove: inode %jd is not a snapshot\n",
 740                     (intmax_t)inum);
 741                 if (i == snapcnt)
 742                         irelse(&ip);
 743                 return;
 744         }
 745         if (debug)
 746                 printf("snapremove: remove %sactive snapshot %jd\n",
 747                     i == snapcnt ? "in" : "", (intmax_t)inum);
 748         /*
 749          * If on active snapshot list, remove it.
 750          */
 751         if (i < snapcnt) {
 752                 for (i++; i < FSMAXSNAP; i++) {
 753                         if (sblock.fs_snapinum[i] == 0)
 754                                 break;
 755                         snaplist[i - 1] = snaplist[i];
 756                         sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
 757                 }
 758                 sblock.fs_snapinum[i - 1] = 0;
 759                 bzero(&snaplist[i - 1], sizeof(struct inode));
 760                 snapcnt--;
 761         }
 762         memset(&idesc, 0, sizeof(struct inodesc));
 763         idesc.id_type = SNAP;
 764         idesc.id_func = snapclean;
 765         idesc.id_number = inum;
 766         (void)ckinode(ip.i_dp, &idesc);
 767         DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT);
 768         inodirty(&ip);
 769         irelse(&ip);
 770 }
 771
 772 static int
 773 snapclean(struct inodesc *idesc)
 774 {
 775         ufs2_daddr_t blkno;
 776         struct bufarea *bp;
 777         union dinode *dp;
 778
 779         blkno = idesc->id_blkno;
 780         if (blkno == 0)
 781                 return (KEEPON);
 782
 783         dp = idesc->id_dp;
 784         if (blkno == BLK_NOCOPY || blkno == BLK_SNAP) {
 785                 if (idesc->id_lbn < UFS_NDADDR) {
 786                         DIP_SET(dp, di_db[idesc->id_lbn], 0);
 787                 } else {
 788                         bp = idesc->id_bp;
 789                         IBLK_SET(bp, bp->b_index, 0);
 790                         dirty(bp);
 791                 }
 792         }
 793         return (KEEPON);
 794 }
 795
 796 /*
 797  * Notification that a block is being freed. Return zero if the free
 798  * should be allowed to proceed. Return non-zero if the snapshot file
 799  * wants to claim the block. The block will be claimed if it is an
 800  * uncopied part of one of the snapshots. It will be freed if it is
 801  * either a BLK_NOCOPY or has already been copied in all of the snapshots.
 802  * If a fragment is being freed, then all snapshots that care about
 803  * it must make a copy since a snapshot file can only claim full sized
 804  * blocks. Note that if more than one snapshot file maps the block,
 805  * we can pick one at random to claim it. Since none of the snapshots
 806  * can change, we are assurred that they will all see the same unmodified
 807  * image. When deleting a snapshot file (see ino_trunc above), we
 808  * must push any of these claimed blocks to one of the other snapshots
 809  * that maps it. These claimed blocks are easily identified as they will
 810  * have a block number equal to their logical block number within the
 811  * snapshot. A copied block can never have this property because they
 812  * must always have been allocated from a BLK_NOCOPY location.
 813  */
 814 int
 815 snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum,
 816         ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
 817 {
 818         union dinode *dp;
 819         struct inode ip;
 820         struct bufarea *snapbp;
 821         ufs_lbn_t lbn;
 822         ufs2_daddr_t blkno, relblkno;
 823         int i, frags, claimedblk, copydone;
 824
 825         /* If no snapshots, nothing to do */
 826         if (snapcnt == 0)
 827                 return (0);
 828         if (debug)
 829                 printf("snapblkfree: in ino %jd free blkno %jd, size %jd\n",
 830                     (intmax_t)inum, (intmax_t)bno, (intmax_t)size);
 831         relblkno = blknum(fs, bno);
 832         lbn = fragstoblks(fs, relblkno);
 833         /* Direct blocks are always pre-copied */
 834         if (lbn < UFS_NDADDR)
 835                 return (0);
 836         copydone = 0;
 837         claimedblk = 0;
 838         for (i = 0; i < snapcnt; i++) {
 839                 /*
 840                  * Lookup block being freed.
 841                  */
 842                 ip = snaplist[i];
 843                 dp = ip.i_dp;
 844                 blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number,
 845                     lbn, &frags, &snapbp);
 846                 /*
 847                  * Check to see if block needs to be copied.
 848                  */
 849                 if (blkno == 0) {
 850                         /*
 851                          * A block that we map is being freed. If it has not
 852                          * been claimed yet, we will claim or copy it (below).
 853                          */
 854                         claimedblk = 1;
 855                 } else if (blkno == BLK_SNAP) {
 856                         /*
 857                          * No previous snapshot claimed the block,
 858                          * so it will be freed and become a BLK_NOCOPY
 859                          * (don't care) for us.
 860                          */
 861                         if (claimedblk)
 862                                 pfatal("snapblkfree: inconsistent block type");
 863                         IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY);
 864                         dirty(snapbp);
 865                         brelse(snapbp);
 866                         continue;
 867                 } else /* BLK_NOCOPY or default */ {
 868                         /*
 869                          * If the snapshot has already copied the block
 870                          * (default), or does not care about the block,
 871                          * it is not needed.
 872                          */
 873                         brelse(snapbp);
 874                         continue;
 875                 }
 876                 /*
 877                  * If this is a full size block, we will just grab it
 878                  * and assign it to the snapshot inode. Otherwise we
 879                  * will proceed to copy it. See explanation for this
 880                  * routine as to why only a single snapshot needs to
 881                  * claim this block.
 882                  */
 883                 if (size == fs->fs_bsize) {
 884                         if (debug)
 885                                 printf("Grabonremove snapshot %ju lbn %jd "
 886                                     "from inum %ju\n", (intmax_t)ip.i_number,
 887                                     (intmax_t)lbn, (uintmax_t)inum);
 888                         IBLK_SET(snapbp, snapbp->b_index, relblkno);
 889                         dirty(snapbp);
 890                         brelse(snapbp);
 891                         DIP_SET(dp, di_blocks,
 892                             DIP(dp, di_blocks) + btodb(size));
 893                         inodirty(&ip);
 894                         return (1);
 895                 }
 896
 897                 /* First time through, read the contents of the old block. */
 898                 if (copydone == 0) {
 899                         copydone = 1;
 900                         if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno),
 901                             fs->fs_bsize) != 0) {
 902                                 pfatal("Could not read snapshot %ju block "
 903                                     "%jd\n", (intmax_t)ip.i_number,
 904                                     (intmax_t)relblkno);
 905                                 continue;
 906                         }
 907                 }
 908                 /*
 909                  * This allocation will never require any additional
 910                  * allocations for the snapshot inode.
 911                  */
 912                 blkno = allocblk(dtog(fs, relblkno), fs->fs_frag,
 913                     checkblkavail);
 914                 if (blkno == 0) {
 915                         pfatal("Could not allocate block for snapshot %ju\n",
 916                             (intmax_t)ip.i_number);
 917                         continue;
 918                 }
 919                 if (debug)
 920                         printf("Copyonremove: snapino %jd lbn %jd for inum %ju "
 921                             "size %ld new blkno %jd\n", (intmax_t)ip.i_number,
 922                             (intmax_t)lbn, (uintmax_t)inum, size,
 923                             (intmax_t)blkno);
 924                 blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
 925                 IBLK_SET(snapbp, snapbp->b_index, blkno);
 926                 dirty(snapbp);
 927                 brelse(snapbp);
 928                 DIP_SET(dp, di_blocks,
 929                     DIP(dp, di_blocks) + btodb(fs->fs_bsize));
 930                 inodirty(&ip);
 931         }
 932         return (0);
 933 }
 934
 935 /*
 936  * Notification that a block is being written. Return if the block
 937  * is part of a snapshot as snapshots never track other snapshots.
 938  * The block will be copied in all of the snapshots that are tracking
 939  * it and have not yet copied it. Some buffers may hold more than one
 940  * block. Here we need to check each block in the buffer.
 941  */
 942 void
 943 copyonwrite(struct fs *fs, struct bufarea *bp,
 944         ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
 945 {
 946         ufs2_daddr_t copyblkno;
 947         long i, numblks;
 948
 949         /* If no snapshots, nothing to do. */
 950         if (snapcnt == 0)
 951                 return;
 952         numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize;
 953         if (debug)
 954                 prtbuf(bp, "copyonwrite: checking %jd block%s in buffer",
 955                     (intmax_t)numblks, numblks > 1 ? "s" : "");
 956         copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno));
 957         for (i = 0; i < numblks; i++) {
 958                 chkcopyonwrite(fs, copyblkno, checkblkavail);
 959                 copyblkno += fs->fs_frag;
 960         }
 961 }
 962
 963 static void
 964 chkcopyonwrite(struct fs *fs, ufs2_daddr_t copyblkno,
 965         ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
 966 {
 967         struct inode ip;
 968         union dinode *dp;
 969         struct bufarea *snapbp;
 970         ufs2_daddr_t blkno;
 971         int i, frags, copydone;
 972         ufs_lbn_t lbn;
 973
 974         lbn = fragstoblks(fs, copyblkno);
 975         /* Direct blocks are always pre-copied */
 976         if (lbn < UFS_NDADDR)
 977                 return;
 978         copydone = 0;
 979         for (i = 0; i < snapcnt; i++) {
 980                 /*
 981                  * Lookup block being freed.
 982                  */
 983                 ip = snaplist[i];
 984                 dp = ip.i_dp;
 985                 blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp);
 986                 /*
 987                  * Check to see if block needs to be copied.
 988                  */
 989                 if (blkno != 0) {
 990                         /*
 991                          * A block that we have already copied or don't track.
 992                          */
 993                         brelse(snapbp);
 994                         continue;
 995                 }
 996                 /* First time through, read the contents of the old block. */
 997                 if (copydone == 0) {
 998                         copydone = 1;
 999                         if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno),
1000                             fs->fs_bsize) != 0) {
1001                                 pfatal("Could not read snapshot %ju block "
1002                                     "%jd\n", (intmax_t)ip.i_number,
1003                                     (intmax_t)copyblkno);
1004                                 continue;
1005                         }
1006                 }
1007                 /*
1008                  * This allocation will never require any additional
1009                  * allocations for the snapshot inode.
1010                  */
1011                 if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag,
1012                     checkblkavail)) == 0) {
1013                         pfatal("Could not allocate block for snapshot %ju\n",
1014                             (intmax_t)ip.i_number);
1015                         continue;
1016                 }
1017                 if (debug)
1018                         prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using "
1019                             "blkno %ju setting in buffer",
1020                             (intmax_t)ip.i_number, (intmax_t)lbn,
1021                             (intmax_t)blkno);
1022                 blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
1023                 IBLK_SET(snapbp, snapbp->b_index, blkno);
1024                 dirty(snapbp);
1025                 brelse(snapbp);
1026                 DIP_SET(dp, di_blocks,
1027                     DIP(dp, di_blocks) + btodb(fs->fs_bsize));
1028                 inodirty(&ip);
1029         }
1030         return;
1031 }
1032
1033 /*
1034  * Traverse an inode and check that its block count is correct
1035  * fixing it if necessary.
1036  */
1037 void
1038 check_blkcnt(struct inode *ip)
1039 {
1040         struct inodesc idesc;
1041         union dinode *dp;
1042         ufs2_daddr_t ndb;
1043         int j, ret, offset;
1044
1045         dp = ip->i_dp;
1046         memset(&idesc, 0, sizeof(struct inodesc));
1047         idesc.id_func = pass1check;
1048         idesc.id_number = ip->i_number;
1049         idesc.id_type = (DIP(dp, di_flags) & SF_SNAPSHOT) == 0 ? ADDR : SNAP;
1050         (void)ckinode(dp, &idesc);
1051         if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) {
1052                 ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize);
1053                 for (j = 0; j < UFS_NXADDR; j++) {
1054                         if (--ndb == 0 &&
1055                             (offset = blkoff(&sblock, dp->dp2.di_extsize)) != 0)
1056                                 idesc.id_numfrags = numfrags(&sblock,
1057                                     fragroundup(&sblock, offset));
1058                         else
1059                                 idesc.id_numfrags = sblock.fs_frag;
1060                         if (dp->dp2.di_extb[j] == 0)
1061                                 continue;
1062                         idesc.id_blkno = dp->dp2.di_extb[j];
1063                         ret = (*idesc.id_func)(&idesc);
1064                         if (ret & STOP)
1065                                 break;
1066                 }
1067         }
1068         idesc.id_entryno *= btodb(sblock.fs_fsize);
1069         if (DIP(dp, di_blocks) != idesc.id_entryno) {
1070                 if (!(sujrecovery && preen)) {
1071                         pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)",
1072                             (u_long)idesc.id_number,
1073                             (uintmax_t)DIP(dp, di_blocks),
1074                             (uintmax_t)idesc.id_entryno);
1075                         if (preen)
1076                                 printf(" (CORRECTED)\n");
1077                         else if (reply("CORRECT") == 0)
1078                                 return;
1079                 }
1080                 if (bkgrdflag == 0) {
1081                         DIP_SET(dp, di_blocks, idesc.id_entryno);
1082                         inodirty(ip);
1083                 } else {
1084                         cmd.value = idesc.id_number;
1085                         cmd.size = idesc.id_entryno - DIP(dp, di_blocks);
1086                         if (debug)
1087                                 printf("adjblkcnt ino %ju amount %lld\n",
1088                                     (uintmax_t)cmd.value, (long long)cmd.size);
1089                         if (sysctl(adjblkcnt, MIBSIZE, 0, 0,
1090                             &cmd, sizeof cmd) == -1)
1091                                 rwerror("ADJUST INODE BLOCK COUNT", cmd.value);
1092                 }
1093         }
1094 }
1095
1096 void
1097 freeinodebuf(void)
1098 {
1099         struct bufarea *bp;
1100         int i;
1101
1102         /*
1103          * Flush old contents in case they have been updated.
1104          */
1105         flush(fswritefd, &inobuf);
1106         if (inobuf.b_un.b_buf != NULL)
1107                 free((char *)inobuf.b_un.b_buf);
1108         inobuf.b_un.b_buf = NULL;
1109         firstinum = lastinum = 0;
1110         /*
1111          * Reload the snapshot inodes in case any of them changed.
1112          */
1113         for (i = 0; i < snapcnt; i++) {
1114                 bp = snaplist[i].i_bp;
1115                 bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno,
1116                     bp->b_size);
1117         }
1118 }
1119
1120 /*
1121  * Routines to maintain information about directory inodes.
1122  * This is built during the first pass and used during the
1123  * second and third passes.
1124  *
1125  * Enter inodes into the cache.
1126  */
1127 struct inoinfo *
1128 cacheino(union dinode *dp, ino_t inumber)
1129 {
1130         struct inoinfo *inp;
1131         int i, blks;
1132
1133         if (getinoinfo(inumber) != NULL)
1134                 pfatal("cacheino: duplicate entry for ino %jd\n",
1135                     (intmax_t)inumber);
1136         if (howmany(DIP(dp, di_size), sblock.fs_bsize) > UFS_NDADDR)
1137                 blks = UFS_NDADDR + UFS_NIADDR;
1138         else if (DIP(dp, di_size) > 0)
1139                 blks = howmany(DIP(dp, di_size), sblock.fs_bsize);
1140         else
1141                 blks = 1;
1142         inp = (struct inoinfo *)
1143                 Malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs2_daddr_t));
1144         if (inp == NULL)
1145                 errx(EEXIT, "cannot increase directory list");
1146         SLIST_INSERT_HEAD(&inphash[inumber % dirhash], inp, i_hash);
1147         inp->i_flags = 0;
1148         inp->i_parent = inumber == UFS_ROOTINO ? UFS_ROOTINO : (ino_t)0;
1149         inp->i_dotdot = (ino_t)0;
1150         inp->i_number = inumber;
1151         inp->i_isize = DIP(dp, di_size);
1152         inp->i_depth = DIP(dp, di_dirdepth);
1153         inp->i_numblks = blks;
1154         for (i = 0; i < MIN(blks, UFS_NDADDR); i++)
1155                 inp->i_blks[i] = DIP(dp, di_db[i]);
1156         if (blks > UFS_NDADDR)
1157                 for (i = 0; i < UFS_NIADDR; i++)
1158                         inp->i_blks[UFS_NDADDR + i] = DIP(dp, di_ib[i]);
1159         if (inplast == listmax) {
1160                 listmax += 100;
1161                 inpsort = (struct inoinfo **)reallocarray((char *)inpsort,
1162                     listmax, sizeof(struct inoinfo *));
1163                 if (inpsort == NULL)
1164                         errx(EEXIT, "cannot increase directory list");
1165         }
1166         inpsort[inplast++] = inp;
1167         return (inp);
1168 }
1169
1170 /*
1171  * Look up an inode cache structure.
1172  */
1173 struct inoinfo *
1174 getinoinfo(ino_t inumber)
1175 {
1176         struct inoinfo *inp;
1177
1178         SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1179                 if (inp->i_number != inumber)
1180                         continue;
1181                 return (inp);
1182         }
1183         return (NULL);
1184 }
1185
1186 /*
1187  * Remove an entry from the inode cache and disk-order sorted list.
1188  * Return 0 on success and 1 on failure.
1189  */
1190 int
1191 removecachedino(ino_t inumber)
1192 {
1193         struct inoinfo *inp, **inpp;
1194         char *listtype;
1195
1196         listtype = "hash";
1197         SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1198                 if (inp->i_number != inumber)
1199                         continue;
1200                 SLIST_REMOVE(&inphash[inumber % dirhash], inp, inoinfo, i_hash);
1201                 for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
1202                         if (*inpp != inp)
1203                                 continue;
1204                         *inpp = inpsort[inplast - 1];
1205                         inplast--;
1206                         free(inp);
1207                         return (0);
1208                 }
1209                 listtype = "sort";
1210                 break;
1211         }
1212         pfatal("removecachedino: entry for ino %jd not found on %s list\n",
1213             (intmax_t)inumber, listtype);
1214         return (1);
1215 }
1216
1217 /*
1218  * Clean up all the inode cache structure.
1219  */
1220 void
1221 inocleanup(void)
1222 {
1223         struct inoinfo **inpp;
1224
1225         if (inphash == NULL)
1226                 return;
1227         for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--)
1228                 free((char *)(*inpp));
1229         free((char *)inphash);
1230         inphash = NULL;
1231         free((char *)inpsort);
1232         inpsort = NULL;
1233 }
1234
1235 void
1236 inodirty(struct inode *ip)
1237 {
1238
1239         if (sblock.fs_magic == FS_UFS2_MAGIC)
1240                 ffs_update_dinode_ckhash(&sblock,
1241                     (struct ufs2_dinode *)ip->i_dp);
1242         dirty(ip->i_bp);
1243 }
1244
1245 void
1246 clri(struct inodesc *idesc, const char *type, int flag)
1247 {
1248         union dinode *dp;
1249         struct inode ip;
1250
1251         ginode(idesc->id_number, &ip);
1252         dp = ip.i_dp;
1253         if (flag == 1) {
1254                 pwarn("%s %s", type,
1255                     (DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE");
1256                 prtinode(&ip);
1257                 printf("\n");
1258         }
1259         if (preen || reply("CLEAR") == 1) {
1260                 if (preen)
1261                         printf(" (CLEARED)\n");
1262                 n_files--;
1263                 if (bkgrdflag == 0) {
1264                         if (idesc->id_type == SNAP) {
1265                                 snapremove(idesc->id_number);
1266                                 idesc->id_type = ADDR;
1267                         }
1268                         (void)ckinode(dp, idesc);
1269                         inoinfo(idesc->id_number)->ino_state = USTATE;
1270                         clearinode(dp);
1271                         inodirty(&ip);
1272                 } else {
1273                         cmd.value = idesc->id_number;
1274                         cmd.size = -DIP(dp, di_nlink);
1275                         if (debug)
1276                                 printf("adjrefcnt ino %ld amt %lld\n",
1277                                     (long)cmd.value, (long long)cmd.size);
1278                         if (sysctl(adjrefcnt, MIBSIZE, 0, 0,
1279                             &cmd, sizeof cmd) == -1)
1280                                 rwerror("ADJUST INODE", cmd.value);
1281                 }
1282         }
1283         irelse(&ip);
1284 }
1285
1286 int
1287 findname(struct inodesc *idesc)
1288 {
1289         struct direct *dirp = idesc->id_dirp;
1290
1291         if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1292                 idesc->id_entryno++;
1293                 return (KEEPON);
1294         }
1295         memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1);
1296         return (STOP|FOUND);
1297 }
1298
1299 int
1300 findino(struct inodesc *idesc)
1301 {
1302         struct direct *dirp = idesc->id_dirp;
1303
1304         if (dirp->d_ino == 0)
1305                 return (KEEPON);
1306         if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
1307             dirp->d_ino >= UFS_ROOTINO && dirp->d_ino < maxino) {
1308                 idesc->id_parent = dirp->d_ino;
1309                 return (STOP|FOUND);
1310         }
1311         return (KEEPON);
1312 }
1313
1314 int
1315 clearentry(struct inodesc *idesc)
1316 {
1317         struct direct *dirp = idesc->id_dirp;
1318
1319         if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1320                 idesc->id_entryno++;
1321                 return (KEEPON);
1322         }
1323         dirp->d_ino = 0;
1324         return (STOP|FOUND|ALTERED);
1325 }
1326
1327 void
1328 prtinode(struct inode *ip)
1329 {
1330         char *p;
1331         union dinode *dp;
1332         struct passwd *pw;
1333         time_t t;
1334
1335         dp = ip->i_dp;
1336         printf(" I=%lu ", (u_long)ip->i_number);
1337         if (ip->i_number < UFS_ROOTINO || ip->i_number >= maxino)
1338                 return;
1339         printf(" OWNER=");
1340         if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL)
1341                 printf("%s ", pw->pw_name);
1342         else
1343                 printf("%u ", (unsigned)DIP(dp, di_uid));
1344         printf("MODE=%o\n", DIP(dp, di_mode));
1345         if (preen)
1346                 printf("%s: ", cdevname);
1347         printf("SIZE=%ju ", (uintmax_t)DIP(dp, di_size));
1348         t = DIP(dp, di_mtime);
1349         if ((p = ctime(&t)) != NULL)
1350                 printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]);
1351 }
1352
1353 void
1354 blkerror(ino_t ino, const char *type, ufs2_daddr_t blk)
1355 {
1356
1357         pfatal("%jd %s I=%ju", (intmax_t)blk, type, (uintmax_t)ino);
1358         printf("\n");
1359         switch (inoinfo(ino)->ino_state) {
1360
1361         case FSTATE:
1362         case FZLINK:
1363                 inoinfo(ino)->ino_state = FCLEAR;
1364                 return;
1365
1366         case DSTATE:
1367         case DZLINK:
1368                 inoinfo(ino)->ino_state = DCLEAR;
1369                 return;
1370
1371         case FCLEAR:
1372         case DCLEAR:
1373                 return;
1374
1375         default:
1376                 errx(EEXIT, "BAD STATE %d TO BLKERR", inoinfo(ino)->ino_state);
1377                 /* NOTREACHED */
1378         }
1379 }
1380
1381 /*
1382  * allocate an unused inode
1383  */
1384 ino_t
1385 allocino(ino_t request, int type)
1386 {
1387         ino_t ino;
1388         struct inode ip;
1389         union dinode *dp;
1390         struct bufarea *cgbp;
1391         struct cg *cgp;
1392         int cg, anyino;
1393
1394         anyino = 0;
1395         if (request == 0) {
1396                 request = UFS_ROOTINO;
1397                 anyino = 1;
1398         } else if (inoinfo(request)->ino_state != USTATE)
1399                 return (0);
1400 retry:
1401         for (ino = request; ino < maxino; ino++)
1402                 if (inoinfo(ino)->ino_state == USTATE)
1403                         break;
1404         if (ino >= maxino)
1405                 return (0);
1406         cg = ino_to_cg(&sblock, ino);
1407         cgbp = cglookup(cg);
1408         cgp = cgbp->b_un.b_cg;
1409         if (!check_cgmagic(cg, cgbp)) {
1410                 if (anyino == 0)
1411                         return (0);
1412                 request = (cg + 1) * sblock.fs_ipg;
1413                 goto retry;
1414         }
1415         setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1416         cgp->cg_cs.cs_nifree--;
1417         switch (type & IFMT) {
1418         case IFDIR:
1419                 inoinfo(ino)->ino_state = DSTATE;
1420                 cgp->cg_cs.cs_ndir++;
1421                 break;
1422         case IFREG:
1423         case IFLNK:
1424                 inoinfo(ino)->ino_state = FSTATE;
1425                 break;
1426         default:
1427                 return (0);
1428         }
1429         cgdirty(cgbp);
1430         ginode(ino, &ip);
1431         dp = ip.i_dp;
1432         memset(dp, 0, ((sblock.fs_magic == FS_UFS1_MAGIC) ?
1433             sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
1434         DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1,
1435             std_checkblkavail));
1436         if (DIP(dp, di_db[0]) == 0) {
1437                 inoinfo(ino)->ino_state = USTATE;
1438                 inodirty(&ip);
1439                 irelse(&ip);
1440                 return (0);
1441         }
1442         DIP_SET(dp, di_mode, type);
1443         DIP_SET(dp, di_atime, time(NULL));
1444         DIP_SET(dp, di_ctime, DIP(dp, di_atime));
1445         DIP_SET(dp, di_mtime, DIP(dp, di_ctime));
1446         DIP_SET(dp, di_size, sblock.fs_fsize);
1447         DIP_SET(dp, di_blocks, btodb(sblock.fs_fsize));
1448         n_files++;
1449         inodirty(&ip);
1450         irelse(&ip);
1451         inoinfo(ino)->ino_type = IFTODT(type);
1452         return (ino);
1453 }
1454
1455 /*
1456  * deallocate an inode
1457  */
1458 void
1459 freeino(ino_t ino)
1460 {
1461         struct inodesc idesc;
1462         union dinode *dp;
1463         struct inode ip;
1464
1465         memset(&idesc, 0, sizeof(struct inodesc));
1466         idesc.id_type = ADDR;
1467         idesc.id_func = freeblock;
1468         idesc.id_number = ino;
1469         ginode(ino, &ip);
1470         dp = ip.i_dp;
1471         (void)ckinode(dp, &idesc);
1472         clearinode(dp);
1473         inodirty(&ip);
1474         irelse(&ip);
1475         inoinfo(ino)->ino_state = USTATE;
1476         n_files--;
1477 }