sys/ufs/ffs/ffs_inode.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-3-Clause
   3  *
   4  * Copyright (c) 1982, 1986, 1989, 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  * 3. Neither the name of the University nor the names of its contributors
  16  *    may be used to endorse or promote products derived from this software
  17  *    without specific prior written permission.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  29  * SUCH DAMAGE.
  30  *
  31  *      @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95
  32  */
  33
  34 #include <sys/cdefs.h>
  35 __FBSDID("$FreeBSD$");
  36
  37 #include "opt_quota.h"
  38
  39 #include <sys/param.h>
  40 #include <sys/systm.h>
  41 #include <sys/bio.h>
  42 #include <sys/buf.h>
  43 #include <sys/malloc.h>
  44 #include <sys/mount.h>
  45 #include <sys/proc.h>
  46 #include <sys/racct.h>
  47 #include <sys/random.h>
  48 #include <sys/resourcevar.h>
  49 #include <sys/rwlock.h>
  50 #include <sys/stat.h>
  51 #include <sys/vmmeter.h>
  52 #include <sys/vnode.h>
  53
  54 #include <vm/vm.h>
  55 #include <vm/vm_extern.h>
  56 #include <vm/vm_object.h>
  57
  58 #include <ufs/ufs/extattr.h>
  59 #include <ufs/ufs/quota.h>
  60 #include <ufs/ufs/ufsmount.h>
  61 #include <ufs/ufs/inode.h>
  62 #include <ufs/ufs/ufs_extern.h>
  63
  64 #include <ufs/ffs/fs.h>
  65 #include <ufs/ffs/ffs_extern.h>
  66
  67 static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
  68             ufs2_daddr_t, int, ufs2_daddr_t *);
  69
  70 static void
  71 ffs_inode_bwrite(struct vnode *vp, struct buf *bp, int flags)
  72 {
  73         if ((flags & IO_SYNC) != 0)
  74                 bwrite(bp);
  75         else if (DOINGASYNC(vp))
  76                 bdwrite(bp);
  77         else
  78                 bawrite(bp);
  79 }
  80
  81 /*
  82  * Update the access, modified, and inode change times as specified by the
  83  * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively.  Write the inode
  84  * to disk if the IN_MODIFIED flag is set (it may be set initially, or by
  85  * the timestamp update).  The IN_LAZYMOD flag is set to force a write
  86  * later if not now.  The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs
  87  * is currently being suspended (or is suspended) and vnode has been accessed.
  88  * If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to
  89  * reflect the presumably successful write, and if waitfor is set, then wait
  90  * for the write to complete.
  91  */
  92 int
  93 ffs_update(vp, waitfor)
  94         struct vnode *vp;
  95         int waitfor;
  96 {
  97         struct fs *fs;
  98         struct buf *bp;
  99         struct inode *ip;
 100         daddr_t bn;
 101         int flags, error;
 102
 103         ASSERT_VOP_ELOCKED(vp, "ffs_update");
 104         ufs_itimes(vp);
 105         ip = VTOI(vp);
 106         if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0)
 107                 return (0);
 108         ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED);
 109         /*
 110          * The IN_SIZEMOD and IN_IBLKDATA flags indicate changes to the
 111          * file size and block pointer fields in the inode. When these
 112          * fields have been changed, the fsync() and fsyncdata() system
 113          * calls must write the inode to ensure their semantics that the
 114          * file is on stable store.
 115          *
 116          * The IN_SIZEMOD and IN_IBLKDATA flags cannot be cleared until
 117          * a synchronous write of the inode is done. If they are cleared
 118          * on an asynchronous write, then the inode may not yet have been
 119          * written to the disk when an fsync() or fsyncdata() call is done.
 120          * Absent these flags, these calls would not know that they needed
 121          * to write the inode. Thus, these flags only can be cleared on
 122          * synchronous writes of the inode. Since the inode will be locked
 123          * for the duration of the I/O that writes it to disk, no fsync()
 124          * or fsyncdata() will be able to run before the on-disk inode
 125          * is complete.
 126          */
 127         if (waitfor)
 128                 ip->i_flag &= ~(IN_SIZEMOD | IN_IBLKDATA);
 129         fs = ITOFS(ip);
 130         if (fs->fs_ronly && ITOUMP(ip)->um_fsckpid == 0)
 131                 return (0);
 132         /*
 133          * If we are updating a snapshot and another process is currently
 134          * writing the buffer containing the inode for this snapshot then
 135          * a deadlock can occur when it tries to check the snapshot to see
 136          * if that block needs to be copied. Thus when updating a snapshot
 137          * we check to see if the buffer is already locked, and if it is
 138          * we drop the snapshot lock until the buffer has been written
 139          * and is available to us. We have to grab a reference to the
 140          * snapshot vnode to prevent it from being removed while we are
 141          * waiting for the buffer.
 142          */
 143         flags = 0;
 144         if (IS_SNAPSHOT(ip))
 145                 flags = GB_LOCK_NOWAIT;
 146 loop:
 147         bn = fsbtodb(fs, ino_to_fsba(fs, ip->i_number));
 148         error = ffs_breadz(VFSTOUFS(vp->v_mount), ITODEVVP(ip), bn, bn,
 149              (int) fs->fs_bsize, NULL, NULL, 0, NOCRED, flags, NULL, &bp);
 150         if (error != 0) {
 151                 if (error != EBUSY)
 152                         return (error);
 153                 KASSERT((IS_SNAPSHOT(ip)), ("EBUSY from non-snapshot"));
 154                 /*
 155                  * Wait for our inode block to become available.
 156                  *
 157                  * Hold a reference to the vnode to protect against
 158                  * ffs_snapgone(). Since we hold a reference, it can only
 159                  * get reclaimed (VIRF_DOOMED flag) in a forcible downgrade
 160                  * or unmount. For an unmount, the entire filesystem will be
 161                  * gone, so we cannot attempt to touch anything associated
 162                  * with it while the vnode is unlocked; all we can do is
 163                  * pause briefly and try again. If when we relock the vnode
 164                  * we discover that it has been reclaimed, updating it is no
 165                  * longer necessary and we can just return an error.
 166                  */
 167                 vref(vp);
 168                 VOP_UNLOCK(vp);
 169                 pause("ffsupd", 1);
 170                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 171                 vrele(vp);
 172                 if (VN_IS_DOOMED(vp))
 173                         return (ENOENT);
 174                 goto loop;
 175         }
 176         if (DOINGSOFTDEP(vp))
 177                 softdep_update_inodeblock(ip, bp, waitfor);
 178         else if (ip->i_effnlink != ip->i_nlink)
 179                 panic("ffs_update: bad link cnt");
 180         if (I_IS_UFS1(ip)) {
 181                 *((struct ufs1_dinode *)bp->b_data +
 182                     ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
 183                 /*
 184                  * XXX: FIX? The entropy here is desirable,
 185                  * but the harvesting may be expensive
 186                  */
 187                 random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), RANDOM_FS_ATIME);
 188         } else {
 189                 ffs_update_dinode_ckhash(fs, ip->i_din2);
 190                 *((struct ufs2_dinode *)bp->b_data +
 191                     ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
 192                 /*
 193                  * XXX: FIX? The entropy here is desirable,
 194                  * but the harvesting may be expensive
 195                  */
 196                 random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), RANDOM_FS_ATIME);
 197         }
 198         if (waitfor) {
 199                 error = bwrite(bp);
 200                 if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error))
 201                         error = 0;
 202         } else if (vm_page_count_severe() || buf_dirty_count_severe()) {
 203                 bawrite(bp);
 204                 error = 0;
 205         } else {
 206                 if (bp->b_bufsize == fs->fs_bsize)
 207                         bp->b_flags |= B_CLUSTEROK;
 208                 bdwrite(bp);
 209                 error = 0;
 210         }
 211         return (error);
 212 }
 213
 214 #define SINGLE  0       /* index of single indirect block */
 215 #define DOUBLE  1       /* index of double indirect block */
 216 #define TRIPLE  2       /* index of triple indirect block */
 217 /*
 218  * Truncate the inode ip to at most length size, freeing the
 219  * disk blocks.
 220  */
 221 int
 222 ffs_truncate(vp, length, flags, cred)
 223         struct vnode *vp;
 224         off_t length;
 225         int flags;
 226         struct ucred *cred;
 227 {
 228         struct inode *ip;
 229         ufs2_daddr_t bn, lbn, lastblock, lastiblock[UFS_NIADDR];
 230         ufs2_daddr_t indir_lbn[UFS_NIADDR], oldblks[UFS_NDADDR + UFS_NIADDR];
 231         ufs2_daddr_t newblks[UFS_NDADDR + UFS_NIADDR];
 232         ufs2_daddr_t count, blocksreleased = 0, datablocks, blkno;
 233         struct bufobj *bo;
 234         struct fs *fs;
 235         struct buf *bp;
 236         struct ufsmount *ump;
 237         int softdeptrunc, journaltrunc;
 238         int needextclean, extblocks;
 239         int offset, size, level, nblocks;
 240         int i, error, allerror, indiroff, waitforupdate;
 241         u_long key;
 242         off_t osize;
 243
 244         ip = VTOI(vp);
 245         ump = VFSTOUFS(vp->v_mount);
 246         fs = ump->um_fs;
 247         bo = &vp->v_bufobj;
 248
 249         ASSERT_VOP_LOCKED(vp, "ffs_truncate");
 250
 251         if (length < 0)
 252                 return (EINVAL);
 253         if (length > fs->fs_maxfilesize)
 254                 return (EFBIG);
 255 #ifdef QUOTA
 256         error = getinoquota(ip);
 257         if (error)
 258                 return (error);
 259 #endif
 260         /*
 261          * Historically clients did not have to specify which data
 262          * they were truncating. So, if not specified, we assume
 263          * traditional behavior, e.g., just the normal data.
 264          */
 265         if ((flags & (IO_EXT | IO_NORMAL)) == 0)
 266                 flags |= IO_NORMAL;
 267         if (!DOINGSOFTDEP(vp) && !DOINGASYNC(vp))
 268                 flags |= IO_SYNC;
 269         waitforupdate = (flags & IO_SYNC) != 0 || !DOINGASYNC(vp);
 270         /*
 271          * If we are truncating the extended-attributes, and cannot
 272          * do it with soft updates, then do it slowly here. If we are
 273          * truncating both the extended attributes and the file contents
 274          * (e.g., the file is being unlinked), then pick it off with
 275          * soft updates below.
 276          */
 277         allerror = 0;
 278         needextclean = 0;
 279         softdeptrunc = 0;
 280         journaltrunc = DOINGSUJ(vp);
 281         journaltrunc = 0;       /* XXX temp patch until bug found */
 282         if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0)
 283                 softdeptrunc = !softdep_slowdown(vp);
 284         extblocks = 0;
 285         datablocks = DIP(ip, i_blocks);
 286         if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) {
 287                 extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
 288                 datablocks -= extblocks;
 289         }
 290         if ((flags & IO_EXT) && extblocks > 0) {
 291                 if (length != 0)
 292                         panic("ffs_truncate: partial trunc of extdata");
 293                 if (softdeptrunc || journaltrunc) {
 294                         if ((flags & IO_NORMAL) == 0)
 295                                 goto extclean;
 296                         needextclean = 1;
 297                 } else {
 298                         if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
 299                                 return (error);
 300 #ifdef QUOTA
 301                         (void) chkdq(ip, -extblocks, NOCRED, FORCE);
 302 #endif
 303                         vinvalbuf(vp, V_ALT, 0, 0);
 304                         vn_pages_remove(vp,
 305                             OFF_TO_IDX(lblktosize(fs, -extblocks)), 0);
 306                         osize = ip->i_din2->di_extsize;
 307                         ip->i_din2->di_blocks -= extblocks;
 308                         ip->i_din2->di_extsize = 0;
 309                         for (i = 0; i < UFS_NXADDR; i++) {
 310                                 oldblks[i] = ip->i_din2->di_extb[i];
 311                                 ip->i_din2->di_extb[i] = 0;
 312                         }
 313                         UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
 314                         if ((error = ffs_update(vp, waitforupdate)))
 315                                 return (error);
 316                         for (i = 0; i < UFS_NXADDR; i++) {
 317                                 if (oldblks[i] == 0)
 318                                         continue;
 319                                 ffs_blkfree(ump, fs, ITODEVVP(ip), oldblks[i],
 320                                     sblksize(fs, osize, i), ip->i_number,
 321                                     vp->v_type, NULL, SINGLETON_KEY);
 322                         }
 323                 }
 324         }
 325         if ((flags & IO_NORMAL) == 0)
 326                 return (0);
 327         if (vp->v_type == VLNK &&
 328             (ip->i_size < vp->v_mount->mnt_maxsymlinklen ||
 329              datablocks == 0)) {
 330 #ifdef INVARIANTS
 331                 if (length != 0)
 332                         panic("ffs_truncate: partial truncate of symlink");
 333 #endif
 334                 bzero(SHORTLINK(ip), (u_int)ip->i_size);
 335                 ip->i_size = 0;
 336                 DIP_SET(ip, i_size, 0);
 337                 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
 338                 if (needextclean)
 339                         goto extclean;
 340                 return (ffs_update(vp, waitforupdate));
 341         }
 342         if (ip->i_size == length) {
 343                 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
 344                 if (needextclean)
 345                         goto extclean;
 346                 return (ffs_update(vp, 0));
 347         }
 348         if (fs->fs_ronly)
 349                 panic("ffs_truncate: read-only filesystem");
 350         if (IS_SNAPSHOT(ip))
 351                 ffs_snapremove(vp);
 352         vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
 353         osize = ip->i_size;
 354         /*
 355          * Lengthen the size of the file. We must ensure that the
 356          * last byte of the file is allocated. Since the smallest
 357          * value of osize is 0, length will be at least 1.
 358          */
 359         if (osize < length) {
 360                 vnode_pager_setsize(vp, length);
 361                 flags |= BA_CLRBUF;
 362                 error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
 363                 if (error) {
 364                         vnode_pager_setsize(vp, osize);
 365                         return (error);
 366                 }
 367                 ip->i_size = length;
 368                 DIP_SET(ip, i_size, length);
 369                 if (bp->b_bufsize == fs->fs_bsize)
 370                         bp->b_flags |= B_CLUSTEROK;
 371                 ffs_inode_bwrite(vp, bp, flags);
 372                 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
 373                 return (ffs_update(vp, waitforupdate));
 374         }
 375         /*
 376          * Lookup block number for a given offset. Zero length files
 377          * have no blocks, so return a blkno of -1.
 378          */
 379         lbn = lblkno(fs, length - 1);
 380         if (length == 0) {
 381                 blkno = -1;
 382         } else if (lbn < UFS_NDADDR) {
 383                 blkno = DIP(ip, i_db[lbn]);
 384         } else {
 385                 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize,
 386                     cred, BA_METAONLY, &bp);
 387                 if (error)
 388                         return (error);
 389                 indiroff = (lbn - UFS_NDADDR) % NINDIR(fs);
 390                 if (I_IS_UFS1(ip))
 391                         blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff];
 392                 else
 393                         blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff];
 394                 /*
 395                  * If the block number is non-zero, then the indirect block
 396                  * must have been previously allocated and need not be written.
 397                  * If the block number is zero, then we may have allocated
 398                  * the indirect block and hence need to write it out.
 399                  */
 400                 if (blkno != 0)
 401                         brelse(bp);
 402                 else if (flags & IO_SYNC)
 403                         bwrite(bp);
 404                 else
 405                         bdwrite(bp);
 406         }
 407         /*
 408          * If the block number at the new end of the file is zero,
 409          * then we must allocate it to ensure that the last block of
 410          * the file is allocated. Soft updates does not handle this
 411          * case, so here we have to clean up the soft updates data
 412          * structures describing the allocation past the truncation
 413          * point. Finding and deallocating those structures is a lot of
 414          * work. Since partial truncation with a hole at the end occurs
 415          * rarely, we solve the problem by syncing the file so that it
 416          * will have no soft updates data structures left.
 417          */
 418         if (blkno == 0 && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
 419                 return (error);
 420         if (blkno != 0 && DOINGSOFTDEP(vp)) {
 421                 if (softdeptrunc == 0 && journaltrunc == 0) {
 422                         /*
 423                          * If soft updates cannot handle this truncation,
 424                          * clean up soft dependency data structures and
 425                          * fall through to the synchronous truncation.
 426                          */
 427                         if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
 428                                 return (error);
 429                 } else {
 430                         flags = IO_NORMAL | (needextclean ? IO_EXT: 0);
 431                         if (journaltrunc)
 432                                 softdep_journal_freeblocks(ip, cred, length,
 433                                     flags);
 434                         else
 435                                 softdep_setup_freeblocks(ip, length, flags);
 436                         ASSERT_VOP_LOCKED(vp, "ffs_truncate1");
 437                         if (journaltrunc == 0) {
 438                                 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
 439                                 error = ffs_update(vp, 0);
 440                         }
 441                         return (error);
 442                 }
 443         }
 444         /*
 445          * Shorten the size of the file. If the last block of the
 446          * shortened file is unallocated, we must allocate it.
 447          * Additionally, if the file is not being truncated to a
 448          * block boundary, the contents of the partial block
 449          * following the end of the file must be zero'ed in
 450          * case it ever becomes accessible again because of
 451          * subsequent file growth. Directories however are not
 452          * zero'ed as they should grow back initialized to empty.
 453          */
 454         offset = blkoff(fs, length);
 455         if (blkno != 0 && offset == 0) {
 456                 ip->i_size = length;
 457                 DIP_SET(ip, i_size, length);
 458                 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
 459         } else {
 460                 lbn = lblkno(fs, length);
 461                 flags |= BA_CLRBUF;
 462                 error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
 463                 if (error)
 464                         return (error);
 465                 ffs_inode_bwrite(vp, bp, flags);
 466
 467                 /*
 468                  * When we are doing soft updates and the UFS_BALLOC
 469                  * above fills in a direct block hole with a full sized
 470                  * block that will be truncated down to a fragment below,
 471                  * we must flush out the block dependency with an FSYNC
 472                  * so that we do not get a soft updates inconsistency
 473                  * when we create the fragment below.
 474                  */
 475                 if (DOINGSOFTDEP(vp) && lbn < UFS_NDADDR &&
 476                     fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
 477                     (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
 478                         return (error);
 479
 480                 error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
 481                 if (error)
 482                         return (error);
 483                 ip->i_size = length;
 484                 DIP_SET(ip, i_size, length);
 485                 size = blksize(fs, ip, lbn);
 486                 if (vp->v_type != VDIR && offset != 0)
 487                         bzero((char *)bp->b_data + offset,
 488                             (u_int)(size - offset));
 489                 /* Kirk's code has reallocbuf(bp, size, 1) here */
 490                 allocbuf(bp, size);
 491                 if (bp->b_bufsize == fs->fs_bsize)
 492                         bp->b_flags |= B_CLUSTEROK;
 493                 ffs_inode_bwrite(vp, bp, flags);
 494                 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
 495         }
 496         /*
 497          * Calculate index into inode's block list of
 498          * last direct and indirect blocks (if any)
 499          * which we want to keep.  Lastblock is -1 when
 500          * the file is truncated to 0.
 501          */
 502         lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
 503         lastiblock[SINGLE] = lastblock - UFS_NDADDR;
 504         lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
 505         lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
 506         nblocks = btodb(fs->fs_bsize);
 507         /*
 508          * Update file and block pointers on disk before we start freeing
 509          * blocks.  If we crash before free'ing blocks below, the blocks
 510          * will be returned to the free list.  lastiblock values are also
 511          * normalized to -1 for calls to ffs_indirtrunc below.
 512          */
 513         for (level = TRIPLE; level >= SINGLE; level--) {
 514                 oldblks[UFS_NDADDR + level] = DIP(ip, i_ib[level]);
 515                 if (lastiblock[level] < 0) {
 516                         DIP_SET(ip, i_ib[level], 0);
 517                         lastiblock[level] = -1;
 518                 }
 519         }
 520         for (i = 0; i < UFS_NDADDR; i++) {
 521                 oldblks[i] = DIP(ip, i_db[i]);
 522                 if (i > lastblock)
 523                         DIP_SET(ip, i_db[i], 0);
 524         }
 525         UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
 526         allerror = ffs_update(vp, waitforupdate);
 527
 528         /*
 529          * Having written the new inode to disk, save its new configuration
 530          * and put back the old block pointers long enough to process them.
 531          * Note that we save the new block configuration so we can check it
 532          * when we are done.
 533          */
 534         for (i = 0; i < UFS_NDADDR; i++) {
 535                 newblks[i] = DIP(ip, i_db[i]);
 536                 DIP_SET(ip, i_db[i], oldblks[i]);
 537         }
 538         for (i = 0; i < UFS_NIADDR; i++) {
 539                 newblks[UFS_NDADDR + i] = DIP(ip, i_ib[i]);
 540                 DIP_SET(ip, i_ib[i], oldblks[UFS_NDADDR + i]);
 541         }
 542         ip->i_size = osize;
 543         DIP_SET(ip, i_size, osize);
 544         UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
 545
 546         error = vtruncbuf(vp, length, fs->fs_bsize);
 547         if (error && (allerror == 0))
 548                 allerror = error;
 549
 550         /*
 551          * Indirect blocks first.
 552          */
 553         indir_lbn[SINGLE] = -UFS_NDADDR;
 554         indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
 555         indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
 556         for (level = TRIPLE; level >= SINGLE; level--) {
 557                 bn = DIP(ip, i_ib[level]);
 558                 if (bn != 0) {
 559                         error = ffs_indirtrunc(ip, indir_lbn[level],
 560                             fsbtodb(fs, bn), lastiblock[level], level, &count);
 561                         if (error)
 562                                 allerror = error;
 563                         blocksreleased += count;
 564                         if (lastiblock[level] < 0) {
 565                                 DIP_SET(ip, i_ib[level], 0);
 566                                 ffs_blkfree(ump, fs, ump->um_devvp, bn,
 567                                     fs->fs_bsize, ip->i_number,
 568                                     vp->v_type, NULL, SINGLETON_KEY);
 569                                 blocksreleased += nblocks;
 570                         }
 571                 }
 572                 if (lastiblock[level] >= 0)
 573                         goto done;
 574         }
 575
 576         /*
 577          * All whole direct blocks or frags.
 578          */
 579         key = ffs_blkrelease_start(ump, ump->um_devvp, ip->i_number);
 580         for (i = UFS_NDADDR - 1; i > lastblock; i--) {
 581                 long bsize;
 582
 583                 bn = DIP(ip, i_db[i]);
 584                 if (bn == 0)
 585                         continue;
 586                 DIP_SET(ip, i_db[i], 0);
 587                 bsize = blksize(fs, ip, i);
 588                 ffs_blkfree(ump, fs, ump->um_devvp, bn, bsize, ip->i_number,
 589                     vp->v_type, NULL, key);
 590                 blocksreleased += btodb(bsize);
 591         }
 592         ffs_blkrelease_finish(ump, key);
 593         if (lastblock < 0)
 594                 goto done;
 595
 596         /*
 597          * Finally, look for a change in size of the
 598          * last direct block; release any frags.
 599          */
 600         bn = DIP(ip, i_db[lastblock]);
 601         if (bn != 0) {
 602                 long oldspace, newspace;
 603
 604                 /*
 605                  * Calculate amount of space we're giving
 606                  * back as old block size minus new block size.
 607                  */
 608                 oldspace = blksize(fs, ip, lastblock);
 609                 ip->i_size = length;
 610                 DIP_SET(ip, i_size, length);
 611                 UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
 612                 newspace = blksize(fs, ip, lastblock);
 613                 if (newspace == 0)
 614                         panic("ffs_truncate: newspace");
 615                 if (oldspace - newspace > 0) {
 616                         /*
 617                          * Block number of space to be free'd is
 618                          * the old block # plus the number of frags
 619                          * required for the storage we're keeping.
 620                          */
 621                         bn += numfrags(fs, newspace);
 622                         ffs_blkfree(ump, fs, ump->um_devvp, bn,
 623                            oldspace - newspace, ip->i_number, vp->v_type,
 624                            NULL, SINGLETON_KEY);
 625                         blocksreleased += btodb(oldspace - newspace);
 626                 }
 627         }
 628 done:
 629 #ifdef INVARIANTS
 630         for (level = SINGLE; level <= TRIPLE; level++)
 631                 if (newblks[UFS_NDADDR + level] != DIP(ip, i_ib[level]))
 632                         panic("ffs_truncate1: level %d newblks %jd != i_ib %jd",
 633                             level, (intmax_t)newblks[UFS_NDADDR + level],
 634                             (intmax_t)DIP(ip, i_ib[level]));
 635         for (i = 0; i < UFS_NDADDR; i++)
 636                 if (newblks[i] != DIP(ip, i_db[i]))
 637                         panic("ffs_truncate2: blkno %d newblks %jd != i_db %jd",
 638                             i, (intmax_t)newblks[UFS_NDADDR + level],
 639                             (intmax_t)DIP(ip, i_ib[level]));
 640         BO_LOCK(bo);
 641         if (length == 0 &&
 642             (fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) &&
 643             (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0))
 644                 panic("ffs_truncate3: vp = %p, buffers: dirty = %d, clean = %d",
 645                         vp, bo->bo_dirty.bv_cnt, bo->bo_clean.bv_cnt);
 646         BO_UNLOCK(bo);
 647 #endif /* INVARIANTS */
 648         /*
 649          * Put back the real size.
 650          */
 651         ip->i_size = length;
 652         DIP_SET(ip, i_size, length);
 653         if (DIP(ip, i_blocks) >= blocksreleased)
 654                 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - blocksreleased);
 655         else    /* sanity */
 656                 DIP_SET(ip, i_blocks, 0);
 657         UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
 658 #ifdef QUOTA
 659         (void) chkdq(ip, -blocksreleased, NOCRED, FORCE);
 660 #endif
 661         return (allerror);
 662
 663 extclean:
 664         if (journaltrunc)
 665                 softdep_journal_freeblocks(ip, cred, length, IO_EXT);
 666         else
 667                 softdep_setup_freeblocks(ip, length, IO_EXT);
 668         return (ffs_update(vp, waitforupdate));
 669 }
 670
 671 /*
 672  * Release blocks associated with the inode ip and stored in the indirect
 673  * block bn.  Blocks are free'd in LIFO order up to (but not including)
 674  * lastbn.  If level is greater than SINGLE, the block is an indirect block
 675  * and recursive calls to indirtrunc must be used to cleanse other indirect
 676  * blocks.
 677  */
 678 static int
 679 ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
 680         struct inode *ip;
 681         ufs2_daddr_t lbn, lastbn;
 682         ufs2_daddr_t dbn;
 683         int level;
 684         ufs2_daddr_t *countp;
 685 {
 686         struct buf *bp;
 687         struct fs *fs;
 688         struct ufsmount *ump;
 689         struct vnode *vp;
 690         caddr_t copy = NULL;
 691         u_long key;
 692         int i, nblocks, error = 0, allerror = 0;
 693         ufs2_daddr_t nb, nlbn, last;
 694         ufs2_daddr_t blkcount, factor, blocksreleased = 0;
 695         ufs1_daddr_t *bap1 = NULL;
 696         ufs2_daddr_t *bap2 = NULL;
 697 #define BAP(ip, i) (I_IS_UFS1(ip) ? bap1[i] : bap2[i])
 698
 699         fs = ITOFS(ip);
 700         ump = ITOUMP(ip);
 701
 702         /*
 703          * Calculate index in current block of last
 704          * block to be kept.  -1 indicates the entire
 705          * block so we need not calculate the index.
 706          */
 707         factor = lbn_offset(fs, level);
 708         last = lastbn;
 709         if (lastbn > 0)
 710                 last /= factor;
 711         nblocks = btodb(fs->fs_bsize);
 712         /*
 713          * Get buffer of block pointers, zero those entries corresponding
 714          * to blocks to be free'd, and update on disk copy first.  Since
 715          * double(triple) indirect before single(double) indirect, calls
 716          * to VOP_BMAP() on these blocks will fail.  However, we already
 717          * have the on-disk address, so we just pass it to bread() instead
 718          * of having bread() attempt to calculate it using VOP_BMAP().
 719          */
 720         vp = ITOV(ip);
 721         error = ffs_breadz(ump, vp, lbn, dbn, (int)fs->fs_bsize, NULL, NULL, 0,
 722             NOCRED, 0, NULL, &bp);
 723         if (error) {
 724                 *countp = 0;
 725                 return (error);
 726         }
 727
 728         if (I_IS_UFS1(ip))
 729                 bap1 = (ufs1_daddr_t *)bp->b_data;
 730         else
 731                 bap2 = (ufs2_daddr_t *)bp->b_data;
 732         if (lastbn != -1) {
 733                 copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK);
 734                 bcopy((caddr_t)bp->b_data, copy, (u_int)fs->fs_bsize);
 735                 for (i = last + 1; i < NINDIR(fs); i++)
 736                         if (I_IS_UFS1(ip))
 737                                 bap1[i] = 0;
 738                         else
 739                                 bap2[i] = 0;
 740                 if (DOINGASYNC(vp)) {
 741                         bdwrite(bp);
 742                 } else {
 743                         error = bwrite(bp);
 744                         if (error)
 745                                 allerror = error;
 746                 }
 747                 if (I_IS_UFS1(ip))
 748                         bap1 = (ufs1_daddr_t *)copy;
 749                 else
 750                         bap2 = (ufs2_daddr_t *)copy;
 751         }
 752
 753         /*
 754          * Recursively free totally unused blocks.
 755          */
 756         key = ffs_blkrelease_start(ump, ITODEVVP(ip), ip->i_number);
 757         for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
 758             i--, nlbn += factor) {
 759                 nb = BAP(ip, i);
 760                 if (nb == 0)
 761                         continue;
 762                 if (level > SINGLE) {
 763                         if ((error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
 764                             (ufs2_daddr_t)-1, level - 1, &blkcount)) != 0)
 765                                 allerror = error;
 766                         blocksreleased += blkcount;
 767                 }
 768                 ffs_blkfree(ump, fs, ITODEVVP(ip), nb, fs->fs_bsize,
 769                     ip->i_number, vp->v_type, NULL, key);
 770                 blocksreleased += nblocks;
 771         }
 772         ffs_blkrelease_finish(ump, key);
 773
 774         /*
 775          * Recursively free last partial block.
 776          */
 777         if (level > SINGLE && lastbn >= 0) {
 778                 last = lastbn % factor;
 779                 nb = BAP(ip, i);
 780                 if (nb != 0) {
 781                         error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
 782                             last, level - 1, &blkcount);
 783                         if (error)
 784                                 allerror = error;
 785                         blocksreleased += blkcount;
 786                 }
 787         }
 788         if (copy != NULL) {
 789                 free(copy, M_TEMP);
 790         } else {
 791                 bp->b_flags |= B_INVAL | B_NOCACHE;
 792                 brelse(bp);
 793         }
 794
 795         *countp = blocksreleased;
 796         return (allerror);
 797 }
 798
 799 int
 800 ffs_rdonly(struct inode *ip)
 801 {
 802
 803         return (ITOFS(ip)->fs_ronly != 0);
 804 }