sys/ufs/ffs/ffs_vfsops.c

   1 /*-
   2  * Copyright (c) 1989, 1991, 1993, 1994
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 4. Neither the name of the University nor the names of its contributors
  14  *    may be used to endorse or promote products derived from this software
  15  *    without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  *
  29  *      @(#)ffs_vfsops.c        8.31 (Berkeley) 5/20/95
  30  */
  31
  32 #include <sys/cdefs.h>
  33 __FBSDID("$FreeBSD$");
  34
  35 #include "opt_mac.h"
  36 #include "opt_quota.h"
  37 #include "opt_ufs.h"
  38 #include "opt_ffs.h"
  39 #include "opt_ddb.h"
  40
  41 #include <sys/param.h>
  42 #include <sys/systm.h>
  43 #include <sys/namei.h>
  44 #include <sys/priv.h>
  45 #include <sys/proc.h>
  46 #include <sys/kernel.h>
  47 #include <sys/vnode.h>
  48 #include <sys/mount.h>
  49 #include <sys/bio.h>
  50 #include <sys/buf.h>
  51 #include <sys/conf.h>
  52 #include <sys/fcntl.h>
  53 #include <sys/malloc.h>
  54 #include <sys/mutex.h>
  55
  56 #include <security/mac/mac_framework.h>
  57
  58 #include <ufs/ufs/extattr.h>
  59 #include <ufs/ufs/gjournal.h>
  60 #include <ufs/ufs/quota.h>
  61 #include <ufs/ufs/ufsmount.h>
  62 #include <ufs/ufs/inode.h>
  63 #include <ufs/ufs/ufs_extern.h>
  64
  65 #include <ufs/ffs/fs.h>
  66 #include <ufs/ffs/ffs_extern.h>
  67
  68 #include <vm/vm.h>
  69 #include <vm/uma.h>
  70 #include <vm/vm_page.h>
  71
  72 #include <geom/geom.h>
  73 #include <geom/geom_vfs.h>
  74
  75 #include <ddb/ddb.h>
  76
  77 static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
  78
  79 static int      ffs_reload(struct mount *, struct thread *);
  80 static int      ffs_mountfs(struct vnode *, struct mount *, struct thread *);
  81 static void     ffs_oldfscompat_read(struct fs *, struct ufsmount *,
  82                     ufs2_daddr_t);
  83 static void     ffs_oldfscompat_write(struct fs *, struct ufsmount *);
  84 static void     ffs_ifree(struct ufsmount *ump, struct inode *ip);
  85 static vfs_init_t ffs_init;
  86 static vfs_uninit_t ffs_uninit;
  87 static vfs_extattrctl_t ffs_extattrctl;
  88 static vfs_cmount_t ffs_cmount;
  89 static vfs_unmount_t ffs_unmount;
  90 static vfs_mount_t ffs_mount;
  91 static vfs_statfs_t ffs_statfs;
  92 static vfs_fhtovp_t ffs_fhtovp;
  93 static vfs_sync_t ffs_sync;
  94
  95 static struct vfsops ufs_vfsops = {
  96         .vfs_extattrctl =       ffs_extattrctl,
  97         .vfs_fhtovp =           ffs_fhtovp,
  98         .vfs_init =             ffs_init,
  99         .vfs_mount =            ffs_mount,
 100         .vfs_cmount =           ffs_cmount,
 101         .vfs_quotactl =         ufs_quotactl,
 102         .vfs_root =             ufs_root,
 103         .vfs_statfs =           ffs_statfs,
 104         .vfs_sync =             ffs_sync,
 105         .vfs_uninit =           ffs_uninit,
 106         .vfs_unmount =          ffs_unmount,
 107         .vfs_vget =             ffs_vget,
 108         .vfs_susp_clean =       process_deferred_inactive,
 109 };
 110
 111 VFS_SET(ufs_vfsops, ufs, 0);
 112 MODULE_VERSION(ufs, 1);
 113
 114 static b_strategy_t ffs_geom_strategy;
 115 static b_write_t ffs_bufwrite;
 116
 117 static struct buf_ops ffs_ops = {
 118         .bop_name =     "FFS",
 119         .bop_write =    ffs_bufwrite,
 120         .bop_strategy = ffs_geom_strategy,
 121         .bop_sync =     bufsync,
 122 #ifdef NO_FFS_SNAPSHOT
 123         .bop_bdflush =  bufbdflush,
 124 #else
 125         .bop_bdflush =  ffs_bdflush,
 126 #endif
 127 };
 128
 129 static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
 130     "noclusterw", "noexec", "export", "force", "from", "multilabel",
 131     "snapshot", "nosuid", "suiddir", "nosymfollow", "sync",
 132     "union", NULL };
 133
     /*
      * Mount entry point for FFS (installed as .vfs_mount in ufs_vfsops).
      *
      * Returns EINVAL when vfs_filteropt() rejects the new option list
      * against ffs_opts, creates the inode/dinode UMA zones on first use,
      * and folds the "acls" and "snapshot" options into mp->mnt_flag.
      *
      * With MNT_UPDATE set it handles, in this order: a read-write to
      * read-only downgrade, MNT_RELOAD (via ffs_reload()), a read-only to
      * read-write upgrade, and a snapshot request, which returns the result
      * of ffs_snapshot() directly.  After that the "from" device is looked
      * up and access-checked; an update must name the device that is
      * already mounted, while a new mount is completed by ffs_mountfs().
      *
      * Returns 0 on success or an errno value.
      */
 134 static int
 135 ffs_mount(struct mount *mp, struct thread *td)
 136 {
 137         struct vnode *devvp;
             /* ump stays null unless MNT_UPDATE is set (see below). */
 138         struct ufsmount *ump = 0;
 139         struct fs *fs;
 140         int error, flags;
 141         u_int mntorflags, mntandnotflags;
 142         mode_t accessmode;
 143         struct nameidata ndp;
 144         char *fspec;
 145
 146         if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
 147                 return (EINVAL);
             /* First FFS mount attempt: create the three UMA zones. */
 148         if (uma_inode == NULL) {
 149                 uma_inode = uma_zcreate("FFS inode",
 150                     sizeof(struct inode), NULL, NULL, NULL, NULL,
 151                     UMA_ALIGN_PTR, 0);
 152                 uma_ufs1 = uma_zcreate("FFS1 dinode",
 153                     sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
 154                     UMA_ALIGN_PTR, 0);
 155                 uma_ufs2 = uma_zcreate("FFS2 dinode",
 156                     sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
 157                     UMA_ALIGN_PTR, 0);
 158         }
 159
             /*
              * "from" is the path looked up as the device below; for a
              * snapshot request it is the name passed to ffs_snapshot().
              */
 160         fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
 161         if (error)
 162                 return (error);
 163
             /*
              * mntandnotflags is never set in this function, so the
              * mnt_flag update below only ORs bits in.
              */
 164         mntorflags = 0;
 165         mntandnotflags = 0;
 166         if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
 167                 mntorflags |= MNT_ACLS;
 168
 169         if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
 170                 mntorflags |= MNT_SNAPSHOT;
 171                 /*
 172                  * Once we have set the MNT_SNAPSHOT flag, do not
 173                  * persist "snapshot" in the options list.
 174                  */
 175                 vfs_deleteopt(mp->mnt_optnew, "snapshot");
 176                 vfs_deleteopt(mp->mnt_opt, "snapshot");
 177         }
 178
 179         MNT_ILOCK(mp);
 180         mp->mnt_flag = (mp->mnt_flag | mntorflags) & ~mntandnotflags;
 181         MNT_IUNLOCK(mp);
 182         /*
 183          * If updating, check whether changing from read-only to
 184          * read/write; if there is no device name, that's all we do.
 185          */
 186         if (mp->mnt_flag & MNT_UPDATE) {
 187                 ump = VFSTOUFS(mp);
 188                 fs = ump->um_fs;
 189                 devvp = ump->um_devvp;
                     /*
                      * Downgrade read-write -> read-only: the filesystem is
                      * currently writable and the new options carry "ro".
                      */
 190                 if (fs->fs_ronly == 0 &&
 191                     vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 192                         /*
 193                          * Flush any dirty data and suspend filesystem.
 194                          */
 195                         if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 196                                 return (error);
                             /*
                              * Retry the suspension until the mount is
                              * actually marked MNTK_SUSPENDED.
                              */
 197                         for (;;) {
 198                                 vn_finished_write(mp);
 199                                 if ((error = vfs_write_suspend(mp)) != 0)
 200                                         return (error);
 201                                 MNT_ILOCK(mp);
 202                                 if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
 203                                         /*
 204                                          * Allow the secondary writes
 205                                          * to proceed.
 206                                          */
 207                                         mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
 208                                             MNTK_SUSPEND2);
 209                                         wakeup(&mp->mnt_flag);
 210                                         MNT_IUNLOCK(mp);
 211                                         /*
 212                                          * Allow the curthread to
 213                                          * ignore the suspension to
 214                                          * synchronize on-disk state.
 215                                          */
 216                                         curthread->td_pflags |= TDP_IGNSUSP;
 217                                         break;
 218                                 }
 219                                 MNT_IUNLOCK(mp);
                                     /* NOTE(review): return value is ignored here. */
 220                                 vn_start_write(NULL, &mp, V_WAIT);
 221                         }
 222                         /*
 223                          * Check for and optionally get rid of files open
 224                          * for writing.
 225                          */
 226                         flags = WRITECLOSE;
 227                         if (mp->mnt_flag & MNT_FORCE)
 228                                 flags |= FORCECLOSE;
 229                         if (mp->mnt_flag & MNT_SOFTDEP) {
 230                                 error = softdep_flushfiles(mp, flags, td);
 231                         } else {
 232                                 error = ffs_flushfiles(mp, flags, td);
 233                         }
 234                         if (error) {
 235                                 vfs_write_resume(mp);
 236                                 return (error);
 237                         }
                             /*
                              * Pending counts left over after the flush are
                              * reported and then zeroed.
                              */
 238                         if (fs->fs_pendingblocks != 0 ||
 239                             fs->fs_pendinginodes != 0) {
 240                                 printf("%s: %s: blocks %jd files %d\n",
 241                                     fs->fs_fsmnt, "update error",
 242                                     (intmax_t)fs->fs_pendingblocks,
 243                                     fs->fs_pendinginodes);
 244                                 fs->fs_pendingblocks = 0;
 245                                 fs->fs_pendinginodes = 0;
 246                         }
 247                         if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
 248                                 fs->fs_clean = 1;
                             /*
                              * Write the superblock synchronously; on failure
                              * take back the clean marking and let writers
                              * continue on the still read-write filesystem.
                              */
 249                         if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 250                                 fs->fs_ronly = 0;
 251                                 fs->fs_clean = 0;
 252                                 vfs_write_resume(mp);
 253                                 return (error);
 254                         }
                             /*
                              * NOTE(review): presumably this hands back the
                              * write access count on the GEOM consumer;
                              * confirm against g_access().  The result is
                              * not checked.
                              */
 255                         DROP_GIANT();
 256                         g_topology_lock();
 257                         g_access(ump->um_cp, 0, -1, 0);
 258                         g_topology_unlock();
 259                         PICKUP_GIANT();
 260                         fs->fs_ronly = 1;
 261                         MNT_ILOCK(mp);
 262                         mp->mnt_flag |= MNT_RDONLY;
 263                         MNT_IUNLOCK(mp);
 264                         /*
 265                          * Allow the writers to note that filesystem
 266                          * is ro now.
 267                          */
 268                         vfs_write_resume(mp);
 269                 }
 270                 if ((mp->mnt_flag & MNT_RELOAD) &&
 271                     (error = ffs_reload(mp, td)) != 0)
 272                         return (error);
                     /*
                      * Upgrade read-only -> read-write: the filesystem is
                      * currently read-only and the new options lack "ro".
                      */
 273                 if (fs->fs_ronly &&
 274                     !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 275                         /*
 276                          * If upgrade to read-write by non-root, then verify
 277                          * that user has necessary permissions on the device.
 278                          */
 279                         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
 280                         error = VOP_ACCESS(devvp, VREAD | VWRITE,
 281                             td->td_ucred, td);
                             /*
                              * A failed access check can still be overridden
                              * by the PRIV_VFS_MOUNT_PERM privilege.
                              */
 282                         if (error)
 283                                 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 284                         if (error) {
 285                                 VOP_UNLOCK(devvp, 0, td);
 286                                 return (error);
 287                         }
 288                         VOP_UNLOCK(devvp, 0, td);
                             /*
                              * Recompute FS_UNCLEAN from fs_clean.  An unclean
                              * filesystem may go read-write only when forced,
                              * or when it uses soft updates and is not
                              * flagged FS_NEEDSFSCK; otherwise EPERM.
                              */
 289                         fs->fs_flags &= ~FS_UNCLEAN;
 290                         if (fs->fs_clean == 0) {
 291                                 fs->fs_flags |= FS_UNCLEAN;
 292                                 if ((mp->mnt_flag & MNT_FORCE) ||
 293                                     ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
 294                                      (fs->fs_flags & FS_DOSOFTDEP))) {
 295                                         printf("WARNING: %s was not %s\n",
 296                                            fs->fs_fsmnt, "properly dismounted");
 297                                 } else {
 298                                         printf(
 299 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 300                                             fs->fs_fsmnt);
 301                                         return (EPERM);
 302                                 }
 303                         }
 304                         DROP_GIANT();
 305                         g_topology_lock();
 306                         /*
 307                          * If we're the root device, we may not have an E count
 308                          * yet, get it now.
 309                          */
 310                         if (ump->um_cp->ace == 0)
 311                                 error = g_access(ump->um_cp, 0, 1, 1);
 312                         else
 313                                 error = g_access(ump->um_cp, 0, 1, 0);
 314                         g_topology_unlock();
 315                         PICKUP_GIANT();
 316                         if (error)
 317                                 return (error);
                             /*
                              * NOTE(review): the error returns below do not
                              * undo the g_access() request made above.
                              */
 318                         if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 319                                 return (error);
 320                         fs->fs_ronly = 0;
 321                         MNT_ILOCK(mp);
 322                         mp->mnt_flag &= ~MNT_RDONLY;
 323                         MNT_IUNLOCK(mp);
                             /* Mark the filesystem in use and push that to disk. */
 324                         fs->fs_clean = 0;
 325                         if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 326                                 vn_finished_write(mp);
 327                                 return (error);
 328                         }
 329                         /* check to see if we need to start softdep */
 330                         if ((fs->fs_flags & FS_DOSOFTDEP) &&
 331                             (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
 332                                 vn_finished_write(mp);
 333                                 return (error);
 334                         }
 335                         if (fs->fs_snapinum[0] != 0)
 336                                 ffs_snapshot_mount(mp);
 337                         vn_finished_write(mp);
 338                 }
 339                 /*
 340                  * Soft updates is incompatible with "async",
 341                  * so if we are doing softupdates stop the user
 342                  * from setting the async flag in an update.
 343                  * Softdep_mount() clears it in an initial mount
 344                  * or ro->rw remount.
 345                  */
 346                 if (mp->mnt_flag & MNT_SOFTDEP) {
 347                         /* XXX: Reset too late ? */
 348                         MNT_ILOCK(mp);
 349                         mp->mnt_flag &= ~MNT_ASYNC;
 350                         MNT_IUNLOCK(mp);
 351                 }
 352                 /*
 353                  * Keep MNT_ACLS flag if it is stored in superblock.
 354                  */
 355                 if ((fs->fs_flags & FS_ACLS) != 0) {
 356                         /* XXX: Set too late ? */
 357                         MNT_ILOCK(mp);
 358                         mp->mnt_flag |= MNT_ACLS;
 359                         MNT_IUNLOCK(mp);
 360                 }
 361
 362                 /*
 363                  * If this is a snapshot request, take the snapshot.
 364                  */
 365                 if (mp->mnt_flag & MNT_SNAPSHOT)
 366                         return (ffs_snapshot(mp, fspec));
 367         }
 368
 369         /*
 370          * Not an update, or updating the name: look up the name
 371          * and verify that it refers to a sensible disk device.
 372          */
 373         NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
 374         if ((error = namei(&ndp)) != 0)
 375                 return (error);
 376         NDFREE(&ndp, NDF_ONLY_PNBUF);
             /* LOCKLEAF was requested, so error paths release devvp with vput(). */
 377         devvp = ndp.ni_vp;
 378         if (!vn_isdisk(devvp, &error)) {
 379                 vput(devvp);
 380                 return (error);
 381         }
 382
 383         /*
 384          * If mount by non-root, then verify that user has necessary
 385          * permissions on the device.
 386          */
 387         accessmode = VREAD;
 388         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 389                 accessmode |= VWRITE;
 390         error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td);
 391         if (error)
 392                 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 393         if (error) {
 394                 vput(devvp);
 395                 return (error);
 396         }
 397
 398         if (mp->mnt_flag & MNT_UPDATE) {
 399                 /*
 400                  * Update only
 401                  *
 402                  * If it's not the same vnode, or at least the same device
 403                  * then it's not correct.
 404                  */
 405
                     /*
                      * The freshly looked-up vnode is only used for this
                      * comparison and is released either way.
                      */
 406                 if (devvp->v_rdev != ump->um_devvp->v_rdev)
 407                         error = EINVAL; /* needs translation */
 408                 vput(devvp);
 409                 if (error)
 410                         return (error);
 411         } else {
 412                 /*
 413                  * New mount
 414                  *
 415                  * We need the name for the mount point (also used for
 416                  * "last mounted on") copied in. If an error occurs,
 417                  * the mount point is discarded by the upper level code.
 418                  * Note that vfs_mount() populates f_mntonname for us.
 419                  */
                     /*
                      * ffs_mountfs() unlocks devvp itself, hence vrele()
                      * rather than vput() on failure.
                      */
 420                 if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
 421                         vrele(devvp);
 422                         return (error);
 423                 }
 424         }
 425         vfs_mountedfrom(mp, fspec);
 426         return (0);
 427 }
 428
 429 /*
 430  * Compatibility with old mount system call.
 431  */
 432
 433 static int
 434 ffs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
 435 {
 436         struct ufs_args args;
 437         int error;
 438
 439         if (data == NULL)
 440                 return (EINVAL);
 441         error = copyin(data, &args, sizeof args);
 442         if (error)
 443                 return (error);
 444
 445         ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
 446         ma = mount_arg(ma, "export", &args.export, sizeof args.export);
 447         error = kernel_mount(ma, flags);
 448
 449         return (error);
 450 }
 451
 452 /*
 453  * Reload all incore data for a filesystem (used after running fsck on
 454  * the root filesystem and finding things to fix). The filesystem must
 455  * be mounted read-only.
 456  *
 457  * Things to do to update the mount:
 458  *      1) invalidate all cached meta-data.
 459  *      2) re-read superblock from disk.
 460  *      3) re-read summary information from disk.
 461  *      4) invalidate all inactive vnodes.
 462  *      5) invalidate all cached file data.
 463  *      6) re-read inode data for all active vnodes.
 464  */
 465 static int
 466 ffs_reload(struct mount *mp, struct thread *td)
 467 {
 468         struct vnode *vp, *mvp, *devvp;
 469         struct inode *ip;
 470         void *space;
 471         struct buf *bp;
 472         struct fs *fs, *newfs;
 473         struct ufsmount *ump;
 474         ufs2_daddr_t sblockloc;
 475         int i, blks, size, error;
 476         int32_t *lp;
 477
 478         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 479                 return (EINVAL);
 480         ump = VFSTOUFS(mp);
 481         /*
 482          * Step 1: invalidate all cached meta-data.
 483          */
 484         devvp = VFSTOUFS(mp)->um_devvp;
 485         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
 486         if (vinvalbuf(devvp, 0, td, 0, 0) != 0)
 487                 panic("ffs_reload: dirty1");
 488         VOP_UNLOCK(devvp, 0, td);
 489
 490         /*
 491          * Step 2: re-read superblock from disk.
 492          */
 493         fs = VFSTOUFS(mp)->um_fs;
 494         if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
 495             NOCRED, &bp)) != 0)
 496                 return (error);
 497         newfs = (struct fs *)bp->b_data;
 498         if ((newfs->fs_magic != FS_UFS1_MAGIC &&
 499              newfs->fs_magic != FS_UFS2_MAGIC) ||
 500             newfs->fs_bsize > MAXBSIZE ||
 501             newfs->fs_bsize < sizeof(struct fs)) {
 502                         brelse(bp);
 503                         return (EIO);           /* XXX needs translation */
 504         }
 505         /*
 506          * Copy pointer fields back into superblock before copying in   XXX
 507          * new superblock. These should really be in the ufsmount.      XXX
 508          * Note that important parameters (eg fs_ncg) are unchanged.
 509          */
 510         newfs->fs_csp = fs->fs_csp;
 511         newfs->fs_maxcluster = fs->fs_maxcluster;
 512         newfs->fs_contigdirs = fs->fs_contigdirs;
 513         newfs->fs_active = fs->fs_active;
 514         /* The file system is still read-only. */
 515         newfs->fs_ronly = 1;
 516         sblockloc = fs->fs_sblockloc;
 517         bcopy(newfs, fs, (u_int)fs->fs_sbsize);
 518         brelse(bp);
 519         mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 520         ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
 521         UFS_LOCK(ump);
 522         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 523                 printf("%s: reload pending error: blocks %jd files %d\n",
 524                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 525                     fs->fs_pendinginodes);
 526                 fs->fs_pendingblocks = 0;
 527                 fs->fs_pendinginodes = 0;
 528         }
 529         UFS_UNLOCK(ump);
 530
 531         /*
 532          * Step 3: re-read summary information from disk.
 533          */
 534         blks = howmany(fs->fs_cssize, fs->fs_fsize);
 535         space = fs->fs_csp;
 536         for (i = 0; i < blks; i += fs->fs_frag) {
 537                 size = fs->fs_bsize;
 538                 if (i + fs->fs_frag > blks)
 539                         size = (blks - i) * fs->fs_fsize;
 540                 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
 541                     NOCRED, &bp);
 542                 if (error)
 543                         return (error);
 544                 bcopy(bp->b_data, space, (u_int)size);
 545                 space = (char *)space + size;
 546                 brelse(bp);
 547         }
 548         /*
 549          * We no longer know anything about clusters per cylinder group.
 550          */
 551         if (fs->fs_contigsumsize > 0) {
 552                 lp = fs->fs_maxcluster;
 553                 for (i = 0; i < fs->fs_ncg; i++)
 554                         *lp++ = fs->fs_contigsumsize;
 555         }
 556
 557 loop:
 558         MNT_ILOCK(mp);
 559         MNT_VNODE_FOREACH(vp, mp, mvp) {
 560                 VI_LOCK(vp);
 561                 if (vp->v_iflag & VI_DOOMED) {
 562                         VI_UNLOCK(vp);
 563                         continue;
 564                 }
 565                 MNT_IUNLOCK(mp);
 566                 /*
 567                  * Step 4: invalidate all cached file data.
 568                  */
 569                 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
 570                         MNT_VNODE_FOREACH_ABORT(mp, mvp);
 571                         goto loop;
 572                 }
 573                 if (vinvalbuf(vp, 0, td, 0, 0))
 574                         panic("ffs_reload: dirty2");
 575                 /*
 576                  * Step 5: re-read inode data for all active vnodes.
 577                  */
 578                 ip = VTOI(vp);
 579                 error =
 580                     bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 581                     (int)fs->fs_bsize, NOCRED, &bp);
 582                 if (error) {
 583                         VOP_UNLOCK(vp, 0, td);
 584                         vrele(vp);
 585                         MNT_VNODE_FOREACH_ABORT(mp, mvp);
 586                         return (error);
 587                 }
 588                 ffs_load_inode(bp, ip, fs, ip->i_number);
 589                 ip->i_effnlink = ip->i_nlink;
 590                 brelse(bp);
 591                 VOP_UNLOCK(vp, 0, td);
 592                 vrele(vp);
 593                 MNT_ILOCK(mp);
 594         }
 595         MNT_IUNLOCK(mp);
 596         return (0);
 597 }
 598
 599 /*
 600  * Possible superblock locations ordered from most to least likely.
 601  */
 602 static int sblock_try[] = SBLOCKSEARCH;     /* list ends with -1; ffs_mountfs() probes entries in order */
 603
 604 /*
 605  * Common code for mount and mountroot
 606  */
 607 static int
 608 ffs_mountfs(devvp, mp, td)
 609         struct vnode *devvp;
 610         struct mount *mp;
 611         struct thread *td;
 612 {
 613         struct ufsmount *ump;
 614         struct buf *bp;
 615         struct fs *fs;
 616         struct cdev *dev;
 617         void *space;
 618         ufs2_daddr_t sblockloc;
 619         int error, i, blks, size, ronly;
 620         int32_t *lp;
 621         struct ucred *cred;
 622         struct g_consumer *cp;
 623         struct mount *nmp;
 624
 625         bp = NULL;
 626         ump = NULL;
 627         cred = td ? td->td_ucred : NOCRED;
 628         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 629
 630         dev = devvp->v_rdev;
 631         dev_ref(dev);
 632         DROP_GIANT();
 633         g_topology_lock();
 634         error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
 635
 636         /*
 637          * If we are a root mount, drop the E flag so fsck can do its magic.
 638          * We will pick it up again when we remount R/W.
 639          */
 640         if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
 641                 error = g_access(cp, 0, 0, -1);
 642         g_topology_unlock();
 643         PICKUP_GIANT();
 644         VOP_UNLOCK(devvp, 0, td);
 645         if (error)
 646                 goto out;
 647         if (devvp->v_rdev->si_iosize_max != 0)
 648                 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
 649         if (mp->mnt_iosize_max > MAXPHYS)
 650                 mp->mnt_iosize_max = MAXPHYS;
 651
 652         devvp->v_bufobj.bo_private = cp;
 653         devvp->v_bufobj.bo_ops = &ffs_ops;
 654
 655         fs = NULL;
 656         sblockloc = 0;
 657         /*
 658          * Try reading the superblock in each of its possible locations.
 659          */
 660         for (i = 0; sblock_try[i] != -1; i++) {
 661                 if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
 662                         error = EINVAL;
 663                         vfs_mount_error(mp,
 664                             "Invalid sectorsize %d for superblock size %d",
 665                             cp->provider->sectorsize, SBLOCKSIZE);
 666                         goto out;
 667                 }
 668                 if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
 669                     cred, &bp)) != 0)
 670                         goto out;
 671                 fs = (struct fs *)bp->b_data;
 672                 sblockloc = sblock_try[i];
 673                 if ((fs->fs_magic == FS_UFS1_MAGIC ||
 674                      (fs->fs_magic == FS_UFS2_MAGIC &&
 675                       (fs->fs_sblockloc == sblockloc ||
 676                        (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
 677                     fs->fs_bsize <= MAXBSIZE &&
 678                     fs->fs_bsize >= sizeof(struct fs))
 679                         break;
 680                 brelse(bp);
 681                 bp = NULL;
 682         }
 683         if (sblock_try[i] == -1) {
 684                 error = EINVAL;         /* XXX needs translation */
 685                 goto out;
 686         }
 687         fs->fs_fmod = 0;
 688         fs->fs_flags &= ~FS_INDEXDIRS;  /* no support for directory indicies */
 689         fs->fs_flags &= ~FS_UNCLEAN;
 690         if (fs->fs_clean == 0) {
 691                 fs->fs_flags |= FS_UNCLEAN;
 692                 if (ronly || (mp->mnt_flag & MNT_FORCE) ||
 693                     ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
 694                      (fs->fs_flags & FS_DOSOFTDEP))) {
 695                         printf(
 696 "WARNING: %s was not properly dismounted\n",
 697                             fs->fs_fsmnt);
 698                 } else {
 699                         printf(
 700 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 701                             fs->fs_fsmnt);
 702                         error = EPERM;
 703                         goto out;
 704                 }
 705                 if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
 706                     (mp->mnt_flag & MNT_FORCE)) {
 707                         printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
 708                             (intmax_t)fs->fs_pendingblocks,
 709                             fs->fs_pendinginodes);
 710                         fs->fs_pendingblocks = 0;
 711                         fs->fs_pendinginodes = 0;
 712                 }
 713         }
 714         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 715                 printf("%s: mount pending error: blocks %jd files %d\n",
 716                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 717                     fs->fs_pendinginodes);
 718                 fs->fs_pendingblocks = 0;
 719                 fs->fs_pendinginodes = 0;
 720         }
 721         if ((fs->fs_flags & FS_GJOURNAL) != 0) {
 722 #ifdef UFS_GJOURNAL
 723                 /*
 724                  * Get journal provider name.
 725                  */
 726                 size = 1024;
 727                 mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
 728                 if (g_io_getattr("GJOURNAL::provider", cp, &size,
 729                     mp->mnt_gjprovider) == 0) {
 730                         mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
 731                             M_UFSMNT, M_WAITOK);
 732                         MNT_ILOCK(mp);
 733                         mp->mnt_flag |= MNT_GJOURNAL;
 734                         MNT_IUNLOCK(mp);
 735                 } else {
 736                         printf(
 737 "WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
 738                             mp->mnt_stat.f_mntonname);
 739                         free(mp->mnt_gjprovider, M_UFSMNT);
 740                         mp->mnt_gjprovider = NULL;
 741                 }
 742 #else
 743                 printf(
 744 "WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
 745                     mp->mnt_stat.f_mntonname);
 746 #endif
 747         } else {
 748                 mp->mnt_gjprovider = NULL;
 749         }
 750         ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
 751         ump->um_cp = cp;
 752         ump->um_bo = &devvp->v_bufobj;
 753         ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
 754         if (fs->fs_magic == FS_UFS1_MAGIC) {
 755                 ump->um_fstype = UFS1;
 756                 ump->um_balloc = ffs_balloc_ufs1;
 757         } else {
 758                 ump->um_fstype = UFS2;
 759                 ump->um_balloc = ffs_balloc_ufs2;
 760         }
 761         ump->um_blkatoff = ffs_blkatoff;
 762         ump->um_truncate = ffs_truncate;
 763         ump->um_update = ffs_update;
 764         ump->um_valloc = ffs_valloc;
 765         ump->um_vfree = ffs_vfree;
 766         ump->um_ifree = ffs_ifree;
 767         ump->um_rdonly = ffs_rdonly;
 768         mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
 769         bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
 770         if (fs->fs_sbsize < SBLOCKSIZE)
 771                 bp->b_flags |= B_INVAL | B_NOCACHE;
 772         brelse(bp);
 773         bp = NULL;
 774         fs = ump->um_fs;
 775         ffs_oldfscompat_read(fs, ump, sblockloc);
 776         fs->fs_ronly = ronly;
 777         size = fs->fs_cssize;
 778         blks = howmany(size, fs->fs_fsize);
 779         if (fs->fs_contigsumsize > 0)
 780                 size += fs->fs_ncg * sizeof(int32_t);
 781         size += fs->fs_ncg * sizeof(u_int8_t);
 782         space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
 783         fs->fs_csp = space;
 784         for (i = 0; i < blks; i += fs->fs_frag) {
 785                 size = fs->fs_bsize;
 786                 if (i + fs->fs_frag > blks)
 787                         size = (blks - i) * fs->fs_fsize;
 788                 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
 789                     cred, &bp)) != 0) {
 790                         free(fs->fs_csp, M_UFSMNT);
 791                         goto out;
 792                 }
 793                 bcopy(bp->b_data, space, (u_int)size);
 794                 space = (char *)space + size;
 795                 brelse(bp);
 796                 bp = NULL;
 797         }
 798         if (fs->fs_contigsumsize > 0) {
 799                 fs->fs_maxcluster = lp = space;
 800                 for (i = 0; i < fs->fs_ncg; i++)
 801                         *lp++ = fs->fs_contigsumsize;
 802                 space = lp;
 803         }
 804         size = fs->fs_ncg * sizeof(u_int8_t);
 805         fs->fs_contigdirs = (u_int8_t *)space;
 806         bzero(fs->fs_contigdirs, size);
 807         fs->fs_active = NULL;
 808         mp->mnt_data = (qaddr_t)ump;
 809         mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
 810         mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
 811         nmp = NULL;
 812         if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
 813             (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
 814                 if (nmp)
 815                         vfs_rel(nmp);
 816                 vfs_getnewfsid(mp);
 817         }
 818         mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 819         MNT_ILOCK(mp);
 820         mp->mnt_flag |= MNT_LOCAL;
 821         MNT_IUNLOCK(mp);
 822         if ((fs->fs_flags & FS_MULTILABEL) != 0) {
 823 #ifdef MAC
 824                 MNT_ILOCK(mp);
 825                 mp->mnt_flag |= MNT_MULTILABEL;
 826                 MNT_IUNLOCK(mp);
 827 #else
 828                 printf(
 829 "WARNING: %s: multilabel flag on fs but no MAC support\n",
 830                     mp->mnt_stat.f_mntonname);
 831 #endif
 832         }
 833         if ((fs->fs_flags & FS_ACLS) != 0) {
 834 #ifdef UFS_ACL
 835                 MNT_ILOCK(mp);
 836                 mp->mnt_flag |= MNT_ACLS;
 837                 MNT_IUNLOCK(mp);
 838 #else
 839                 printf(
 840 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
 841                     mp->mnt_stat.f_mntonname);
 842 #endif
 843         }
 844         ump->um_mountp = mp;
 845         ump->um_dev = dev;
 846         ump->um_devvp = devvp;
 847         ump->um_nindir = fs->fs_nindir;
 848         ump->um_bptrtodb = fs->fs_fsbtodb;
 849         ump->um_seqinc = fs->fs_frag;
 850         for (i = 0; i < MAXQUOTAS; i++)
 851                 ump->um_quotas[i] = NULLVP;
 852 #ifdef UFS_EXTATTR
 853         ufs_extattr_uepm_init(&ump->um_extattr);
 854 #endif
 855         /*
 856          * Set FS local "last mounted on" information (NULL pad)
 857          */
 858         bzero(fs->fs_fsmnt, MAXMNTLEN);
 859         strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
 860
 861         if( mp->mnt_flag & MNT_ROOTFS) {
 862                 /*
 863                  * Root mount; update timestamp in mount structure.
 864                  * This will be used by the common root mount code
 865                  * to update the system clock.
 866                  */
 867                 mp->mnt_time = fs->fs_time;
 868         }
 869
 870         if (ronly == 0) {
 871                 if ((fs->fs_flags & FS_DOSOFTDEP) &&
 872                     (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
 873                         free(fs->fs_csp, M_UFSMNT);
 874                         goto out;
 875                 }
 876                 if (fs->fs_snapinum[0] != 0)
 877                         ffs_snapshot_mount(mp);
 878                 fs->fs_fmod = 1;
 879                 fs->fs_clean = 0;
 880                 (void) ffs_sbupdate(ump, MNT_WAIT, 0);
 881         }
 882         /*
 883          * Initialize filesystem stat information in mount struct.
 884          */
 885         MNT_ILOCK(mp);
 886         mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED;
 887         MNT_IUNLOCK(mp);
 888 #ifdef UFS_EXTATTR
 889 #ifdef UFS_EXTATTR_AUTOSTART
 890         /*
 891          *
 892          * Auto-starting does the following:
 893          *      - check for /.attribute in the fs, and extattr_start if so
 894          *      - for each file in .attribute, enable that file with
 895          *        an attribute of the same name.
 896          * Not clear how to report errors -- probably eat them.
 897          * This would all happen while the filesystem was busy/not
 898          * available, so would effectively be "atomic".
 899          */
 900         mp->mnt_stat.f_iosize = fs->fs_bsize;
 901         (void) ufs_extattr_autostart(mp, td);
 902 #endif /* !UFS_EXTATTR_AUTOSTART */
 903 #endif /* !UFS_EXTATTR */
 904         return (0);
 905 out:
 906         if (bp)
 907                 brelse(bp);
 908         if (cp != NULL) {
 909                 DROP_GIANT();
 910                 g_topology_lock();
 911                 g_vfs_close(cp, td);
 912                 g_topology_unlock();
 913                 PICKUP_GIANT();
 914         }
 915         if (ump) {
 916                 mtx_destroy(UFS_MTX(ump));
 917                 if (mp->mnt_gjprovider != NULL) {
 918                         free(mp->mnt_gjprovider, M_UFSMNT);
 919                         mp->mnt_gjprovider = NULL;
 920                 }
 921                 free(ump->um_fs, M_UFSMNT);
 922                 free(ump, M_UFSMNT);
 923                 mp->mnt_data = (qaddr_t)0;
 924         }
 925         dev_rel(dev);
 926         return (error);
 927 }
 928
 929 #include <sys/sysctl.h>
     /*
      * Debugging knob, registered read/write (CTLFLAG_RW) under the _debug
      * sysctl node.  When nonzero, ffs_oldfscompat_read() saves fs_cgsize in
      * fs_save_cgsize and replaces it with fs_bsize, and
      * ffs_oldfscompat_write() restores the saved value.
      */
 930 static int bigcgs = 0;
 931 SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
 932
 933 /*
 934  * Sanity checks for loading old filesystem superblocks.
 935  * See ffs_oldfscompat_write below for unwound actions.
 936  *
      * The superblock is adjusted in place.
      *
      *  fs        - superblock to fix up.
      *  ump       - mount structure; for UFS1 it receives the original
      *              fs_maxfilesize in um_savedmaxfilesize.
      *  sblockloc - value stored in fs_sblockloc when the flags have not
      *              been migrated yet (FS_FLAGS_UPDATED clear).
      *
 937  * XXX - Parts get retired eventually.
 938  * Unfortunately new bits get added.
 939  */
 940 static void
 941 ffs_oldfscompat_read(fs, ump, sblockloc)
 942         struct fs *fs;
 943         struct ufsmount *ump;
 944         ufs2_daddr_t sblockloc;
 945 {
 946         off_t maxfilesize;
 947
 948         /*
 949          * If not yet done, update fs_flags location and value of fs_sblockloc.
 950          */
 951         if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
 952                 fs->fs_flags = fs->fs_old_flags;
 953                 fs->fs_old_flags |= FS_FLAGS_UPDATED;
 954                 fs->fs_sblockloc = sblockloc;
 955         }
 956         /*
 957          * If not yet done, update UFS1 superblock with new wider fields.
              * fs_maxbsize differing from fs_bsize is the "not yet done"
              * test; it is set equal to fs_bsize here so the copy is not
              * repeated.
 958          */
 959         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
 960                 fs->fs_maxbsize = fs->fs_bsize;
 961                 fs->fs_time = fs->fs_old_time;
 962                 fs->fs_size = fs->fs_old_size;
 963                 fs->fs_dsize = fs->fs_old_dsize;
 964                 fs->fs_csaddr = fs->fs_old_csaddr;
 965                 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
 966                 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
 967                 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
 968                 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
 969         }
             /*
              * UFS1 with an inode format older than FS_44INODEFMT: force the
              * maximum file size to 2^31 - 1 and derive fs_qbmask/fs_qfmask
              * as the complements of fs_bmask/fs_fmask.
              */
 970         if (fs->fs_magic == FS_UFS1_MAGIC &&
 971             fs->fs_old_inodefmt < FS_44INODEFMT) {
 972                 fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
 973                 fs->fs_qbmask = ~fs->fs_bmask;
 974                 fs->fs_qfmask = ~fs->fs_fmask;
 975         }
             /*
              * For UFS1, remember the superblock's maximum file size so that
              * ffs_oldfscompat_write() can put it back, then clamp the
              * in-core value to 0x80000000 * fs_bsize - 1.
              */
 976         if (fs->fs_magic == FS_UFS1_MAGIC) {
 977                 ump->um_savedmaxfilesize = fs->fs_maxfilesize;
 978                 maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
 979                 if (fs->fs_maxfilesize > maxfilesize)
 980                         fs->fs_maxfilesize = maxfilesize;
 981         }
 982         /* Compatibility for old filesystems: default any unset (<= 0) average. */
 983         if (fs->fs_avgfilesize <= 0)
 984                 fs->fs_avgfilesize = AVFILESIZ;
 985         if (fs->fs_avgfpdir <= 0)
 986                 fs->fs_avgfpdir = AFPDIR;
             /*
              * bigcgs debug knob: stash the real cylinder group size and use
              * the block size instead; undone in ffs_oldfscompat_write().
              */
 987         if (bigcgs) {
 988                 fs->fs_save_cgsize = fs->fs_cgsize;
 989                 fs->fs_cgsize = fs->fs_bsize;
 990         }
 991 }
 992
 993 /*
 994  * Unwinding superblock updates for old filesystems.
 995  * See ffs_oldfscompat_read above for details.
 996  *
      *  fs  - superblock to adjust in place.
      *  ump - mount structure holding um_savedmaxfilesize, the value of
      *        fs_maxfilesize saved by ffs_oldfscompat_read().
      *
 997  * XXX - Parts get retired eventually.
 998  * Unfortunately new bits get added.
 999  */
1000 static void
1001 ffs_oldfscompat_write(fs, ump)
1002         struct fs *fs;
1003         struct ufsmount *ump;
1004 {
1005
1006         /*
1007          * Copy back UFS2 updated fields that UFS1 inspects.
              * fs_maxfilesize is restored from the value saved at read time.
1008          */
1009         if (fs->fs_magic == FS_UFS1_MAGIC) {
1010                 fs->fs_old_time = fs->fs_time;
1011                 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1012                 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1013                 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1014                 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1015                 fs->fs_maxfilesize = ump->um_savedmaxfilesize;
1016         }
             /*
              * bigcgs debug knob: restore the cylinder group size saved by
              * ffs_oldfscompat_read() and clear the saved copy.
              */
1017         if (bigcgs) {
1018                 fs->fs_cgsize = fs->fs_save_cgsize;
1019                 fs->fs_save_cgsize = 0;
1020         }
1021 }
1022
1023 /*
1024  * unmount system call
      *
      *  mp       - mount point being unmounted.
      *  mntflags - unmount flags; MNT_FORCE adds FORCECLOSE to the flush.
      *  td       - calling thread.
      *
      * Flushes all files, writes the superblock for read-write mounts,
      * closes the GEOM consumer and frees the per-mount structures.
      * Returns 0 on success.  If the flush or the superblock update
      * fails, control goes to "fail", which resumes writes (if they were
      * suspended), restarts extended attributes (if they were stopped
      * here) and returns the error with the mount left in place.
1025  */
1026 static int
1027 ffs_unmount(mp, mntflags, td)
1028         struct mount *mp;
1029         int mntflags;
1030         struct thread *td;
1031 {
1032         struct ufsmount *ump = VFSTOUFS(mp);
1033         struct fs *fs;
1034         int error, flags, susp;
1035 #ifdef UFS_EXTATTR
1036         int e_restart;
1037 #endif
1038
1039         flags = 0;
1040         fs = ump->um_fs;
             /*
              * NOTE(review): susp is enabled only when fs_ronly is nonzero,
              * i.e. for a forced unmount of a read-only filesystem.  Confirm
              * this sense is intended; suspending writes would normally
              * matter for a read-write mount.
              */
1041         if (mntflags & MNT_FORCE) {
1042                 flags |= FORCECLOSE;
1043                 susp = fs->fs_ronly != 0;
1044         } else
1045                 susp = 0;
1046 #ifdef UFS_EXTATTR
             /*
              * Stop extended attributes.  e_restart records whether they
              * were stopped here and so must be restarted on failure.
              */
1047         if ((error = ufs_extattr_stop(mp, td))) {
1048                 if (error != EOPNOTSUPP)
1049                         printf("ffs_unmount: ufs_extattr_stop returned %d\n",
1050                             error);
1051                 e_restart = 0;
1052         } else {
1053                 ufs_extattr_uepm_destroy(&ump->um_extattr);
1054                 e_restart = 1;
1055         }
1056 #endif
1057         if (susp) {
1058                 /*
1059                  * dounmount already called vn_start_write().
                      *
                      * Drop that write reference and request suspension,
                      * retrying until MNTK_SUSPENDED is seen.  Then clear
                      * the suspension flags, wake sleepers on &mp->mnt_flag
                      * and mark this thread TDP_IGNSUSP.
                      *
                      * NOTE(review): the early return on vfs_write_suspend()
                      * failure does not go through "fail", so extended
                      * attributes stopped above are not restarted on that
                      * path - confirm whether that is intended.
1060                  */
1061                 for (;;) {
1062                         vn_finished_write(mp);
1063                         if ((error = vfs_write_suspend(mp)) != 0)
1064                                 return (error);
1065                         MNT_ILOCK(mp);
1066                         if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
1067                                 mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
1068                                     MNTK_SUSPEND2);
1069                                 wakeup(&mp->mnt_flag);
1070                                 MNT_IUNLOCK(mp);
1071                                 curthread->td_pflags |= TDP_IGNSUSP;
1072                                 break;
1073                         }
1074                         MNT_IUNLOCK(mp);
1075                         vn_start_write(NULL, &mp, V_WAIT);
1076                 }
1077         }
             /* Flush all files, via the soft updates code when it is active. */
1078         if (mp->mnt_flag & MNT_SOFTDEP) {
1079                 if ((error = softdep_flushfiles(mp, flags, td)) != 0)
1080                         goto fail;
1081         } else {
1082                 if ((error = ffs_flushfiles(mp, flags, td)) != 0)
1083                         goto fail;
1084         }
             /* Report and reset any leftover pending block/inode counts. */
1085         UFS_LOCK(ump);
1086         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1087                 printf("%s: unmount pending error: blocks %jd files %d\n",
1088                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
1089                     fs->fs_pendinginodes);
1090                 fs->fs_pendingblocks = 0;
1091                 fs->fs_pendinginodes = 0;
1092         }
1093         UFS_UNLOCK(ump);
             /*
              * Read-write mount: mark the filesystem clean unless FS_UNCLEAN
              * or FS_NEEDSFSCK is set, and write the superblock
              * synchronously.  If the write fails, the in-core clean flag is
              * cleared again before bailing out.
              */
1094         if (fs->fs_ronly == 0) {
1095                 fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
1096                 error = ffs_sbupdate(ump, MNT_WAIT, 0);
1097                 if (error) {
1098                         fs->fs_clean = 0;
1099                         goto fail;
1100                 }
1101         }
1102         if (susp) {
1103                 vfs_write_resume(mp);
1104                 vn_start_write(NULL, &mp, V_WAIT);
1105         }
             /*
              * Tear down: close the GEOM consumer, release the device vnode
              * and device reference, destroy the per-mount mutex and free
              * the gjournal provider name, the summary area, the in-core
              * superblock and the ufsmount itself.
              */
1106         DROP_GIANT();
1107         g_topology_lock();
1108         g_vfs_close(ump->um_cp, td);
1109         g_topology_unlock();
1110         PICKUP_GIANT();
1111         vrele(ump->um_devvp);
1112         dev_rel(ump->um_dev);
1113         mtx_destroy(UFS_MTX(ump));
1114         if (mp->mnt_gjprovider != NULL) {
1115                 free(mp->mnt_gjprovider, M_UFSMNT);
1116                 mp->mnt_gjprovider = NULL;
1117         }
1118         free(fs->fs_csp, M_UFSMNT);
1119         free(fs, M_UFSMNT);
1120         free(ump, M_UFSMNT);
1121         mp->mnt_data = (qaddr_t)0;
1122         MNT_ILOCK(mp);
1123         mp->mnt_flag &= ~MNT_LOCAL;
1124         MNT_IUNLOCK(mp);
1125         return (error);
1126
     /*
      * Failure path: undo the write suspension and the extended attribute
      * shutdown; the mount itself is left intact.
      */
1127 fail:
1128         if (susp) {
1129                 vfs_write_resume(mp);
1130                 vn_start_write(NULL, &mp, V_WAIT);
1131         }
1132 #ifdef UFS_EXTATTR
1133         if (e_restart) {
1134                 ufs_extattr_uepm_init(&ump->um_extattr);
1135 #ifdef UFS_EXTATTR_AUTOSTART
1136                 (void) ufs_extattr_autostart(mp, td);
1137 #endif
1138         }
1139 #endif
1140
1141         return (error);
1142 }
1143
1144 /*
1145  * Flush out all the files in a filesystem.
      *
      *  mp    - mount point whose vnodes are flushed.
      *  flags - vflush() flags (e.g. FORCECLOSE); SKIPSYSTEM is added
      *          internally for the preliminary passes.
      *  td    - calling thread.
      *
      * Returns 0, or the first error from vflush(); otherwise the result
      * of the final VOP_FSYNC() on the device vnode.
1146  */
1147 int
1148 ffs_flushfiles(mp, flags, td)
1149         struct mount *mp;
1150         int flags;
1151         struct thread *td;
1152 {
1153         struct ufsmount *ump;
1154         int error;
1155
1156         ump = VFSTOUFS(mp);
1157 #ifdef QUOTA
             /*
              * With quotas enabled, flush everything except system vnodes
              * first, then turn off every quota type.
              */
1158         if (mp->mnt_flag & MNT_QUOTA) {
1159                 int i;
1160                 error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1161                 if (error)
1162                         return (error);
1163                 for (i = 0; i < MAXQUOTAS; i++) {
1164                         quotaoff(td, mp, i);
1165                 }
1166                 /*
1167                  * Here we fall through to vflush again to ensure
1168                  * that we have gotten rid of all the system vnodes.
1169                  */
1170         }
1171 #endif
1172         ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
             /*
              * If the device vnode is flagged VV_COPYONWRITE, flush the
              * non-system vnodes, release the snapshots, and force-close
              * whatever remains in the final pass.
              */
1173         if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1174                 if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1175                         return (error);
1176                 ffs_snapshot_unmount(mp);
1177                 flags |= FORCECLOSE;
1178                 /*
1179                  * Here we fall through to vflush again to ensure
1180                  * that we have gotten rid of all the system vnodes.
1181                  */
1182         }
1183         /*
1184          * Flush all the files.
1185          */
1186         if ((error = vflush(mp, 0, flags, td)) != 0)
1187                 return (error);
1188         /*
1189          * Flush filesystem metadata.
              * The device vnode is locked exclusively around the synchronous
              * fsync.
1190          */
1191         vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1192         error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1193         VOP_UNLOCK(ump->um_devvp, 0, td);
1194         return (error);
1195 }
1196
1197 /*
1198  * Get filesystem statistics.
      *
      *  mp  - mount point to report on.
      *  sbp - statfs structure filled in by this routine.
      *  td  - calling thread (not used in this routine).
      *
      * Always returns 0.  Panics if the superblock magic number is
      * neither FS_UFS1_MAGIC nor FS_UFS2_MAGIC.
1199  */
1200 static int
1201 ffs_statfs(mp, sbp, td)
1202         struct mount *mp;
1203         struct statfs *sbp;
1204         struct thread *td;
1205 {
1206         struct ufsmount *ump;
1207         struct fs *fs;
1208
1209         ump = VFSTOUFS(mp);
1210         fs = ump->um_fs;
1211         if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1212                 panic("ffs_statfs");
1213         sbp->f_version = STATFS_VERSION;
             /* The reporting unit is the fragment; preferred I/O is a block. */
1214         sbp->f_bsize = fs->fs_fsize;
1215         sbp->f_iosize = fs->fs_bsize;
1216         sbp->f_blocks = fs->fs_dsize;
             /*
              * The free counts are read under the UFS lock.  Pending block
              * and inode counts are added to the free totals (pending
              * blocks are converted with dbtofsb()).
              */
1217         UFS_LOCK(ump);
1218         sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1219             fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1220         sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1221             dbtofsb(fs, fs->fs_pendingblocks);
1222         sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1223         sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1224         UFS_UNLOCK(ump);
1225         sbp->f_namemax = NAME_MAX;
1226         return (0);
1227 }
1228
1229 /*
1230  * Go through the disk queues to initiate sandbagged IO;
1231  * go through the inodes to write those that have been modified;
1232  * initiate the writing of the super block if it has been modified.
1233  *
      *  mp      - mount point to sync.
      *  waitfor - MNT_WAIT, MNT_LAZY, or MNT_SUSPEND; MNT_SUSPEND is
      *            treated as MNT_WAIT and additionally marks the mount
      *            suspended once no more work is found.
      *  td      - calling thread.
      *
      * Returns 0, or the last error recorded while syncing.
      *
1234  * Note: we are always called with the filesystem marked `MPBUSY'.
1235  */
1236 static int
1237 ffs_sync(mp, waitfor, td)
1238         struct mount *mp;
1239         int waitfor;
1240         struct thread *td;
1241 {
1242         struct vnode *mvp, *vp, *devvp;
1243         struct inode *ip;
1244         struct ufsmount *ump = VFSTOUFS(mp);
1245         struct fs *fs;
1246         int error, count, wait, lockreq, allerror = 0;
1247         int suspend;
1248         int suspended;
1249         int secondary_writes;
1250         int secondary_accwrites;
1251         int softdep_deps;
1252         int softdep_accdeps;
1253         struct bufobj *bo;
1254
1255         fs = ump->um_fs;
1256         if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {            /* XXX */
1257                 printf("fs = %s\n", fs->fs_fsmnt);
1258                 panic("ffs_sync: rofs mod");
1259         }
1260         /*
1261          * Write back each (modified) inode.
              *
              * By default vnode locks are attempted with LK_NOWAIT; a
              * synchronous (MNT_WAIT) sync drops LK_NOWAIT and blocks
              * instead.
1262          */
1263         wait = 0;
1264         suspend = 0;
1265         suspended = 0;
1266         lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1267         if (waitfor == MNT_SUSPEND) {
1268                 suspend = 1;
1269                 waitfor = MNT_WAIT;
1270         }
1271         if (waitfor == MNT_WAIT) {
1272                 wait = 1;
1273                 lockreq = LK_EXCLUSIVE;
1274         }
1275         lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
1276         MNT_ILOCK(mp);
     /* Every "goto loop" arrives here with the mount interlock held. */
1277 loop:
1278         /* Grab snapshot of secondary write counts */
1279         secondary_writes = mp->mnt_secondary_writes;
1280         secondary_accwrites = mp->mnt_secondary_accwrites;
1281
1282         /* Grab snapshot of softdep dependency counts */
1283         MNT_IUNLOCK(mp);
1284         softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
1285         MNT_ILOCK(mp);
1286
1287         MNT_VNODE_FOREACH(vp, mp, mvp) {
1288                 /*
1289                  * Depend on the mntvnode_slock to keep things stable enough
1290                  * for a quick test.  Since there might be hundreds of
1291                  * thousands of vnodes, we cannot afford even a subroutine
1292                  * call unless there's a good chance that we have work to do.
1293                  */
1294                 VI_LOCK(vp);
1295                 if (vp->v_iflag & VI_DOOMED) {
1296                         VI_UNLOCK(vp);
1297                         continue;
1298                 }
1299                 ip = VTOI(vp);
                     /*
                      * Skip vnodes of type VNON, and vnodes that have
                      * neither pending inode flag updates nor dirty buffers.
                      */
1300                 if (vp->v_type == VNON || ((ip->i_flag &
1301                     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1302                     vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1303                         VI_UNLOCK(vp);
1304                         continue;
1305                 }
1306                 MNT_IUNLOCK(mp);
                     /*
                      * vget() is called with LK_INTERLOCK, while the vnode
                      * interlock is still held.  On ENOENT or ENOLCK the
                      * iteration is aborted and restarted from the top; any
                      * other failure just skips this vnode.
                      */
1307                 if ((error = vget(vp, lockreq, td)) != 0) {
1308                         MNT_ILOCK(mp);
1309                         if (error == ENOENT || error == ENOLCK) {
1310                                 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1311                                 goto loop;
1312                         }
1313                         continue;
1314                 }
1315                 if ((error = ffs_syncvnode(vp, waitfor)) != 0)
1316                         allerror = error;
1317                 vput(vp);
1318                 MNT_ILOCK(mp);
1319         }
1320         MNT_IUNLOCK(mp);
1321         /*
1322          * Force stale filesystem control information to be flushed.
1323          */
1324         if (waitfor == MNT_WAIT) {
1325                 if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1326                         allerror = error;
1327                 /* Flushed work items may create new vnodes to clean */
1328                 if (allerror == 0 && count) {
1329                         MNT_ILOCK(mp);
1330                         goto loop;
1331                 }
1332         }
1333 #ifdef QUOTA
1334         qsync(mp);
1335 #endif
             /*
              * Flush the device vnode unless this is a lazy sync.  After a
              * successful MNT_WAIT flush the whole pass is repeated.  If the
              * device has nothing outstanding and suspension was requested,
              * try to mark the mount suspended.
              */
1336         devvp = ump->um_devvp;
1337         VI_LOCK(devvp);
1338         bo = &devvp->v_bufobj;
1339         if (waitfor != MNT_LAZY &&
1340             (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1341                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1342                 if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
1343                         allerror = error;
1344                 VOP_UNLOCK(devvp, 0, td);
1345                 if (allerror == 0 && waitfor == MNT_WAIT) {
1346                         MNT_ILOCK(mp);
1347                         goto loop;
1348                 }
1349         } else if (suspend != 0) {
                     /*
                      * NOTE(review): softdep_check_suspend() is entered with
                      * the devvp interlock held and, per the mtx_assert
                      * below, is expected to return 0 with the mount
                      * interlock held.  Presumably it also holds the mount
                      * interlock on the nonzero ("goto loop") path - confirm
                      * against its definition.
                      */
1350                 if (softdep_check_suspend(mp,
1351                                           devvp,
1352                                           softdep_deps,
1353                                           softdep_accdeps,
1354                                           secondary_writes,
1355                                           secondary_accwrites) != 0)
1356                         goto loop;      /* More work needed */
1357                 mtx_assert(MNT_MTX(mp), MA_OWNED);
1358                 mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
1359                 MNT_IUNLOCK(mp);
1360                 suspended = 1;
1361         } else
1362                 VI_UNLOCK(devvp);
1363         /*
1364          * Write back modified superblock.
              * "suspended" tells ffs_sbupdate() whether the mount was just
              * marked suspended above.
1365          */
1366         if (fs->fs_fmod != 0 &&
1367             (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
1368                 allerror = error;
1369         return (allerror);
1370 }
1371
1372 int
1373 ffs_vget(mp, ino, flags, vpp)
1374         struct mount *mp;
1375         ino_t ino;
1376         int flags;
1377         struct vnode **vpp;
1378 {
             /*
              * Get the vnode for inode number ino on mount mp.  This simply
              * calls ffs_vgetf() with no FFS-specific flags (ffs_flags == 0)
              * and returns its result; *vpp is set by ffs_vgetf().
              */
1379         return (ffs_vgetf(mp, ino, flags, vpp, 0));
1380 }
1381
1382 int
1383 ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
1384         struct mount *mp;
1385         ino_t ino;
1386         int flags;
1387         struct vnode **vpp;
1388         int ffs_flags;
1389 {
             /*
              * Look up, or create and initialize, the vnode for inode number
              * ino on mount mp.
              *
              *  flags     - lock flags; a shared lock request is promoted to
              *              exclusive when a new vnode has to be created.
              *  vpp       - receives the vnode; set to NULL on the error
              *              paths handled in this function.
              *  ffs_flags - FFSV_FORCEINSMQ sets VV_FORCEINSMQ on the new
              *              vnode for the duration of insmntque().
              *
              * Returns 0 on success or an error number.
              */
1390         struct fs *fs;
1391         struct inode *ip;
1392         struct ufsmount *ump;
1393         struct buf *bp;
1394         struct vnode *vp;
1395         struct cdev *dev;
1396         int error;
1397         struct thread *td;
1398
             /* Fast path: return whatever the vnode hash lookup produced. */
1399         error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
1400         if (error || *vpp != NULL)
1401                 return (error);
1402
1403         /*
1404          * We must promote to an exclusive lock for vnode creation.  This
1405          * can happen if lookup is passed LOCKSHARED.
1406          */
1407         if ((flags & LK_TYPE_MASK) == LK_SHARED) {
1408                 flags &= ~LK_TYPE_MASK;
1409                 flags |= LK_EXCLUSIVE;
1410         }
1411
1412         /*
1413          * We do not lock vnode creation as it is believed to be too
1414          * expensive for such rare case as simultaneous creation of vnode
1415          * for same ino by different processes. We just allow them to race
1416          * and check later to decide who wins. Let the race begin!
1417          */
1418
1419         ump = VFSTOUFS(mp);
1420         dev = ump->um_dev;
1421         fs = ump->um_fs;
1422
1423         /*
1424          * If this MALLOC() is performed after the getnewvnode()
1425          * it might block, leaving a vnode with a NULL v_data to be
1426          * found by ffs_sync() if a sync happens to fire right then,
1427          * which will cause a panic because ffs_sync() blindly
1428          * dereferences vp->v_data (as well it should).
1429          */
1430         ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
1431
1432         /* Allocate a new vnode/inode. */
1433         if (fs->fs_magic == FS_UFS1_MAGIC)
1434                 error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
1435         else
1436                 error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
1437         if (error) {
1438                 *vpp = NULL;
1439                 uma_zfree(uma_inode, ip);
1440                 return (error);
1441         }
1442         /*
1443          * FFS supports recursive and shared locking.
1444          */
1445         vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1446         vp->v_vnlock->lk_flags &= ~LK_NOSHARE;
             /* Cross-link the vnode and the in-core inode. */
1447         vp->v_data = ip;
1448         vp->v_bufobj.bo_bsize = fs->fs_bsize;
1449         ip->i_vnode = vp;
1450         ip->i_ump = ump;
1451         ip->i_fs = fs;
1452         ip->i_dev = dev;
1453         ip->i_number = ino;
1454         ip->i_ea_refs = 0;
1455 #ifdef QUOTA
1456         {
1457                 int i;
1458                 for (i = 0; i < MAXQUOTAS; i++)
1459                         ip->i_dquot[i] = NODQUOT;
1460         }
1461 #endif
1462
             /*
              * Lock the new vnode exclusively, then put it on the mount's
              * vnode list.
              * NOTE(review): on insmntque() failure neither vp nor ip is
              * released here; presumably insmntque() disposes of the vnode
              * itself - confirm.
              */
1463         td = curthread;
1464         lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL, td);
1465         if (ffs_flags & FFSV_FORCEINSMQ)
1466                 vp->v_vflag |= VV_FORCEINSMQ;
1467         error = insmntque(vp, mp);
1468         if (error != 0) {
1469                 *vpp = NULL;
1470                 return (error);
1471         }
1472         vp->v_vflag &= ~VV_FORCEINSMQ;
             /*
              * Enter the vnode in the hash.  A non-NULL *vpp here presumably
              * means another thread won the creation race and its vnode is
              * returned instead - confirm against vfs_hash_insert().
              */
1473         error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
1474         if (error || *vpp != NULL)
1475                 return (error);
1476
1477         /* Read in the disk contents for the inode, copy into the inode. */
1478         error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1479             (int)fs->fs_bsize, NOCRED, &bp);
1480         if (error) {
1481                 /*
1482                  * The inode does not contain anything useful, so it would
1483                  * be misleading to leave it on its hash chain. With mode
1484                  * still zero, it will be unlinked and returned to the free
1485                  * list by vput().
1486                  */
1487                 brelse(bp);
1488                 vput(vp);
1489                 *vpp = NULL;
1490                 return (error);
1491         }
             /* Allocate the dinode copy that matches the filesystem type. */
1492         if (ip->i_ump->um_fstype == UFS1)
1493                 ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1494         else
1495                 ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1496         ffs_load_inode(bp, ip, fs, ino);
             /*
              * With soft updates active, the soft updates code is told about
              * the loaded inode block; otherwise the effective link count is
              * simply the on-disk link count.
              */
1497         if (DOINGSOFTDEP(vp))
1498                 softdep_load_inodeblock(ip);
1499         else
1500                 ip->i_effnlink = ip->i_nlink;
1501         bqrelse(bp);
1502
1503         /*
1504          * Initialize the vnode from the inode, check for aliases.
1505          * Note that the underlying vnode may have changed.
1506          */
1507         if (ip->i_ump->um_fstype == UFS1)
1508                 error = ufs_vinit(mp, &ffs_fifoops1, &vp);
1509         else
1510                 error = ufs_vinit(mp, &ffs_fifoops2, &vp);
1511         if (error) {
1512                 vput(vp);
1513                 *vpp = NULL;
1514                 return (error);
1515         }
1516
1517         /*
1518          * Finish inode initialization.
1519          */
1520
1521         /*
1522          * Set up a generation number for this inode if it does not
1523          * already have one. This should only happen on old filesystems.
              * The new number is only marked for write-back (IN_MODIFIED and
              * the dinode copy) when the mount is not read-only.
1524          */
1525         if (ip->i_gen == 0) {
1526                 ip->i_gen = arc4random() / 2 + 1;
1527                 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1528                         ip->i_flag |= IN_MODIFIED;
1529                         DIP_SET(ip, i_gen, ip->i_gen);
1530                 }
1531         }
1532         /*
1533          * Ensure that uid and gid are correct. This is a temporary
1534          * fix until fsck has been changed to do the update.
1535          */
1536         if (fs->fs_magic == FS_UFS1_MAGIC &&            /* XXX */
1537             fs->fs_old_inodefmt < FS_44INODEFMT) {      /* XXX */
1538                 ip->i_uid = ip->i_din1->di_ouid;        /* XXX */
1539                 ip->i_gid = ip->i_din1->di_ogid;        /* XXX */
1540         }                                               /* XXX */
1541
1542 #ifdef MAC
1543         if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1544                 /*
1545                  * If this vnode is already allocated, and we're running
1546                  * multi-label, attempt to perform a label association
1547                  * from the extended attributes on the inode.
1548                  */
1549                 error = mac_associate_vnode_extattr(mp, vp);
1550                 if (error) {
1551                         /* ufs_inactive will release ip->i_devvp ref. */
1552                         vput(vp);
1553                         *vpp = NULL;
1554                         return (error);
1555                 }
1556         }
1557 #endif
1558
1559         *vpp = vp;
1560         return (0);
1561 }
1562
1563 /*
1564  * File handle to vnode
1565  *
1566  * Have to be really careful about stale file handles:
1567  * - check that the inode number is valid
1568  * - call ffs_vget() to get the locked inode
1569  * - check for an unallocated inode (i_mode == 0)
1570  * - check that the given client host has export rights and return
1571  *   those rights via. exflagsp and credanonp
1572  */
1573 static int
1574 ffs_fhtovp(mp, fhp, vpp)
1575         struct mount *mp;
1576         struct fid *fhp;
1577         struct vnode **vpp;
1578 {
1579         struct ufid *ufhp;
1580         struct fs *fs;
1581
1582         ufhp = (struct ufid *)fhp;
1583         fs = VFSTOUFS(mp)->um_fs;
1584         if (ufhp->ufid_ino < ROOTINO ||
1585             ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1586                 return (ESTALE);
1587         return (ufs_fhtovp(mp, ufhp, vpp));
1588 }
1589
/*
 * Initialize the filesystem: softdep_initialize() is run first, then
 * the work is handed to ufs_init(), whose result is returned.
 */
static int
ffs_init(struct vfsconf *vfsp)
{

	softdep_initialize();
	return (ufs_init(vfsp));
}
1601
/*
 * Undo the work of ffs_init(), in the opposite order: ufs_uninit()
 * first, then softdep_uninitialize().  The value returned is the one
 * from ufs_uninit(); softdep_uninitialize() runs whatever that value is.
 */
static int
ffs_uninit(struct vfsconf *vfsp)
{
	int ret;

	ret = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (ret);
}
1615
1616 /*
1617  * Write a superblock and associated information back to disk.
1618  */
1619 int
1620 ffs_sbupdate(mp, waitfor, suspended)
1621         struct ufsmount *mp;
1622         int waitfor;
1623         int suspended;
1624 {
1625         struct fs *fs = mp->um_fs;
1626         struct buf *sbbp;
1627         struct buf *bp;
1628         int blks;
1629         void *space;
1630         int i, size, error, allerror = 0;
1631
1632         if (fs->fs_ronly == 1 &&
1633             (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1634             (MNT_RDONLY | MNT_UPDATE))
1635                 panic("ffs_sbupdate: write read-only filesystem");
1636         /*
1637          * We use the superblock's buf to serialize calls to ffs_sbupdate().
1638          */
1639         sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1640             0, 0, 0);
1641         /*
1642          * First write back the summary information.
1643          */
1644         blks = howmany(fs->fs_cssize, fs->fs_fsize);
1645         space = fs->fs_csp;
1646         for (i = 0; i < blks; i += fs->fs_frag) {
1647                 size = fs->fs_bsize;
1648                 if (i + fs->fs_frag > blks)
1649                         size = (blks - i) * fs->fs_fsize;
1650                 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1651                     size, 0, 0, 0);
1652                 bcopy(space, bp->b_data, (u_int)size);
1653                 space = (char *)space + size;
1654                 if (suspended)
1655                         bp->b_flags |= B_VALIDSUSPWRT;
1656                 if (waitfor != MNT_WAIT)
1657                         bawrite(bp);
1658                 else if ((error = bwrite(bp)) != 0)
1659                         allerror = error;
1660         }
1661         /*
1662          * Now write back the superblock itself. If any errors occurred
1663          * up to this point, then fail so that the superblock avoids
1664          * being written out as clean.
1665          */
1666         if (allerror) {
1667                 brelse(sbbp);
1668                 return (allerror);
1669         }
1670         bp = sbbp;
1671         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1672             (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1673                 printf("%s: correcting fs_sblockloc from %jd to %d\n",
1674                     fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1675                 fs->fs_sblockloc = SBLOCK_UFS1;
1676         }
1677         if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1678             (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1679                 printf("%s: correcting fs_sblockloc from %jd to %d\n",
1680                     fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1681                 fs->fs_sblockloc = SBLOCK_UFS2;
1682         }
1683         fs->fs_fmod = 0;
1684         fs->fs_time = time_second;
1685         bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1686         ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1687         if (suspended)
1688                 bp->b_flags |= B_VALIDSUSPWRT;
1689         if (waitfor != MNT_WAIT)
1690                 bawrite(bp);
1691         else if ((error = bwrite(bp)) != 0)
1692                 allerror = error;
1693         return (allerror);
1694 }
1695
/*
 * Extended attribute control entry point.  The request is forwarded,
 * arguments untouched, to ufs_extattrctl() when the kernel is built with
 * UFS_EXTATTR and to vfs_stdextattrctl() otherwise; the callee's return
 * value is passed straight back.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1709
1710 static void
1711 ffs_ifree(struct ufsmount *ump, struct inode *ip)
1712 {
1713
1714         if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1715                 uma_zfree(uma_ufs1, ip->i_din1);
1716         else if (ip->i_din2 != NULL)
1717                 uma_zfree(uma_ufs2, ip->i_din2);
1718         uma_zfree(uma_inode, ip);
1719 }
1720
1721 static int dobkgrdwrite = 1;
1722 SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
1723     "Do background writes (honoring the BV_BKGRDWRITE flag)?");
1724
1725 /*
1726  * Complete a background write started from bwrite.
1727  */
1728 static void
1729 ffs_backgroundwritedone(struct buf *bp)
1730 {
1731         struct bufobj *bufobj;
1732         struct buf *origbp;
1733
1734         /*
1735          * Find the original buffer that we are writing.
1736          */
1737         bufobj = bp->b_bufobj;
1738         BO_LOCK(bufobj);
1739         if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
1740                 panic("backgroundwritedone: lost buffer");
1741         /* Grab an extra reference to be dropped by the bufdone() below. */
1742         bufobj_wrefl(bufobj);
1743         BO_UNLOCK(bufobj);
1744         /*
1745          * Process dependencies then return any unfinished ones.
1746          */
1747         if (!LIST_EMPTY(&bp->b_dep))
1748                 buf_complete(bp);
1749 #ifdef SOFTUPDATES
1750         if (!LIST_EMPTY(&bp->b_dep))
1751                 softdep_move_dependencies(bp, origbp);
1752 #endif
1753         /*
1754          * This buffer is marked B_NOCACHE so when it is released
1755          * by biodone it will be tossed.
1756          */
1757         bp->b_flags |= B_NOCACHE;
1758         bp->b_flags &= ~B_CACHE;
1759         bufdone(bp);
1760         BO_LOCK(bufobj);
1761         /*
1762          * Clear the BV_BKGRDINPROG flag in the original buffer
1763          * and awaken it if it is waiting for the write to complete.
1764          * If BV_BKGRDINPROG is not set in the original buffer it must
1765          * have been released and re-instantiated - which is not legal.
1766          */
1767         KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
1768             ("backgroundwritedone: lost buffer2"));
1769         origbp->b_vflags &= ~BV_BKGRDINPROG;
1770         if (origbp->b_vflags & BV_BKGRDWAIT) {
1771                 origbp->b_vflags &= ~BV_BKGRDWAIT;
1772                 wakeup(&origbp->b_xflags);
1773         }
1774         BO_UNLOCK(bufobj);
1775 }
1776
1777
1778 /*
1779  * Write, release buffer on completion.  (Done by iodone
1780  * if async).  Do not bother writing anything if the buffer
1781  * is invalid.
1782  *
1783  * Note that we set B_CACHE here, indicating that buffer is
1784  * fully valid and thus cacheable.  This is true even of NFS
1785  * now so we set it generally.  This could be set either here
1786  * or in biodone() since the I/O is synchronous.  We put it
1787  * here.
1788  */
1789 static int
1790 ffs_bufwrite(struct buf *bp)
1791 {
1792         int oldflags, s;
1793         struct buf *newbp;
1794
1795         CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
1796         if (bp->b_flags & B_INVAL) {
1797                 brelse(bp);
1798                 return (0);
1799         }
1800
1801         oldflags = bp->b_flags;
1802
1803         if (BUF_REFCNT(bp) == 0)
1804                 panic("bufwrite: buffer is not busy???");
1805         s = splbio();
1806         /*
1807          * If a background write is already in progress, delay
1808          * writing this block if it is asynchronous. Otherwise
1809          * wait for the background write to complete.
1810          */
1811         BO_LOCK(bp->b_bufobj);
1812         if (bp->b_vflags & BV_BKGRDINPROG) {
1813                 if (bp->b_flags & B_ASYNC) {
1814                         BO_UNLOCK(bp->b_bufobj);
1815                         splx(s);
1816                         bdwrite(bp);
1817                         return (0);
1818                 }
1819                 bp->b_vflags |= BV_BKGRDWAIT;
1820                 msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
1821                 if (bp->b_vflags & BV_BKGRDINPROG)
1822                         panic("bufwrite: still writing");
1823         }
1824         BO_UNLOCK(bp->b_bufobj);
1825
1826         /* Mark the buffer clean */
1827         bundirty(bp);
1828
1829         /*
1830          * If this buffer is marked for background writing and we
1831          * do not have to wait for it, make a copy and write the
1832          * copy so as to leave this buffer ready for further use.
1833          *
1834          * This optimization eats a lot of memory.  If we have a page
1835          * or buffer shortfall we can't do it.
1836          */
1837         if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
1838             (bp->b_flags & B_ASYNC) &&
1839             !vm_page_count_severe() &&
1840             !buf_dirty_count_severe()) {
1841                 KASSERT(bp->b_iodone == NULL,
1842                     ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
1843
1844                 /* get a new block */
1845                 newbp = geteblk(bp->b_bufsize);
1846
1847                 /*
1848                  * set it to be identical to the old block.  We have to
1849                  * set b_lblkno and BKGRDMARKER before calling bgetvp()
1850                  * to avoid confusing the splay tree and gbincore().
1851                  */
1852                 memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
1853                 newbp->b_lblkno = bp->b_lblkno;
1854                 newbp->b_xflags |= BX_BKGRDMARKER;
1855                 BO_LOCK(bp->b_bufobj);
1856                 bp->b_vflags |= BV_BKGRDINPROG;
1857                 bgetvp(bp->b_vp, newbp);
1858                 BO_UNLOCK(bp->b_bufobj);
1859                 newbp->b_bufobj = &bp->b_vp->v_bufobj;
1860                 newbp->b_blkno = bp->b_blkno;
1861                 newbp->b_offset = bp->b_offset;
1862                 newbp->b_iodone = ffs_backgroundwritedone;
1863                 newbp->b_flags |= B_ASYNC;
1864                 newbp->b_flags &= ~B_INVAL;
1865
1866 #ifdef SOFTUPDATES
1867                 /* move over the dependencies */
1868                 if (!LIST_EMPTY(&bp->b_dep))
1869                         softdep_move_dependencies(bp, newbp);
1870 #endif
1871
1872                 /*
1873                  * Initiate write on the copy, release the original to
1874                  * the B_LOCKED queue so that it cannot go away until
1875                  * the background write completes. If not locked it could go
1876                  * away and then be reconstituted while it was being written.
1877                  * If the reconstituted buffer were written, we could end up
1878                  * with two background copies being written at the same time.
1879                  */
1880                 bqrelse(bp);
1881                 bp = newbp;
1882         }
1883
1884         /* Let the normal bufwrite do the rest for us */
1885         return (bufwrite(bp));
1886 }
1887
1888
1889 static void
1890 ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
1891 {
1892         struct vnode *vp;
1893         int error;
1894         struct buf *tbp;
1895
1896         vp = bo->__bo_vnode;
1897         if (bp->b_iocmd == BIO_WRITE) {
1898                 if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
1899                     bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
1900                     (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
1901                         panic("ffs_geom_strategy: bad I/O");
1902                 bp->b_flags &= ~B_VALIDSUSPWRT;
1903                 if ((vp->v_vflag & VV_COPYONWRITE) &&
1904                     vp->v_rdev->si_snapdata != NULL) {
1905                         if ((bp->b_flags & B_CLUSTER) != 0) {
1906                                 runningbufwakeup(bp);
1907                                 TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1908                                               b_cluster.cluster_entry) {
1909                                         error = ffs_copyonwrite(vp, tbp);
1910                                         if (error != 0 &&
1911                                             error != EOPNOTSUPP) {
1912                                                 bp->b_error = error;
1913                                                 bp->b_ioflags |= BIO_ERROR;
1914                                                 bufdone(bp);
1915                                                 return;
1916                                         }
1917                                 }
1918                                 bp->b_runningbufspace = bp->b_bufsize;
1919                                 atomic_add_int(&runningbufspace,
1920                                                bp->b_runningbufspace);
1921                         } else {
1922                                 error = ffs_copyonwrite(vp, bp);
1923                                 if (error != 0 && error != EOPNOTSUPP) {
1924                                         bp->b_error = error;
1925                                         bp->b_ioflags |= BIO_ERROR;
1926                                         bufdone(bp);
1927                                         return;
1928                                 }
1929                         }
1930                 }
1931 #ifdef SOFTUPDATES
1932                 if ((bp->b_flags & B_CLUSTER) != 0) {
1933                         TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1934                                       b_cluster.cluster_entry) {
1935                                 if (!LIST_EMPTY(&tbp->b_dep))
1936                                         buf_start(tbp);
1937                         }
1938                 } else {
1939                         if (!LIST_EMPTY(&bp->b_dep))
1940                                 buf_start(bp);
1941                 }
1942
1943 #endif
1944         }
1945         g_vfs_strategy(bo, bp);
1946 }
1947
1948 #ifdef  DDB
1949
1950 static void
1951 db_print_ffs(struct ufsmount *ump)
1952 {
1953         db_printf("mp %p %s devvp %p fs %p su_wl %d su_wl_in %d su_deps %d "
1954                   "su_req %d\n",
1955             ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
1956             ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
1957             ump->softdep_on_worklist_inprogress, ump->softdep_deps,
1958             ump->softdep_req);
1959 }
1960
1961 DB_SHOW_COMMAND(ffs, db_show_ffs)
1962 {
1963         struct mount *mp;
1964         struct ufsmount *ump;
1965
1966         if (have_addr) {
1967                 ump = VFSTOUFS((struct mount *)addr);
1968                 db_print_ffs(ump);
1969                 return;
1970         }
1971
1972         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
1973                 if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
1974                         db_print_ffs(VFSTOUFS(mp));
1975         }
1976 }
1977
1978 #endif  /* DDB */