sys/ufs/ffs/ffs_vfsops.c

   1 /*-
   2  * Copyright (c) 1989, 1991, 1993, 1994
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 4. Neither the name of the University nor the names of its contributors
  14  *    may be used to endorse or promote products derived from this software
  15  *    without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  *
  29  *      @(#)ffs_vfsops.c        8.31 (Berkeley) 5/20/95
  30  */
  31
  32 #include <sys/cdefs.h>
  33 __FBSDID("$FreeBSD$");
  34
  35 #include "opt_quota.h"
  36 #include "opt_ufs.h"
  37 #include "opt_ffs.h"
  38 #include "opt_ddb.h"
  39
  40 #include <sys/param.h>
  41 #include <sys/systm.h>
  42 #include <sys/namei.h>
  43 #include <sys/priv.h>
  44 #include <sys/proc.h>
  45 #include <sys/kernel.h>
  46 #include <sys/vnode.h>
  47 #include <sys/mount.h>
  48 #include <sys/bio.h>
  49 #include <sys/buf.h>
  50 #include <sys/conf.h>
  51 #include <sys/fcntl.h>
  52 #include <sys/malloc.h>
  53 #include <sys/mutex.h>
  54
  55 #include <security/mac/mac_framework.h>
  56
  57 #include <ufs/ufs/extattr.h>
  58 #include <ufs/ufs/gjournal.h>
  59 #include <ufs/ufs/quota.h>
  60 #include <ufs/ufs/ufsmount.h>
  61 #include <ufs/ufs/inode.h>
  62 #include <ufs/ufs/ufs_extern.h>
  63
  64 #include <ufs/ffs/fs.h>
  65 #include <ufs/ffs/ffs_extern.h>
  66
  67 #include <vm/vm.h>
  68 #include <vm/uma.h>
  69 #include <vm/vm_page.h>
  70
  71 #include <geom/geom.h>
  72 #include <geom/geom_vfs.h>
  73
  74 #include <ddb/ddb.h>
  75
  /*
   * UMA zones for struct inode, struct ufs1_dinode and struct ufs2_dinode
   * objects.  All three start out NULL and are created together by
   * ffs_mount() the first time it finds uma_inode still NULL.
   */
  76 static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
  77
  /*
   * Forward declarations: file-local helpers first, then the handlers
   * that are plugged into the vfsops vector further down.
   */
  78 static int      ffs_reload(struct mount *, struct thread *);
  79 static int      ffs_mountfs(struct vnode *, struct mount *, struct thread *);
  80 static void     ffs_oldfscompat_read(struct fs *, struct ufsmount *,
  81                     ufs2_daddr_t);
  82 static void     ffs_oldfscompat_write(struct fs *, struct ufsmount *);
  83 static void     ffs_ifree(struct ufsmount *ump, struct inode *ip);
  84 static vfs_init_t ffs_init;
  85 static vfs_uninit_t ffs_uninit;
  86 static vfs_extattrctl_t ffs_extattrctl;
  87 static vfs_cmount_t ffs_cmount;
  88 static vfs_unmount_t ffs_unmount;
  89 static vfs_mount_t ffs_mount;
  90 static vfs_statfs_t ffs_statfs;
  91 static vfs_fhtovp_t ffs_fhtovp;
  92 static vfs_sync_t ffs_sync;
  93
  /*
   * VFS operations vector for this filesystem.  Most entries are the
   * static ffs_* handlers declared earlier in this file; ufs_quotactl,
   * ufs_root, ffs_vget and process_deferred_inactive are not declared
   * in this part of the file (presumably they come from the ufs/ffs
   * headers included above - confirm).
   */
  94 static struct vfsops ufs_vfsops = {
  95         .vfs_extattrctl =       ffs_extattrctl,
  96         .vfs_fhtovp =           ffs_fhtovp,
  97         .vfs_init =             ffs_init,
  98         .vfs_mount =            ffs_mount,
  99         .vfs_cmount =           ffs_cmount,
 100         .vfs_quotactl =         ufs_quotactl,
 101         .vfs_root =             ufs_root,
 102         .vfs_statfs =           ffs_statfs,
 103         .vfs_sync =             ffs_sync,
 104         .vfs_uninit =           ffs_uninit,
 105         .vfs_unmount =          ffs_unmount,
 106         .vfs_vget =             ffs_vget,
 107         .vfs_susp_clean =       process_deferred_inactive,
 108 };
 109
     /*
      * Tie the vector to the filesystem name ufs (flags argument 0) and
      * declare module version 1.
      */
 110 VFS_SET(ufs_vfsops, ufs, 0);
 111 MODULE_VERSION(ufs, 1);
 112
 /* Buffer-layer handlers defined later in this file. */
 113 static b_strategy_t ffs_geom_strategy;
 114 static b_write_t ffs_bufwrite;
 115
     /*
      * Buffer operations for FFS.  ffs_mountfs() installs this table on the
      * device vnode (devvp->v_bufobj.bo_ops = &ffs_ops).  The bdflush handler
      * is the generic bufbdflush when NO_FFS_SNAPSHOT is defined and
      * ffs_bdflush otherwise.
      */
 116 static struct buf_ops ffs_ops = {
 117         .bop_name =     "FFS",
 118         .bop_write =    ffs_bufwrite,
 119         .bop_strategy = ffs_geom_strategy,
 120         .bop_sync =     bufsync,
 121 #ifdef NO_FFS_SNAPSHOT
 122         .bop_bdflush =  bufbdflush,
 123 #else
 124         .bop_bdflush =  ffs_bdflush,
 125 #endif
 126 };
 127
 128 /*
      * Mount option names accepted by ffs_mount(), which hands this
      * NULL-terminated list to vfs_filteropt() and returns EINVAL when
      * that call reports a non-zero result.
      *
 129  * Note that userquota and groupquota options are not currently used
 130  * by UFS/FFS code and generally mount(8) does not pass those options
 131  * from userland, but they can be passed by loader(8) via
 132  * vfs.root.mountfrom.options.
      * ffs_mount() deletes both of them from mnt_optnew early on.
 133  */
 134 static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
 135     "noclusterw", "noexec", "export", "force", "from", "groupquota",
 136     "multilabel", "nfsv4acls", "snapshot", "nosuid", "suiddir", "nosymfollow",
 137     "sync", "union", "userquota", NULL };
 138
 /*
  * Mount handler for UFS/FFS (installed as vfs_mount in ufs_vfsops).
  * Serves both new mounts and MNT_UPDATE requests on an existing mount.
  *
  * Options are read from mp->mnt_optnew; the from option supplies the
  * device path (fspec).
  *
  * With MNT_UPDATE set the function may, in this order:
  *   - downgrade a read-write filesystem to read-only,
  *   - reload in-core data via ffs_reload() when MNT_RELOAD is set,
  *   - upgrade a read-only filesystem to read-write,
  *   - take a snapshot via ffs_snapshot() when MNT_SNAPSHOT is set
  *     (in which case it returns straight from there).
  * It then looks up fspec, checks that it is a disk device the caller
  * may access, and either verifies it matches the mounted device
  * (update) or calls ffs_mountfs() (new mount).
  *
  * Returns 0 on success, otherwise an errno value: EINVAL for a rejected
  * option set, conflicting ACL options or a mismatching device on update,
  * EPERM when a read-write upgrade of an unclean filesystem is refused,
  * or whatever error a called routine reported.
  */
 139 static int
 140 ffs_mount(struct mount *mp)
 141 {
 142         struct vnode *devvp;
 143         struct thread *td;
 144         struct ufsmount *ump = 0;
 145         struct fs *fs;
 146         int error, flags;
 147         u_int mntorflags, mntandnotflags;
 148         accmode_t accmode;
 149         struct nameidata ndp;
 150         char *fspec;
 151
 152         td = curthread;
             /* Refuse any option name that is not listed in ffs_opts. */
 153         if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
 154                 return (EINVAL);
             /* A NULL uma_inode means none of the three zones exist yet. */
 155         if (uma_inode == NULL) {
 156                 uma_inode = uma_zcreate("FFS inode",
 157                     sizeof(struct inode), NULL, NULL, NULL, NULL,
 158                     UMA_ALIGN_PTR, 0);
 159                 uma_ufs1 = uma_zcreate("FFS1 dinode",
 160                     sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
 161                     UMA_ALIGN_PTR, 0);
 162                 uma_ufs2 = uma_zcreate("FFS2 dinode",
 163                     sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
 164                     UMA_ALIGN_PTR, 0);
 165         }
 166
             /* The quota options pass the filter but are simply discarded. */
 167         vfs_deleteopt(mp->mnt_optnew, "groupquota");
 168         vfs_deleteopt(mp->mnt_optnew, "userquota");
 169
             /*
              * fspec is the device path; it is used for the snapshot call,
              * the namei() lookup and vfs_mountedfrom() below.
              */
 170         fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
 171         if (error)
 172                 return (error);
 173
             /*
              * Turn option names into mount flags.  mntandnotflags is never
              * changed from 0 in this function, so the mask applied below
              * clears nothing.
              */
 174         mntorflags = 0;
 175         mntandnotflags = 0;
 176         if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
 177                 mntorflags |= MNT_ACLS;
 178
 179         if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
 180                 mntorflags |= MNT_SNAPSHOT;
 181                 /*
 182                  * Once we have set the MNT_SNAPSHOT flag, do not
 183                  * persist "snapshot" in the options list.
 184                  */
 185                 vfs_deleteopt(mp->mnt_optnew, "snapshot");
 186                 vfs_deleteopt(mp->mnt_opt, "snapshot");
 187         }
 188
 189         if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) {
 190                 if (mntorflags & MNT_ACLS) {
 191                         printf("WARNING: \"acls\" and \"nfsv4acls\" "
 192                             "options are mutually exclusive\n");
 193                         return (EINVAL);
 194                 }
 195                 mntorflags |= MNT_NFS4ACLS;
 196         }
 197
 198         MNT_ILOCK(mp);
 199         mp->mnt_flag = (mp->mnt_flag | mntorflags) & ~mntandnotflags;
 200         MNT_IUNLOCK(mp);
 201         /*
 202          * If updating, check whether changing from read-only to
 203          * read/write; if there is no device name, that's all we do.
 204          */
 205         if (mp->mnt_flag & MNT_UPDATE) {
 206                 ump = VFSTOUFS(mp);
 207                 fs = ump->um_fs;
 208                 devvp = ump->um_devvp;
                     /* Downgrade: mounted read-write and ro was requested. */
 209                 if (fs->fs_ronly == 0 &&
 210                     vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 211                         /*
 212                          * Flush any dirty data and suspend filesystem.
 213                          */
 214                         if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 215                                 return (error);
                             /*
                              * Each pass drops the write reference, asks for a
                              * suspension and checks MNTK_SUSPENDED under the
                              * mount interlock.  If the flag is not set the
                              * write reference is taken again and the loop
                              * retries.
                              */
 216                         for (;;) {
 217                                 vn_finished_write(mp);
 218                                 if ((error = vfs_write_suspend(mp)) != 0)
 219                                         return (error);
 220                                 MNT_ILOCK(mp);
 221                                 if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
 222                                         /*
 223                                          * Allow the secondary writes
 224                                          * to proceed.
 225                                          */
 226                                         mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
 227                                             MNTK_SUSPEND2);
 228                                         wakeup(&mp->mnt_flag);
 229                                         MNT_IUNLOCK(mp);
 230                                         /*
 231                                          * Allow the curthread to
 232                                          * ignore the suspension to
 233                                          * synchronize on-disk state.
 234                                          */
 235                                         td->td_pflags |= TDP_IGNSUSP;
 236                                         break;
 237                                 }
 238                                 MNT_IUNLOCK(mp);
                                     /*
                                      * NOTE(review): the result of this call is
                                      * not checked, unlike the identical call
                                      * before the loop - confirm it cannot fail
                                      * here.
                                      */
 239                                 vn_start_write(NULL, &mp, V_WAIT);
 240                         }
 241                         /*
 242                          * Check for and optionally get rid of files open
 243                          * for writing.
 244                          */
 245                         flags = WRITECLOSE;
 246                         if (mp->mnt_flag & MNT_FORCE)
 247                                 flags |= FORCECLOSE;
 248                         if (mp->mnt_flag & MNT_SOFTDEP) {
 249                                 error = softdep_flushfiles(mp, flags, td);
 250                         } else {
 251                                 error = ffs_flushfiles(mp, flags, td);
 252                         }
 253                         if (error) {
 254                                 vfs_write_resume(mp);
 255                                 return (error);
 256                         }
                             /*
                              * Pending counts should be zero after the flush;
                              * report and reset them if they are not.
                              */
 257                         if (fs->fs_pendingblocks != 0 ||
 258                             fs->fs_pendinginodes != 0) {
 259                                 printf("%s: %s: blocks %jd files %d\n",
 260                                     fs->fs_fsmnt, "update error",
 261                                     (intmax_t)fs->fs_pendingblocks,
 262                                     fs->fs_pendinginodes);
 263                                 fs->fs_pendingblocks = 0;
 264                                 fs->fs_pendinginodes = 0;
 265                         }
                             /*
                              * Mark the superblock clean unless it is flagged
                              * FS_UNCLEAN or FS_NEEDSFSCK, then write it out.
                              * On a write failure the clean mark is undone and
                              * the filesystem stays read-write.
                              */
 266                         if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
 267                                 fs->fs_clean = 1;
 268                         if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 269                                 fs->fs_ronly = 0;
 270                                 fs->fs_clean = 0;
 271                                 vfs_write_resume(mp);
 272                                 return (error);
 273                         }
                             /*
                              * Adjust the GEOM consumer access counts by
                              * (0, -1, 0); presumably this gives up the write
                              * count held while read-write - confirm against
                              * g_access(9).  The return value is ignored.
                              */
 274                         DROP_GIANT();
 275                         g_topology_lock();
 276                         g_access(ump->um_cp, 0, -1, 0);
 277                         g_topology_unlock();
 278                         PICKUP_GIANT();
 279                         fs->fs_ronly = 1;
 280                         MNT_ILOCK(mp);
 281                         mp->mnt_flag |= MNT_RDONLY;
 282                         MNT_IUNLOCK(mp);
 283                         /*
 284                          * Allow the writers to note that filesystem
 285                          * is ro now.
 286                          */
 287                         vfs_write_resume(mp);
 288                 }
                     /*
                      * Reload request.  ffs_reload() itself returns EINVAL
                      * unless MNT_RDONLY is set on the mount.
                      */
 289                 if ((mp->mnt_flag & MNT_RELOAD) &&
 290                     (error = ffs_reload(mp, td)) != 0)
 291                         return (error);
                     /* Upgrade: mounted read-only and ro was not requested. */
 292                 if (fs->fs_ronly &&
 293                     !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 294                         /*
 295                          * If upgrade to read-write by non-root, then verify
 296                          * that user has necessary permissions on the device.
 297                          */
 298                         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 299                         error = VOP_ACCESS(devvp, VREAD | VWRITE,
 300                             td->td_ucred, td);
                             /* A failed access check can be overridden by privilege. */
 301                         if (error)
 302                                 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 303                         if (error) {
 304                                 VOP_UNLOCK(devvp, 0);
 305                                 return (error);
 306                         }
 307                         VOP_UNLOCK(devvp, 0);
                             /*
                              * Recompute FS_UNCLEAN from fs_clean.  An unclean
                              * filesystem is only let through with MNT_FORCE, or
                              * when FS_DOSOFTDEP is set and FS_NEEDSFSCK is not;
                              * otherwise the upgrade fails with EPERM.
                              */
 308                         fs->fs_flags &= ~FS_UNCLEAN;
 309                         if (fs->fs_clean == 0) {
 310                                 fs->fs_flags |= FS_UNCLEAN;
 311                                 if ((mp->mnt_flag & MNT_FORCE) ||
 312                                     ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
 313                                      (fs->fs_flags & FS_DOSOFTDEP))) {
 314                                         printf("WARNING: %s was not %s\n",
 315                                            fs->fs_fsmnt, "properly dismounted");
 316                                 } else {
 317                                         printf(
 318 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 319                                             fs->fs_fsmnt);
 320                                         return (EPERM);
 321                                 }
 322                         }
 323                         DROP_GIANT();
 324                         g_topology_lock();
 325                         /*
 326                          * If we're the root device, we may not have an E count
 327                          * yet, get it now.
 328                          */
 329                         if (ump->um_cp->ace == 0)
 330                                 error = g_access(ump->um_cp, 0, 1, 1);
 331                         else
 332                                 error = g_access(ump->um_cp, 0, 1, 0);
 333                         g_topology_unlock();
 334                         PICKUP_GIANT();
 335                         if (error)
 336                                 return (error);
                             /*
                              * NOTE(review): none of the error returns from here
                              * to the end of this branch undo the g_access()
                              * call above - confirm whether the counts should be
                              * released on failure.
                              */
 337                         if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 338                                 return (error);
 339                         fs->fs_ronly = 0;
 340                         MNT_ILOCK(mp);
 341                         mp->mnt_flag &= ~MNT_RDONLY;
 342                         MNT_IUNLOCK(mp);
                             /* A writable filesystem is recorded on disk as not clean. */
 343                         fs->fs_clean = 0;
 344                         if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 345                                 vn_finished_write(mp);
 346                                 return (error);
 347                         }
 348                         /* check to see if we need to start softdep */
 349                         if ((fs->fs_flags & FS_DOSOFTDEP) &&
 350                             (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
 351                                 vn_finished_write(mp);
 352                                 return (error);
 353                         }
 354                         if (fs->fs_snapinum[0] != 0)
 355                                 ffs_snapshot_mount(mp);
 356                         vn_finished_write(mp);
 357                 }
 358                 /*
 359                  * Soft updates is incompatible with "async",
 360                  * so if we are doing softupdates stop the user
 361                  * from setting the async flag in an update.
 362                  * Softdep_mount() clears it in an initial mount
 363                  * or ro->rw remount.
 364                  */
 365                 if (mp->mnt_flag & MNT_SOFTDEP) {
 366                         /* XXX: Reset too late ? */
 367                         MNT_ILOCK(mp);
 368                         mp->mnt_flag &= ~MNT_ASYNC;
 369                         MNT_IUNLOCK(mp);
 370                 }
 371                 /*
 372                  * Keep MNT_ACLS flag if it is stored in superblock.
 373                  */
 374                 if ((fs->fs_flags & FS_ACLS) != 0) {
 375                         /* XXX: Set too late ? */
 376                         MNT_ILOCK(mp);
 377                         mp->mnt_flag |= MNT_ACLS;
 378                         MNT_IUNLOCK(mp);
 379                 }
 380
                     /* Likewise keep MNT_NFS4ACLS when the superblock has FS_NFS4ACLS. */
 381                 if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
 382                         /* XXX: Set too late ? */
 383                         MNT_ILOCK(mp);
 384                         mp->mnt_flag |= MNT_NFS4ACLS;
 385                         MNT_IUNLOCK(mp);
 386                 }
 387
 388                 /*
 389                  * If this is a snapshot request, take the snapshot.
 390                  */
 391                 if (mp->mnt_flag & MNT_SNAPSHOT)
 392                         return (ffs_snapshot(mp, fspec));
 393         }
 394
 395         /*
 396          * Not an update, or updating the name: look up the name
 397          * and verify that it refers to a sensible disk device.
 398          */
             /*
              * The lookup requests LOCKLEAF; the failure paths that follow
              * release devvp with vput().
              */
 399         NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
 400         if ((error = namei(&ndp)) != 0)
 401                 return (error);
 402         NDFREE(&ndp, NDF_ONLY_PNBUF);
 403         devvp = ndp.ni_vp;
 404         if (!vn_isdisk(devvp, &error)) {
 405                 vput(devvp);
 406                 return (error);
 407         }
 408
 409         /*
 410          * If mount by non-root, then verify that user has necessary
 411          * permissions on the device.
 412          */
 413         accmode = VREAD;
 414         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 415                 accmode |= VWRITE;
 416         error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
 417         if (error)
 418                 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 419         if (error) {
 420                 vput(devvp);
 421                 return (error);
 422         }
 423
 424         if (mp->mnt_flag & MNT_UPDATE) {
 425                 /*
 426                  * Update only
 427                  *
 428                  * If it's not the same vnode, or at least the same device
 429                  * then it's not correct.
 430                  */
 431
                     /*
                      * ump was assigned in the MNT_UPDATE branch earlier.  The
                      * looked-up vnode is only needed for this comparison and
                      * is released either way.
                      */
 432                 if (devvp->v_rdev != ump->um_devvp->v_rdev)
 433                         error = EINVAL; /* needs translation */
 434                 vput(devvp);
 435                 if (error)
 436                         return (error);
 437         } else {
 438                 /*
 439                  * New mount
 440                  *
 441                  * We need the name for the mount point (also used for
 442                  * "last mounted on") copied in. If an error occurs,
 443                  * the mount point is discarded by the upper level code.
 444                  * Note that vfs_mount() populates f_mntonname for us.
 445                  */
                     /*
                      * ffs_mountfs() calls VOP_UNLOCK() on devvp before it can
                      * fail, so only the reference is dropped here (vrele
                      * rather than vput).
                      */
 446                 if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
 447                         vrele(devvp);
 448                         return (error);
 449                 }
 450         }
 451         vfs_mountedfrom(mp, fspec);
 452         return (0);
 453 }
 454
 455 /*
 456  * Compatibility with old mount system call.
      *
      * Copies the caller-supplied struct ufs_args in from data, converts its
      * export member with vfs_oexport_conv(), appends the device path and
      * the converted export data to ma as the from and export arguments,
      * and hands the list to kernel_mount().
      *
      * Returns EINVAL when data is NULL, the copyin() error when the copy
      * fails, and otherwise whatever kernel_mount() returns.
 457  */
 458
 459 static int
 460 ffs_cmount(struct mntarg *ma, void *data, int flags)
 461 {
 462         struct export_args newexp;
 463         struct ufs_args oargs;
 464         int rv;
 465
 466         /* The old interface always comes with an argument block. */
 467         if (data == NULL)
 468                 return (EINVAL);
 469
 470         rv = copyin(data, &oargs, sizeof(oargs));
 471         if (rv != 0)
 472                 return (rv);
 473
 474         vfs_oexport_conv(&oargs.export, &newexp);
 475         ma = mount_argsu(ma, "from", oargs.fspec, MAXPATHLEN);
 476         ma = mount_arg(ma, "export", &newexp, sizeof(newexp));
 477         return (kernel_mount(ma, flags));
 478 }
 479
 480 /*
 481  * Reload all incore data for a filesystem (used after running fsck on
 482  * the root filesystem and finding things to fix). The filesystem must
 483  * be mounted read-only.
 484  *
 485  * Things to do to update the mount:
 486  *      1) invalidate all cached meta-data.
 487  *      2) re-read superblock from disk.
 488  *      3) re-read summary information from disk.
 489  *      4) invalidate all inactive vnodes.
 490  *      5) invalidate all cached file data.
 491  *      6) re-read inode data for all active vnodes.
      *
      * NOTE: the step labels inside the function run from 1 to 5 and do
      * not line up with items 4-6 above: the body labels the file-data
      * invalidation as step 4 and the inode re-read as step 5, and no
      * separate pass over inactive vnodes appears in the code below.
      *
      * mp is the mount to reload; td is passed to vget() for each vnode.
      *
      * Returns 0 on success, EINVAL if the mount is not MNT_RDONLY, EIO if
      * the re-read superblock has a bad magic number or block size, or the
      * error from a failing bread().  Panics if vinvalbuf() reports a
      * non-zero result for the device vnode or for any file vnode.
 492  */
 493 static int
 494 ffs_reload(struct mount *mp, struct thread *td)
 495 {
 496         struct vnode *vp, *mvp, *devvp;
 497         struct inode *ip;
 498         void *space;
 499         struct buf *bp;
 500         struct fs *fs, *newfs;
 501         struct ufsmount *ump;
 502         ufs2_daddr_t sblockloc;
 503         int i, blks, size, error;
 504         int32_t *lp;
 505
 506         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 507                 return (EINVAL);
             /* ump is only used for UFS_LOCK/UFS_UNLOCK further down. */
 508         ump = VFSTOUFS(mp);
 509         /*
 510          * Step 1: invalidate all cached meta-data.
 511          */
 512         devvp = VFSTOUFS(mp)->um_devvp;
 513         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 514         if (vinvalbuf(devvp, 0, 0, 0) != 0)
 515                 panic("ffs_reload: dirty1");
 516         VOP_UNLOCK(devvp, 0);
 517
 518         /*
 519          * Step 2: re-read superblock from disk.
 520          */
 521         fs = VFSTOUFS(mp)->um_fs;
             /*
              * NOTE(review): this and the later bread() failure paths return
              * without calling brelse(bp) - confirm that bread() leaves no
              * buffer held when it fails.
              */
 522         if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
 523             NOCRED, &bp)) != 0)
 524                 return (error);
 525         newfs = (struct fs *)bp->b_data;
             /* Sanity-check the on-disk copy before trusting it. */
 526         if ((newfs->fs_magic != FS_UFS1_MAGIC &&
 527              newfs->fs_magic != FS_UFS2_MAGIC) ||
 528             newfs->fs_bsize > MAXBSIZE ||
 529             newfs->fs_bsize < sizeof(struct fs)) {
 530                         brelse(bp);
 531                         return (EIO);           /* XXX needs translation */
 532         }
 533         /*
 534          * Copy pointer fields back into superblock before copying in   XXX
 535          * new superblock. These should really be in the ufsmount.      XXX
 536          * Note that important parameters (eg fs_ncg) are unchanged.
 537          */
 538         newfs->fs_csp = fs->fs_csp;
 539         newfs->fs_maxcluster = fs->fs_maxcluster;
 540         newfs->fs_contigdirs = fs->fs_contigdirs;
 541         newfs->fs_active = fs->fs_active;
 542         /* The file system is still read-only. */
 543         newfs->fs_ronly = 1;
             /*
              * Remember the superblock location before the copy overwrites
              * fs.  The copy length is the fs_sbsize of the in-core (old)
              * superblock, read before bcopy() runs.
              */
 544         sblockloc = fs->fs_sblockloc;
 545         bcopy(newfs, fs, (u_int)fs->fs_sbsize);
 546         brelse(bp);
 547         mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 548         ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
             /* Report and clear any pending counts carried by the new copy. */
 549         UFS_LOCK(ump);
 550         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 551                 printf("%s: reload pending error: blocks %jd files %d\n",
 552                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 553                     fs->fs_pendinginodes);
 554                 fs->fs_pendingblocks = 0;
 555                 fs->fs_pendinginodes = 0;
 556         }
 557         UFS_UNLOCK(ump);
 558
 559         /*
 560          * Step 3: re-read summary information from disk.
 561          */
             /*
              * blks is the summary size in fragments.  The area is read one
              * block (fs_frag fragments) at a time into the existing fs_csp
              * buffer; the last read is shortened to the fragments that
              * remain.
              */
 562         blks = howmany(fs->fs_cssize, fs->fs_fsize);
 563         space = fs->fs_csp;
 564         for (i = 0; i < blks; i += fs->fs_frag) {
 565                 size = fs->fs_bsize;
 566                 if (i + fs->fs_frag > blks)
 567                         size = (blks - i) * fs->fs_fsize;
 568                 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
 569                     NOCRED, &bp);
 570                 if (error)
 571                         return (error);
 572                 bcopy(bp->b_data, space, (u_int)size);
 573                 space = (char *)space + size;
 574                 brelse(bp);
 575         }
 576         /*
 577          * We no longer know anything about clusters per cylinder group.
 578          */
             /* Reset every per-group fs_maxcluster entry to fs_contigsumsize. */
 579         if (fs->fs_contigsumsize > 0) {
 580                 lp = fs->fs_maxcluster;
 581                 for (i = 0; i < fs->fs_ncg; i++)
 582                         *lp++ = fs->fs_contigsumsize;
 583         }
 584
     /*
      * Walk every vnode on the mount.  The mount interlock is held by the
      * iteration itself, dropped before vget() and re-taken at the bottom of
      * the body.  If vget() fails the walk is aborted and restarted from the
      * loop label.
      */
 585 loop:
 586         MNT_ILOCK(mp);
 587         MNT_VNODE_FOREACH(vp, mp, mvp) {
 588                 VI_LOCK(vp);
                     /* Skip vnodes flagged VI_DOOMED. */
 589                 if (vp->v_iflag & VI_DOOMED) {
 590                         VI_UNLOCK(vp);
 591                         continue;
 592                 }
 593                 MNT_IUNLOCK(mp);
 594                 /*
 595                  * Step 4: invalidate all cached file data.
 596                  */
 597                 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
 598                         MNT_VNODE_FOREACH_ABORT(mp, mvp);
 599                         goto loop;
 600                 }
 601                 if (vinvalbuf(vp, 0, 0, 0))
 602                         panic("ffs_reload: dirty2");
 603                 /*
 604                  * Step 5: re-read inode data for all active vnodes.
 605                  */
 606                 ip = VTOI(vp);
 607                 error =
 608                     bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 609                     (int)fs->fs_bsize, NOCRED, &bp);
 610                 if (error) {
 611                         VOP_UNLOCK(vp, 0);
 612                         vrele(vp);
 613                         MNT_VNODE_FOREACH_ABORT(mp, mvp);
 614                         return (error);
 615                 }
 616                 ffs_load_inode(bp, ip, fs, ip->i_number);
                     /* Start the effective link count from the reloaded i_nlink. */
 617                 ip->i_effnlink = ip->i_nlink;
 618                 brelse(bp);
 619                 VOP_UNLOCK(vp, 0);
 620                 vrele(vp);
 621                 MNT_ILOCK(mp);
 622         }
 623         MNT_IUNLOCK(mp);
 624         return (0);
 625 }
 626
 627 /*
 628  * Possible superblock locations ordered from most to least likely.
      * The values come from the SBLOCKSEARCH macro; ffs_mountfs() tries
      * them in order and stops at the first entry equal to -1.
 629  */
 630 static int sblock_try[] = SBLOCKSEARCH;
 631
 632 /*
 633  * Common code for mount and mountroot
 634  */
 635 static int
 636 ffs_mountfs(devvp, mp, td)
 637         struct vnode *devvp;
 638         struct mount *mp;
 639         struct thread *td;
 640 {
 641         struct ufsmount *ump;
 642         struct buf *bp;
 643         struct fs *fs;
 644         struct cdev *dev;
 645         void *space;
 646         ufs2_daddr_t sblockloc;
 647         int error, i, blks, size, ronly;
 648         int32_t *lp;
 649         struct ucred *cred;
 650         struct g_consumer *cp;
 651         struct mount *nmp;
 652
 653         bp = NULL;
 654         ump = NULL;
 655         cred = td ? td->td_ucred : NOCRED;
 656         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 657
 658         dev = devvp->v_rdev;
 659         dev_ref(dev);
 660         DROP_GIANT();
 661         g_topology_lock();
 662         error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
 663
 664         /*
 665          * If we are a root mount, drop the E flag so fsck can do its magic.
 666          * We will pick it up again when we remount R/W.
 667          */
 668         if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
 669                 error = g_access(cp, 0, 0, -1);
 670         g_topology_unlock();
 671         PICKUP_GIANT();
 672         VOP_UNLOCK(devvp, 0);
 673         if (error)
 674                 goto out;
 675         if (devvp->v_rdev->si_iosize_max != 0)
 676                 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
 677         if (mp->mnt_iosize_max > MAXPHYS)
 678                 mp->mnt_iosize_max = MAXPHYS;
 679
 680         devvp->v_bufobj.bo_ops = &ffs_ops;
 681
 682         fs = NULL;
 683         sblockloc = 0;
 684         /*
 685          * Try reading the superblock in each of its possible locations.
 686          */
 687         for (i = 0; sblock_try[i] != -1; i++) {
 688                 if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
 689                         error = EINVAL;
 690                         vfs_mount_error(mp,
 691                             "Invalid sectorsize %d for superblock size %d",
 692                             cp->provider->sectorsize, SBLOCKSIZE);
 693                         goto out;
 694                 }
 695                 if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
 696                     cred, &bp)) != 0)
 697                         goto out;
 698                 fs = (struct fs *)bp->b_data;
 699                 sblockloc = sblock_try[i];
 700                 if ((fs->fs_magic == FS_UFS1_MAGIC ||
 701                      (fs->fs_magic == FS_UFS2_MAGIC &&
 702                       (fs->fs_sblockloc == sblockloc ||
 703                        (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
 704                     fs->fs_bsize <= MAXBSIZE &&
 705                     fs->fs_bsize >= sizeof(struct fs))
 706                         break;
 707                 brelse(bp);
 708                 bp = NULL;
 709         }
 710         if (sblock_try[i] == -1) {
 711                 error = EINVAL;         /* XXX needs translation */
 712                 goto out;
 713         }
 714         fs->fs_fmod = 0;
 715         fs->fs_flags &= ~FS_INDEXDIRS;  /* no support for directory indicies */
 716         fs->fs_flags &= ~FS_UNCLEAN;
 717         if (fs->fs_clean == 0) {
 718                 fs->fs_flags |= FS_UNCLEAN;
 719                 if (ronly || (mp->mnt_flag & MNT_FORCE) ||
 720                     ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
 721                      (fs->fs_flags & FS_DOSOFTDEP))) {
 722                         printf(
 723 "WARNING: %s was not properly dismounted\n",
 724                             fs->fs_fsmnt);
 725                 } else {
 726                         printf(
 727 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 728                             fs->fs_fsmnt);
 729                         error = EPERM;
 730                         goto out;
 731                 }
 732                 if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
 733                     (mp->mnt_flag & MNT_FORCE)) {
 734                         printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
 735                             (intmax_t)fs->fs_pendingblocks,
 736                             fs->fs_pendinginodes);
 737                         fs->fs_pendingblocks = 0;
 738                         fs->fs_pendinginodes = 0;
 739                 }
 740         }
 741         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 742                 printf("%s: mount pending error: blocks %jd files %d\n",
 743                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 744                     fs->fs_pendinginodes);
 745                 fs->fs_pendingblocks = 0;
 746                 fs->fs_pendinginodes = 0;
 747         }
 748         if ((fs->fs_flags & FS_GJOURNAL) != 0) {
 749 #ifdef UFS_GJOURNAL
 750                 /*
 751                  * Get journal provider name.
 752                  */
 753                 size = 1024;
 754                 mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
 755                 if (g_io_getattr("GJOURNAL::provider", cp, &size,
 756                     mp->mnt_gjprovider) == 0) {
 757                         mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
 758                             M_UFSMNT, M_WAITOK);
 759                         MNT_ILOCK(mp);
 760                         mp->mnt_flag |= MNT_GJOURNAL;
 761                         MNT_IUNLOCK(mp);
 762                 } else {
 763                         printf(
 764 "WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
 765                             mp->mnt_stat.f_mntonname);
 766                         free(mp->mnt_gjprovider, M_UFSMNT);
 767                         mp->mnt_gjprovider = NULL;
 768                 }
 769 #else
 770                 printf(
 771 "WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
 772                     mp->mnt_stat.f_mntonname);
 773 #endif
 774         } else {
 775                 mp->mnt_gjprovider = NULL;
 776         }
 777         ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
 778         ump->um_cp = cp;
 779         ump->um_bo = &devvp->v_bufobj;
 780         ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
 781         if (fs->fs_magic == FS_UFS1_MAGIC) {
 782                 ump->um_fstype = UFS1;
 783                 ump->um_balloc = ffs_balloc_ufs1;
 784         } else {
 785                 ump->um_fstype = UFS2;
 786                 ump->um_balloc = ffs_balloc_ufs2;
 787         }
 788         ump->um_blkatoff = ffs_blkatoff;
 789         ump->um_truncate = ffs_truncate;
 790         ump->um_update = ffs_update;
 791         ump->um_valloc = ffs_valloc;
 792         ump->um_vfree = ffs_vfree;
 793         ump->um_ifree = ffs_ifree;
 794         ump->um_rdonly = ffs_rdonly;
 795         ump->um_snapgone = ffs_snapgone;
 796         mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
 797         bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
 798         if (fs->fs_sbsize < SBLOCKSIZE)
 799                 bp->b_flags |= B_INVAL | B_NOCACHE;
 800         brelse(bp);
 801         bp = NULL;
 802         fs = ump->um_fs;
 803         ffs_oldfscompat_read(fs, ump, sblockloc);
 804         fs->fs_ronly = ronly;
 805         size = fs->fs_cssize;
 806         blks = howmany(size, fs->fs_fsize);
 807         if (fs->fs_contigsumsize > 0)
 808                 size += fs->fs_ncg * sizeof(int32_t);
 809         size += fs->fs_ncg * sizeof(u_int8_t);
 810         space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
 811         fs->fs_csp = space;
 812         for (i = 0; i < blks; i += fs->fs_frag) {
 813                 size = fs->fs_bsize;
 814                 if (i + fs->fs_frag > blks)
 815                         size = (blks - i) * fs->fs_fsize;
 816                 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
 817                     cred, &bp)) != 0) {
 818                         free(fs->fs_csp, M_UFSMNT);
 819                         goto out;
 820                 }
 821                 bcopy(bp->b_data, space, (u_int)size);
 822                 space = (char *)space + size;
 823                 brelse(bp);
 824                 bp = NULL;
 825         }
 826         if (fs->fs_contigsumsize > 0) {
 827                 fs->fs_maxcluster = lp = space;
 828                 for (i = 0; i < fs->fs_ncg; i++)
 829                         *lp++ = fs->fs_contigsumsize;
 830                 space = lp;
 831         }
 832         size = fs->fs_ncg * sizeof(u_int8_t);
 833         fs->fs_contigdirs = (u_int8_t *)space;
 834         bzero(fs->fs_contigdirs, size);
 835         fs->fs_active = NULL;
 836         mp->mnt_data = ump;
 837         mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
 838         mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
 839         nmp = NULL;
 840         if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
 841             (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
 842                 if (nmp)
 843                         vfs_rel(nmp);
 844                 vfs_getnewfsid(mp);
 845         }
 846         mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 847         MNT_ILOCK(mp);
 848         mp->mnt_flag |= MNT_LOCAL;
 849         MNT_IUNLOCK(mp);
 850         if ((fs->fs_flags & FS_MULTILABEL) != 0) {
 851 #ifdef MAC
 852                 MNT_ILOCK(mp);
 853                 mp->mnt_flag |= MNT_MULTILABEL;
 854                 MNT_IUNLOCK(mp);
 855 #else
 856                 printf(
 857 "WARNING: %s: multilabel flag on fs but no MAC support\n",
 858                     mp->mnt_stat.f_mntonname);
 859 #endif
 860         }
 861         if ((fs->fs_flags & FS_ACLS) != 0) {
 862 #ifdef UFS_ACL
 863                 MNT_ILOCK(mp);
 864
 865                 if (mp->mnt_flag & MNT_NFS4ACLS)
 866                         printf("WARNING: ACLs flag on fs conflicts with "
 867                             "\"nfsv4acls\" mount option; option ignored\n");
 868                 mp->mnt_flag &= ~MNT_NFS4ACLS;
 869                 mp->mnt_flag |= MNT_ACLS;
 870
 871                 MNT_IUNLOCK(mp);
 872 #else
 873                 printf(
 874 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
 875                     mp->mnt_stat.f_mntonname);
 876 #endif
 877         }
 878         if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
 879 #ifdef UFS_ACL
 880                 MNT_ILOCK(mp);
 881
 882                 if (mp->mnt_flag & MNT_ACLS)
 883                         printf("WARNING: NFSv4 ACLs flag on fs conflicts with "
 884                             "\"acls\" mount option; option ignored\n");
 885                 mp->mnt_flag &= ~MNT_ACLS;
 886                 mp->mnt_flag |= MNT_NFS4ACLS;
 887
 888                 MNT_IUNLOCK(mp);
 889 #else
 890                 printf(
 891 "WARNING: %s: NFSv4 ACLs flag on fs but no ACLs support\n",
 892                     mp->mnt_stat.f_mntonname);
 893 #endif
 894         }
 895         if ((fs->fs_flags & FS_TRIM) != 0) {
 896                 size = sizeof(int);
 897                 if (g_io_getattr("GEOM::candelete", cp, &size,
 898                     &ump->um_candelete) == 0) {
 899                         if (!ump->um_candelete)
 900                                 printf(
 901 "WARNING: %s: TRIM flag on fs but disk does not support TRIM\n",
 902                                     mp->mnt_stat.f_mntonname);
 903                 } else {
 904                         printf(
 905 "WARNING: %s: TRIM flag on fs but cannot get whether disk supports TRIM\n",
 906                             mp->mnt_stat.f_mntonname);
 907                         ump->um_candelete = 0;
 908                 }
 909         }
 910
 911         ump->um_mountp = mp;
 912         ump->um_dev = dev;
 913         ump->um_devvp = devvp;
 914         ump->um_nindir = fs->fs_nindir;
 915         ump->um_bptrtodb = fs->fs_fsbtodb;
 916         ump->um_seqinc = fs->fs_frag;
 917         for (i = 0; i < MAXQUOTAS; i++)
 918                 ump->um_quotas[i] = NULLVP;
 919 #ifdef UFS_EXTATTR
 920         ufs_extattr_uepm_init(&ump->um_extattr);
 921 #endif
 922         /*
 923          * Set FS local "last mounted on" information (NULL pad)
 924          */
 925         bzero(fs->fs_fsmnt, MAXMNTLEN);
 926         strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
 927
 928         if( mp->mnt_flag & MNT_ROOTFS) {
 929                 /*
 930                  * Root mount; update timestamp in mount structure.
 931                  * this will be used by the common root mount code
 932                  * to update the system clock.
 933                  */
 934                 mp->mnt_time = fs->fs_time;
 935         }
 936
 937         if (ronly == 0) {
 938                 if ((fs->fs_flags & FS_DOSOFTDEP) &&
 939                     (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
 940                         free(fs->fs_csp, M_UFSMNT);
 941                         goto out;
 942                 }
 943                 if (fs->fs_snapinum[0] != 0)
 944                         ffs_snapshot_mount(mp);
 945                 fs->fs_fmod = 1;
 946                 fs->fs_clean = 0;
 947                 (void) ffs_sbupdate(ump, MNT_WAIT, 0);
 948         }
 949         /*
 950          * Initialize filesystem stat information in mount struct.
 951          */
 952         MNT_ILOCK(mp);
 953         mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED |
 954             MNTK_EXTENDED_SHARED;
 955         MNT_IUNLOCK(mp);
 956 #ifdef UFS_EXTATTR
 957 #ifdef UFS_EXTATTR_AUTOSTART
 958         /*
 959          *
 960          * Auto-starting does the following:
 961          *      - check for /.attribute in the fs, and extattr_start if so
 962          *      - for each file in .attribute, enable that file with
 963          *        an attribute of the same name.
 964          * Not clear how to report errors -- probably eat them.
 965          * This would all happen while the filesystem was busy/not
 966          * available, so would effectively be "atomic".
 967          */
 968         mp->mnt_stat.f_iosize = fs->fs_bsize;
 969         (void) ufs_extattr_autostart(mp, td);
 970 #endif /* !UFS_EXTATTR_AUTOSTART */
 971 #endif /* !UFS_EXTATTR */
 972         return (0);
 973 out:
 974         if (bp)
 975                 brelse(bp);
 976         if (cp != NULL) {
 977                 DROP_GIANT();
 978                 g_topology_lock();
 979                 g_vfs_close(cp);
 980                 g_topology_unlock();
 981                 PICKUP_GIANT();
 982         }
 983         if (ump) {
 984                 mtx_destroy(UFS_MTX(ump));
 985                 if (mp->mnt_gjprovider != NULL) {
 986                         free(mp->mnt_gjprovider, M_UFSMNT);
 987                         mp->mnt_gjprovider = NULL;
 988                 }
 989                 free(ump->um_fs, M_UFSMNT);
 990                 free(ump, M_UFSMNT);
 991                 mp->mnt_data = NULL;
 992         }
 993         dev_rel(dev);
 994         return (error);
 995 }
 996
 997 #include <sys/sysctl.h>
 /*
  * debug.bigcgs: read/write debugging knob, off (0) by default.
  *
  * While it is non-zero, ffs_oldfscompat_read() saves fs_cgsize in
  * fs_save_cgsize and replaces fs_cgsize with fs_bsize, and
  * ffs_oldfscompat_write() restores fs_cgsize from fs_save_cgsize and
  * zeroes the saved copy.
  *
  * NOTE(review): both functions test the knob at the time they run, so
  * flipping it between the read and the write makes the write side
  * restore from a value the read side never saved -- confirm whether
  * that matters to callers.
  */
 998 static int bigcgs = 0;
 999 SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
1000
1001 /*
1002  * Sanity checks for loading old filesystem superblocks.
1003  * See ffs_oldfscompat_write below for unwound actions.
1004  *
1005  * XXX - Parts get retired eventually.
1006  * Unfortunately new bits get added.
1007  */
1008 static void
1009 ffs_oldfscompat_read(fs, ump, sblockloc)
1010         struct fs *fs;
1011         struct ufsmount *ump;
1012         ufs2_daddr_t sblockloc;
1013 {
1014         off_t maxfilesize;
1015
1016         /*
1017          * If not yet done, update fs_flags location and value of fs_sblockloc.
1018          */
1019         if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
1020                 fs->fs_flags = fs->fs_old_flags;
1021                 fs->fs_old_flags |= FS_FLAGS_UPDATED;
1022                 fs->fs_sblockloc = sblockloc;
1023         }
1024         /*
1025          * If not yet done, update UFS1 superblock with new wider fields.
1026          */
1027         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
1028                 fs->fs_maxbsize = fs->fs_bsize;
1029                 fs->fs_time = fs->fs_old_time;
1030                 fs->fs_size = fs->fs_old_size;
1031                 fs->fs_dsize = fs->fs_old_dsize;
1032                 fs->fs_csaddr = fs->fs_old_csaddr;
1033                 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1034                 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1035                 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1036                 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1037         }
1038         if (fs->fs_magic == FS_UFS1_MAGIC &&
1039             fs->fs_old_inodefmt < FS_44INODEFMT) {
1040                 fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
1041                 fs->fs_qbmask = ~fs->fs_bmask;
1042                 fs->fs_qfmask = ~fs->fs_fmask;
1043         }
1044         if (fs->fs_magic == FS_UFS1_MAGIC) {
1045                 ump->um_savedmaxfilesize = fs->fs_maxfilesize;
1046                 maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
1047                 if (fs->fs_maxfilesize > maxfilesize)
1048                         fs->fs_maxfilesize = maxfilesize;
1049         }
1050         /* Compatibility for old filesystems */
1051         if (fs->fs_avgfilesize <= 0)
1052                 fs->fs_avgfilesize = AVFILESIZ;
1053         if (fs->fs_avgfpdir <= 0)
1054                 fs->fs_avgfpdir = AFPDIR;
1055         if (bigcgs) {
1056                 fs->fs_save_cgsize = fs->fs_cgsize;
1057                 fs->fs_cgsize = fs->fs_bsize;
1058         }
1059 }
1060
1061 /*
1062  * Unwinding superblock updates for old filesystems.
1063  * See ffs_oldfscompat_read above for details.
1064  *
1065  * XXX - Parts get retired eventually.
1066  * Unfortunately new bits get added.
1067  */
1068 static void
1069 ffs_oldfscompat_write(fs, ump)
1070         struct fs *fs;
1071         struct ufsmount *ump;
1072 {
1073
1074         /*
1075          * Copy back UFS2 updated fields that UFS1 inspects.
1076          */
1077         if (fs->fs_magic == FS_UFS1_MAGIC) {
1078                 fs->fs_old_time = fs->fs_time;
1079                 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1080                 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1081                 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1082                 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1083                 fs->fs_maxfilesize = ump->um_savedmaxfilesize;
1084         }
1085         if (bigcgs) {
1086                 fs->fs_cgsize = fs->fs_save_cgsize;
1087                 fs->fs_save_cgsize = 0;
1088         }
1089 }
1090
1091 /*
1092  * unmount system call
1093  */
1094 static int
1095 ffs_unmount(mp, mntflags)
1096         struct mount *mp;
1097         int mntflags;
1098 {
1099         struct thread *td;
1100         struct ufsmount *ump = VFSTOUFS(mp);
1101         struct fs *fs;
1102         int error, flags, susp;
1103 #ifdef UFS_EXTATTR
1104         int e_restart;
1105 #endif
1106
1107         flags = 0;
1108         td = curthread;
1109         fs = ump->um_fs;
1110         if (mntflags & MNT_FORCE) {
1111                 flags |= FORCECLOSE;
1112                 susp = fs->fs_ronly != 0;
1113         } else
1114                 susp = 0;
1115 #ifdef UFS_EXTATTR
1116         if ((error = ufs_extattr_stop(mp, td))) {
1117                 if (error != EOPNOTSUPP)
1118                         printf("ffs_unmount: ufs_extattr_stop returned %d\n",
1119                             error);
1120                 e_restart = 0;
1121         } else {
1122                 ufs_extattr_uepm_destroy(&ump->um_extattr);
1123                 e_restart = 1;
1124         }
1125 #endif
1126         if (susp) {
1127                 /*
1128                  * dounmount already called vn_start_write().
1129                  */
1130                 for (;;) {
1131                         vn_finished_write(mp);
1132                         if ((error = vfs_write_suspend(mp)) != 0)
1133                                 return (error);
1134                         MNT_ILOCK(mp);
1135                         if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
1136                                 mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
1137                                     MNTK_SUSPEND2);
1138                                 wakeup(&mp->mnt_flag);
1139                                 MNT_IUNLOCK(mp);
1140                                 td->td_pflags |= TDP_IGNSUSP;
1141                                 break;
1142                         }
1143                         MNT_IUNLOCK(mp);
1144                         vn_start_write(NULL, &mp, V_WAIT);
1145                 }
1146         }
1147         if (mp->mnt_flag & MNT_SOFTDEP)
1148                 error = softdep_flushfiles(mp, flags, td);
1149         else
1150                 error = ffs_flushfiles(mp, flags, td);
1151         if (error != 0 && error != ENXIO)
1152                 goto fail;
1153
1154         UFS_LOCK(ump);
1155         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1156                 printf("%s: unmount pending error: blocks %jd files %d\n",
1157                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
1158                     fs->fs_pendinginodes);
1159                 fs->fs_pendingblocks = 0;
1160                 fs->fs_pendinginodes = 0;
1161         }
1162         UFS_UNLOCK(ump);
1163         if (fs->fs_ronly == 0) {
1164                 fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
1165                 error = ffs_sbupdate(ump, MNT_WAIT, 0);
1166                 if (error && error != ENXIO) {
1167                         fs->fs_clean = 0;
1168                         goto fail;
1169                 }
1170         }
1171         if (susp) {
1172                 vfs_write_resume(mp);
1173                 vn_start_write(NULL, &mp, V_WAIT);
1174         }
1175         DROP_GIANT();
1176         g_topology_lock();
1177         g_vfs_close(ump->um_cp);
1178         g_topology_unlock();
1179         PICKUP_GIANT();
1180         vrele(ump->um_devvp);
1181         dev_rel(ump->um_dev);
1182         mtx_destroy(UFS_MTX(ump));
1183         if (mp->mnt_gjprovider != NULL) {
1184                 free(mp->mnt_gjprovider, M_UFSMNT);
1185                 mp->mnt_gjprovider = NULL;
1186         }
1187         free(fs->fs_csp, M_UFSMNT);
1188         free(fs, M_UFSMNT);
1189         free(ump, M_UFSMNT);
1190         mp->mnt_data = NULL;
1191         MNT_ILOCK(mp);
1192         mp->mnt_flag &= ~MNT_LOCAL;
1193         MNT_IUNLOCK(mp);
1194         return (error);
1195
1196 fail:
1197         if (susp) {
1198                 vfs_write_resume(mp);
1199                 vn_start_write(NULL, &mp, V_WAIT);
1200         }
1201 #ifdef UFS_EXTATTR
1202         if (e_restart) {
1203                 ufs_extattr_uepm_init(&ump->um_extattr);
1204 #ifdef UFS_EXTATTR_AUTOSTART
1205                 (void) ufs_extattr_autostart(mp, td);
1206 #endif
1207         }
1208 #endif
1209
1210         return (error);
1211 }
1212
1213 /*
1214  * Flush out all the files in a filesystem.
1215  */
1216 int
1217 ffs_flushfiles(mp, flags, td)
1218         struct mount *mp;
1219         int flags;
1220         struct thread *td;
1221 {
1222         struct ufsmount *ump;
1223         int error;
1224
1225         ump = VFSTOUFS(mp);
1226 #ifdef QUOTA
1227         if (mp->mnt_flag & MNT_QUOTA) {
1228                 int i;
1229                 error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1230                 if (error)
1231                         return (error);
1232                 for (i = 0; i < MAXQUOTAS; i++) {
1233                         quotaoff(td, mp, i);
1234                 }
1235                 /*
1236                  * Here we fall through to vflush again to ensure
1237                  * that we have gotten rid of all the system vnodes.
1238                  */
1239         }
1240 #endif
1241         ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1242         if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1243                 if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1244                         return (error);
1245                 ffs_snapshot_unmount(mp);
1246                 flags |= FORCECLOSE;
1247                 /*
1248                  * Here we fall through to vflush again to ensure
1249                  * that we have gotten rid of all the system vnodes.
1250                  */
1251         }
1252         /*
1253          * Flush all the files.
1254          */
1255         if ((error = vflush(mp, 0, flags, td)) != 0)
1256                 return (error);
1257         /*
1258          * Flush filesystem metadata.
1259          */
1260         vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1261         error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1262         VOP_UNLOCK(ump->um_devvp, 0);
1263         return (error);
1264 }
1265
1266 /*
1267  * Get filesystem statistics.
1268  */
1269 static int
1270 ffs_statfs(mp, sbp)
1271         struct mount *mp;
1272         struct statfs *sbp;
1273 {
1274         struct ufsmount *ump;
1275         struct fs *fs;
1276
1277         ump = VFSTOUFS(mp);
1278         fs = ump->um_fs;
1279         if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1280                 panic("ffs_statfs");
1281         sbp->f_version = STATFS_VERSION;
1282         sbp->f_bsize = fs->fs_fsize;
1283         sbp->f_iosize = fs->fs_bsize;
1284         sbp->f_blocks = fs->fs_dsize;
1285         UFS_LOCK(ump);
1286         sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1287             fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1288         sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1289             dbtofsb(fs, fs->fs_pendingblocks);
1290         sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1291         sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1292         UFS_UNLOCK(ump);
1293         sbp->f_namemax = NAME_MAX;
1294         return (0);
1295 }
1296
1297 /*
1298  * Go through the disk queues to initiate sandbagged IO;
1299  * go through the inodes to write those that have been modified;
1300  * initiate the writing of the super block if it has been modified.
1301  *
1302  * Note: we are always called with the filesystem marked `MPBUSY'.
      *
      * mp is the mount to sync.  waitfor selects the mode:
      *   MNT_WAIT     vnodes are locked with a blocking request and the
      *                softdep worklist is flushed as well;
      *   MNT_SUSPEND  handled as MNT_WAIT, and once no further work is
      *                found the mount is flagged MNTK_SUSPEND2 |
      *                MNTK_SUSPENDED;
      *   MNT_LAZY     the device vnode is not fsynced;
      *   any other value locks vnodes with LK_NOWAIT.
      *
      * Returns 0, or the most recent error recorded from ffs_syncvnode(),
      * softdep_flushworklist(), VOP_FSYNC() or ffs_sbupdate().
1303  */
1304 static int
1305 ffs_sync(mp, waitfor)
1306         struct mount *mp;
1307         int waitfor;
1308 {
1309         struct vnode *mvp, *vp, *devvp;
1310         struct thread *td;
1311         struct inode *ip;
1312         struct ufsmount *ump = VFSTOUFS(mp);
1313         struct fs *fs;
1314         int error, count, wait, lockreq, allerror = 0;
1315         int suspend;
1316         int suspended;
1317         int secondary_writes;
1318         int secondary_accwrites;
1319         int softdep_deps;
1320         int softdep_accdeps;
1321         struct bufobj *bo;
1322
1323         td = curthread;
1324         fs = ump->um_fs;
             /* A read-only filesystem must never carry a modified superblock. */
1325         if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {            /* XXX */
1326                 printf("fs = %s\n", fs->fs_fsmnt);
1327                 panic("ffs_sync: rofs mod");
1328         }
1329         /*
1330          * Write back each (modified) inode.
1331          */
1332         wait = 0;
1333         suspend = 0;
1334         suspended = 0;
             /* Default: do not sleep on a busy vnode lock. */
1335         lockreq = LK_EXCLUSIVE | LK_NOWAIT;
             /* A suspend request is a waiting sync plus the suspend step below. */
1336         if (waitfor == MNT_SUSPEND) {
1337                 suspend = 1;
1338                 waitfor = MNT_WAIT;
1339         }
1340         if (waitfor == MNT_WAIT) {
1341                 wait = 1;
1342                 lockreq = LK_EXCLUSIVE;
1343         }
             /* vget() below is entered with the vnode interlock still held. */
1344         lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
1345         MNT_ILOCK(mp);
             /*
              * The gotos issued from this function take the mount interlock
              * before jumping here.  NOTE(review): the jump taken after
              * softdep_check_suspend() returns non-zero does not take it
              * itself; presumably that routine returns with it held -- confirm.
              */
1346 loop:
1347         /* Grab snapshot of secondary write counts */
1348         secondary_writes = mp->mnt_secondary_writes;
1349         secondary_accwrites = mp->mnt_secondary_accwrites;
1350
1351         /* Grab snapshot of softdep dependency counts */
1352         MNT_IUNLOCK(mp);
1353         softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
1354         MNT_ILOCK(mp);
1355
1356         MNT_VNODE_FOREACH(vp, mp, mvp) {
1357                 /*
1358                  * Depend on the mntvnode_slock to keep things stable enough
1359                  * for a quick test.  Since there might be hundreds of
1360                  * thousands of vnodes, we cannot afford even a subroutine
1361                  * call unless there's a good chance that we have work to do.
1362                  */
1363                 VI_LOCK(vp);
1364                 if (vp->v_iflag & VI_DOOMED) {
1365                         VI_UNLOCK(vp);
1366                         continue;
1367                 }
1368                 ip = VTOI(vp);
                     /* Skip vnodes with no pending inode flags and no dirty buffers. */
1369                 if (vp->v_type == VNON || ((ip->i_flag &
1370                     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1371                     vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1372                         VI_UNLOCK(vp);
1373                         continue;
1374                 }
1375                 MNT_IUNLOCK(mp);
1376                 if ((error = vget(vp, lockreq, td)) != 0) {
1377                         MNT_ILOCK(mp);
                             /*
                              * ENOENT/ENOLCK: abandon this walk of the vnode
                              * list and start over.  Any other failure just
                              * skips this vnode.
                              */
1378                         if (error == ENOENT || error == ENOLCK) {
1379                                 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1380                                 goto loop;
1381                         }
1382                         continue;
1383                 }
1384                 if ((error = ffs_syncvnode(vp, waitfor)) != 0)
1385                         allerror = error;
1386                 vput(vp);
1387                 MNT_ILOCK(mp);
1388         }
1389         MNT_IUNLOCK(mp);
1390         /*
1391          * Force stale filesystem control information to be flushed.
1392          */
1393         if (waitfor == MNT_WAIT) {
1394                 if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1395                         allerror = error;
1396                 /* Flushed work items may create new vnodes to clean */
1397                 if (allerror == 0 && count) {
1398                         MNT_ILOCK(mp);
1399                         goto loop;
1400                 }
1401         }
1402 #ifdef QUOTA
1403         qsync(mp);
1404 #endif
1405         devvp = ump->um_devvp;
1406         bo = &devvp->v_bufobj;
1407         BO_LOCK(bo);
             /*
              * Unless this is a lazy sync, push out the device vnode when it
              * has writes in flight or dirty buffers; a waiting sync that saw
              * no error then rescans from the top.
              */
1408         if (waitfor != MNT_LAZY &&
1409             (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1410                 BO_UNLOCK(bo);
1411                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
1412                 if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
1413                         allerror = error;
1414                 VOP_UNLOCK(devvp, 0);
1415                 if (allerror == 0 && waitfor == MNT_WAIT) {
1416                         MNT_ILOCK(mp);
1417                         goto loop;
1418                 }
1419         } else if (suspend != 0) {
                     /*
                      * NOTE(review): the bufobj lock taken above is not
                      * released in this branch; presumably
                      * softdep_check_suspend() drops it -- confirm.  The
                      * counts sampled at the top of the loop are handed over
                      * for comparison.
                      */
1420                 if (softdep_check_suspend(mp,
1421                                           devvp,
1422                                           softdep_deps,
1423                                           softdep_accdeps,
1424                                           secondary_writes,
1425                                           secondary_accwrites) != 0)
1426                         goto loop;      /* More work needed */
                     /* On success the mount mutex is expected to be held here. */
1427                 mtx_assert(MNT_MTX(mp), MA_OWNED);
1428                 mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
1429                 MNT_IUNLOCK(mp);
1430                 suspended = 1;
1431         } else
1432                 BO_UNLOCK(bo);
1433         /*
1434          * Write back modified superblock.
1435          */
1436         if (fs->fs_fmod != 0 &&
1437             (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
1438                 allerror = error;
1439         return (allerror);
1440 }
1441
1442 int
1443 ffs_vget(mp, ino, flags, vpp)
1444         struct mount *mp;
1445         ino_t ino;
1446         int flags;
1447         struct vnode **vpp;
1448 {
1449         return (ffs_vgetf(mp, ino, flags, vpp, 0));
1450 }
1451
1452 int
1453 ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
1454         struct mount *mp;
1455         ino_t ino;
1456         int flags;
1457         struct vnode **vpp;
1458         int ffs_flags;
1459 {
1460         struct fs *fs;
1461         struct inode *ip;
1462         struct ufsmount *ump;
1463         struct buf *bp;
1464         struct vnode *vp;
1465         struct cdev *dev;
1466         int error;
1467
1468         error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
1469         if (error || *vpp != NULL)
1470                 return (error);
1471
1472         /*
1473          * We must promote to an exclusive lock for vnode creation.  This
1474          * can happen if lookup is passed LOCKSHARED.
1475          */
1476         if ((flags & LK_TYPE_MASK) == LK_SHARED) {
1477                 flags &= ~LK_TYPE_MASK;
1478                 flags |= LK_EXCLUSIVE;
1479         }
1480
1481         /*
1482          * We do not lock vnode creation as it is believed to be too
1483          * expensive for such rare case as simultaneous creation of vnode
1484          * for same ino by different processes. We just allow them to race
1485          * and check later to decide who wins. Let the race begin!
1486          */
1487
1488         ump = VFSTOUFS(mp);
1489         dev = ump->um_dev;
1490         fs = ump->um_fs;
1491
1492         /*
1493          * If this malloc() is performed after the getnewvnode()
1494          * it might block, leaving a vnode with a NULL v_data to be
1495          * found by ffs_sync() if a sync happens to fire right then,
1496          * which will cause a panic because ffs_sync() blindly
1497          * dereferences vp->v_data (as well it should).
1498          */
1499         ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
1500
1501         /* Allocate a new vnode/inode. */
1502         if (fs->fs_magic == FS_UFS1_MAGIC)
1503                 error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
1504         else
1505                 error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
1506         if (error) {
1507                 *vpp = NULL;
1508                 uma_zfree(uma_inode, ip);
1509                 return (error);
1510         }
1511         /*
1512          * FFS supports recursive locking.
1513          */
1514         VN_LOCK_AREC(vp);
1515         vp->v_data = ip;
1516         vp->v_bufobj.bo_bsize = fs->fs_bsize;
1517         ip->i_vnode = vp;
1518         ip->i_ump = ump;
1519         ip->i_fs = fs;
1520         ip->i_dev = dev;
1521         ip->i_number = ino;
1522         ip->i_ea_refs = 0;
1523 #ifdef QUOTA
1524         {
1525                 int i;
1526                 for (i = 0; i < MAXQUOTAS; i++)
1527                         ip->i_dquot[i] = NODQUOT;
1528         }
1529 #endif
1530
1531         lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
1532         if (ffs_flags & FFSV_FORCEINSMQ)
1533                 vp->v_vflag |= VV_FORCEINSMQ;
1534         error = insmntque(vp, mp);
1535         if (error != 0) {
1536                 uma_zfree(uma_inode, ip);
1537                 *vpp = NULL;
1538                 return (error);
1539         }
1540         vp->v_vflag &= ~VV_FORCEINSMQ;
1541         error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
1542         if (error || *vpp != NULL)
1543                 return (error);
1544
1545         /* Read in the disk contents for the inode, copy into the inode. */
1546         error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1547             (int)fs->fs_bsize, NOCRED, &bp);
1548         if (error) {
1549                 /*
1550                  * The inode does not contain anything useful, so it would
1551                  * be misleading to leave it on its hash chain. With mode
1552                  * still zero, it will be unlinked and returned to the free
1553                  * list by vput().
1554                  */
1555                 brelse(bp);
1556                 vput(vp);
1557                 *vpp = NULL;
1558                 return (error);
1559         }
1560         if (ip->i_ump->um_fstype == UFS1)
1561                 ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1562         else
1563                 ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1564         ffs_load_inode(bp, ip, fs, ino);
1565         if (DOINGSOFTDEP(vp))
1566                 softdep_load_inodeblock(ip);
1567         else
1568                 ip->i_effnlink = ip->i_nlink;
1569         bqrelse(bp);
1570
1571         /*
1572          * Initialize the vnode from the inode, check for aliases.
1573          * Note that the underlying vnode may have changed.
1574          */
1575         if (ip->i_ump->um_fstype == UFS1)
1576                 error = ufs_vinit(mp, &ffs_fifoops1, &vp);
1577         else
1578                 error = ufs_vinit(mp, &ffs_fifoops2, &vp);
1579         if (error) {
1580                 vput(vp);
1581                 *vpp = NULL;
1582                 return (error);
1583         }
1584
1585         /*
1586          * Finish inode initialization.
1587          */
1588         if (vp->v_type != VFIFO) {
1589                 /* FFS supports shared locking for all files except fifos. */
1590                 VN_LOCK_ASHARE(vp);
1591         }
1592
1593         /*
1594          * Set up a generation number for this inode if it does not
1595          * already have one. This should only happen on old filesystems.
1596          */
1597         if (ip->i_gen == 0) {
1598                 ip->i_gen = arc4random() / 2 + 1;
1599                 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1600                         ip->i_flag |= IN_MODIFIED;
1601                         DIP_SET(ip, i_gen, ip->i_gen);
1602                 }
1603         }
1604         /*
1605          * Ensure that uid and gid are correct. This is a temporary
1606          * fix until fsck has been changed to do the update.
1607          */
1608         if (fs->fs_magic == FS_UFS1_MAGIC &&            /* XXX */
1609             fs->fs_old_inodefmt < FS_44INODEFMT) {      /* XXX */
1610                 ip->i_uid = ip->i_din1->di_ouid;        /* XXX */
1611                 ip->i_gid = ip->i_din1->di_ogid;        /* XXX */
1612         }                                               /* XXX */
1613
1614 #ifdef MAC
1615         if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1616                 /*
1617                  * If this vnode is already allocated, and we're running
1618                  * multi-label, attempt to perform a label association
1619                  * from the extended attributes on the inode.
1620                  */
1621                 error = mac_vnode_associate_extattr(mp, vp);
1622                 if (error) {
1623                         /* ufs_inactive will release ip->i_devvp ref. */
1624                         vput(vp);
1625                         *vpp = NULL;
1626                         return (error);
1627                 }
1628         }
1629 #endif
1630
1631         *vpp = vp;
1632         return (0);
1633 }
1634
1635 /*
1636  * File handle to vnode
1637  *
1638  * Have to be really careful about stale file handles:
1639  * - check that the inode number is valid
1640  * - call ffs_vget() to get the locked inode
1641  * - check for an unallocated inode (i_mode == 0)
1642  * - check that the given client host has export rights and return
1643  *   those rights via. exflagsp and credanonp
1644  */
1645 static int
1646 ffs_fhtovp(mp, fhp, vpp)
1647         struct mount *mp;
1648         struct fid *fhp;
1649         struct vnode **vpp;
1650 {
1651         struct ufid *ufhp;
1652         struct fs *fs;
1653
1654         ufhp = (struct ufid *)fhp;
1655         fs = VFSTOUFS(mp)->um_fs;
1656         if (ufhp->ufid_ino < ROOTINO ||
1657             ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1658                 return (ESTALE);
1659         return (ufs_fhtovp(mp, ufhp, vpp));
1660 }
1661
/*
 * Initialize the filesystem: softdep_initialize() runs first, then
 * ufs_init() is called and its status is returned to the caller.
 */
static int
ffs_init(struct vfsconf *vfsp)
{
	int status;

	softdep_initialize();
	status = ufs_init(vfsp);
	return (status);
}
1673
/*
 * Undo the work of ffs_init(), in the opposite order: ufs_uninit() is
 * called first and softdep_uninitialize() afterwards.  The value
 * returned is the status reported by ufs_uninit().
 */
static int
ffs_uninit(struct vfsconf *vfsp)
{
	const int status = ufs_uninit(vfsp);

	softdep_uninitialize();
	return (status);
}
1687
1688 /*
1689  * Write a superblock and associated information back to disk.
1690  */
1691 int
1692 ffs_sbupdate(mp, waitfor, suspended)
1693         struct ufsmount *mp;
1694         int waitfor;
1695         int suspended;
1696 {
1697         struct fs *fs = mp->um_fs;
1698         struct buf *sbbp;
1699         struct buf *bp;
1700         int blks;
1701         void *space;
1702         int i, size, error, allerror = 0;
1703
1704         if (fs->fs_ronly == 1 &&
1705             (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1706             (MNT_RDONLY | MNT_UPDATE))
1707                 panic("ffs_sbupdate: write read-only filesystem");
1708         /*
1709          * We use the superblock's buf to serialize calls to ffs_sbupdate().
1710          */
1711         sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1712             0, 0, 0);
1713         /*
1714          * First write back the summary information.
1715          */
1716         blks = howmany(fs->fs_cssize, fs->fs_fsize);
1717         space = fs->fs_csp;
1718         for (i = 0; i < blks; i += fs->fs_frag) {
1719                 size = fs->fs_bsize;
1720                 if (i + fs->fs_frag > blks)
1721                         size = (blks - i) * fs->fs_fsize;
1722                 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1723                     size, 0, 0, 0);
1724                 bcopy(space, bp->b_data, (u_int)size);
1725                 space = (char *)space + size;
1726                 if (suspended)
1727                         bp->b_flags |= B_VALIDSUSPWRT;
1728                 if (waitfor != MNT_WAIT)
1729                         bawrite(bp);
1730                 else if ((error = bwrite(bp)) != 0)
1731                         allerror = error;
1732         }
1733         /*
1734          * Now write back the superblock itself. If any errors occurred
1735          * up to this point, then fail so that the superblock avoids
1736          * being written out as clean.
1737          */
1738         if (allerror) {
1739                 brelse(sbbp);
1740                 return (allerror);
1741         }
1742         bp = sbbp;
1743         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1744             (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1745                 printf("%s: correcting fs_sblockloc from %jd to %d\n",
1746                     fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1747                 fs->fs_sblockloc = SBLOCK_UFS1;
1748         }
1749         if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1750             (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1751                 printf("%s: correcting fs_sblockloc from %jd to %d\n",
1752                     fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1753                 fs->fs_sblockloc = SBLOCK_UFS2;
1754         }
1755         fs->fs_fmod = 0;
1756         fs->fs_time = time_second;
1757         bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1758         ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1759         if (suspended)
1760                 bp->b_flags |= B_VALIDSUSPWRT;
1761         if (waitfor != MNT_WAIT)
1762                 bawrite(bp);
1763         else if ((error = bwrite(bp)) != 0)
1764                 allerror = error;
1765         return (allerror);
1766 }
1767
/*
 * Extended attribute control.  The request is forwarded unchanged to
 * ufs_extattrctl() in kernels built with UFS_EXTATTR and to
 * vfs_stdextattrctl() in all others; the callee's result is returned.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname);
#endif
	return (error);
}
1781
1782 static void
1783 ffs_ifree(struct ufsmount *ump, struct inode *ip)
1784 {
1785
1786         if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1787                 uma_zfree(uma_ufs1, ip->i_din1);
1788         else if (ip->i_din2 != NULL)
1789                 uma_zfree(uma_ufs2, ip->i_din2);
1790         uma_zfree(uma_inode, ip);
1791 }
1792
1793 static int dobkgrdwrite = 1;
1794 SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
1795     "Do background writes (honoring the BV_BKGRDWRITE flag)?");
1796
1797 /*
1798  * Complete a background write started from bwrite.
1799  */
1800 static void
1801 ffs_backgroundwritedone(struct buf *bp)
1802 {
1803         struct bufobj *bufobj;
1804         struct buf *origbp;
1805
1806         /*
1807          * Find the original buffer that we are writing.
1808          */
1809         bufobj = bp->b_bufobj;
1810         BO_LOCK(bufobj);
1811         if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
1812                 panic("backgroundwritedone: lost buffer");
1813         /* Grab an extra reference to be dropped by the bufdone() below. */
1814         bufobj_wrefl(bufobj);
1815         BO_UNLOCK(bufobj);
1816         /*
1817          * Process dependencies then return any unfinished ones.
1818          */
1819         if (!LIST_EMPTY(&bp->b_dep))
1820                 buf_complete(bp);
1821 #ifdef SOFTUPDATES
1822         if (!LIST_EMPTY(&bp->b_dep))
1823                 softdep_move_dependencies(bp, origbp);
1824 #endif
1825         /*
1826          * This buffer is marked B_NOCACHE so when it is released
1827          * by biodone it will be tossed.
1828          */
1829         bp->b_flags |= B_NOCACHE;
1830         bp->b_flags &= ~B_CACHE;
1831         bufdone(bp);
1832         BO_LOCK(bufobj);
1833         /*
1834          * Clear the BV_BKGRDINPROG flag in the original buffer
1835          * and awaken it if it is waiting for the write to complete.
1836          * If BV_BKGRDINPROG is not set in the original buffer it must
1837          * have been released and re-instantiated - which is not legal.
1838          */
1839         KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
1840             ("backgroundwritedone: lost buffer2"));
1841         origbp->b_vflags &= ~BV_BKGRDINPROG;
1842         if (origbp->b_vflags & BV_BKGRDWAIT) {
1843                 origbp->b_vflags &= ~BV_BKGRDWAIT;
1844                 wakeup(&origbp->b_xflags);
1845         }
1846         BO_UNLOCK(bufobj);
1847 }
1848
1849
1850 /*
1851  * Write, release buffer on completion.  (Done by iodone
1852  * if async).  Do not bother writing anything if the buffer
1853  * is invalid.
1854  *
1855  * Note that we set B_CACHE here, indicating that buffer is
1856  * fully valid and thus cacheable.  This is true even of NFS
1857  * now so we set it generally.  This could be set either here
1858  * or in biodone() since the I/O is synchronous.  We put it
1859  * here.
1860  */
1861 static int
1862 ffs_bufwrite(struct buf *bp)
1863 {
1864         int oldflags, s;
1865         struct buf *newbp;
1866
1867         CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
1868         if (bp->b_flags & B_INVAL) {
1869                 brelse(bp);
1870                 return (0);
1871         }
1872
1873         oldflags = bp->b_flags;
1874
1875         if (!BUF_ISLOCKED(bp))
1876                 panic("bufwrite: buffer is not busy???");
1877         s = splbio();
1878         /*
1879          * If a background write is already in progress, delay
1880          * writing this block if it is asynchronous. Otherwise
1881          * wait for the background write to complete.
1882          */
1883         BO_LOCK(bp->b_bufobj);
1884         if (bp->b_vflags & BV_BKGRDINPROG) {
1885                 if (bp->b_flags & B_ASYNC) {
1886                         BO_UNLOCK(bp->b_bufobj);
1887                         splx(s);
1888                         bdwrite(bp);
1889                         return (0);
1890                 }
1891                 bp->b_vflags |= BV_BKGRDWAIT;
1892                 msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
1893                 if (bp->b_vflags & BV_BKGRDINPROG)
1894                         panic("bufwrite: still writing");
1895         }
1896         BO_UNLOCK(bp->b_bufobj);
1897
1898         /* Mark the buffer clean */
1899         bundirty(bp);
1900
1901         /*
1902          * If this buffer is marked for background writing and we
1903          * do not have to wait for it, make a copy and write the
1904          * copy so as to leave this buffer ready for further use.
1905          *
1906          * This optimization eats a lot of memory.  If we have a page
1907          * or buffer shortfall we can't do it.
1908          */
1909         if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
1910             (bp->b_flags & B_ASYNC) &&
1911             !vm_page_count_severe() &&
1912             !buf_dirty_count_severe()) {
1913                 KASSERT(bp->b_iodone == NULL,
1914                     ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
1915
1916                 /* get a new block */
1917                 newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
1918                 if (newbp == NULL)
1919                         goto normal_write;
1920
1921                 /*
1922                  * set it to be identical to the old block.  We have to
1923                  * set b_lblkno and BKGRDMARKER before calling bgetvp()
1924                  * to avoid confusing the splay tree and gbincore().
1925                  */
1926                 memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
1927                 newbp->b_lblkno = bp->b_lblkno;
1928                 newbp->b_xflags |= BX_BKGRDMARKER;
1929                 BO_LOCK(bp->b_bufobj);
1930                 bp->b_vflags |= BV_BKGRDINPROG;
1931                 bgetvp(bp->b_vp, newbp);
1932                 BO_UNLOCK(bp->b_bufobj);
1933                 newbp->b_bufobj = &bp->b_vp->v_bufobj;
1934                 newbp->b_blkno = bp->b_blkno;
1935                 newbp->b_offset = bp->b_offset;
1936                 newbp->b_iodone = ffs_backgroundwritedone;
1937                 newbp->b_flags |= B_ASYNC;
1938                 newbp->b_flags &= ~B_INVAL;
1939
1940 #ifdef SOFTUPDATES
1941                 /* move over the dependencies */
1942                 if (!LIST_EMPTY(&bp->b_dep))
1943                         softdep_move_dependencies(bp, newbp);
1944 #endif
1945
1946                 /*
1947                  * Initiate write on the copy, release the original to
1948                  * the B_LOCKED queue so that it cannot go away until
1949                  * the background write completes. If not locked it could go
1950                  * away and then be reconstituted while it was being written.
1951                  * If the reconstituted buffer were written, we could end up
1952                  * with two background copies being written at the same time.
1953                  */
1954                 bqrelse(bp);
1955                 bp = newbp;
1956         }
1957
1958         /* Let the normal bufwrite do the rest for us */
1959 normal_write:
1960         return (bufwrite(bp));
1961 }
1962
1963
1964 static void
1965 ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
1966 {
1967         struct vnode *vp;
1968         int error;
1969         struct buf *tbp;
1970
1971         vp = bo->__bo_vnode;
1972         if (bp->b_iocmd == BIO_WRITE) {
1973                 if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
1974                     bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
1975                     (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
1976                         panic("ffs_geom_strategy: bad I/O");
1977                 bp->b_flags &= ~B_VALIDSUSPWRT;
1978                 if ((vp->v_vflag & VV_COPYONWRITE) &&
1979                     vp->v_rdev->si_snapdata != NULL) {
1980                         if ((bp->b_flags & B_CLUSTER) != 0) {
1981                                 runningbufwakeup(bp);
1982                                 TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1983                                               b_cluster.cluster_entry) {
1984                                         error = ffs_copyonwrite(vp, tbp);
1985                                         if (error != 0 &&
1986                                             error != EOPNOTSUPP) {
1987                                                 bp->b_error = error;
1988                                                 bp->b_ioflags |= BIO_ERROR;
1989                                                 bufdone(bp);
1990                                                 return;
1991                                         }
1992                                 }
1993                                 bp->b_runningbufspace = bp->b_bufsize;
1994                                 atomic_add_long(&runningbufspace,
1995                                                bp->b_runningbufspace);
1996                         } else {
1997                                 error = ffs_copyonwrite(vp, bp);
1998                                 if (error != 0 && error != EOPNOTSUPP) {
1999                                         bp->b_error = error;
2000                                         bp->b_ioflags |= BIO_ERROR;
2001                                         bufdone(bp);
2002                                         return;
2003                                 }
2004                         }
2005                 }
2006 #ifdef SOFTUPDATES
2007                 if ((bp->b_flags & B_CLUSTER) != 0) {
2008                         TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
2009                                       b_cluster.cluster_entry) {
2010                                 if (!LIST_EMPTY(&tbp->b_dep))
2011                                         buf_start(tbp);
2012                         }
2013                 } else {
2014                         if (!LIST_EMPTY(&bp->b_dep))
2015                                 buf_start(bp);
2016                 }
2017
2018 #endif
2019         }
2020         g_vfs_strategy(bo, bp);
2021 }
2022
2023 #ifdef  DDB
2024
2025 static void
2026 db_print_ffs(struct ufsmount *ump)
2027 {
2028         db_printf("mp %p %s devvp %p fs %p su_wl %d su_wl_in %d su_deps %d "
2029                   "su_req %d\n",
2030             ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
2031             ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
2032             ump->softdep_on_worklist_inprogress, ump->softdep_deps,
2033             ump->softdep_req);
2034 }
2035
2036 DB_SHOW_COMMAND(ffs, db_show_ffs)
2037 {
2038         struct mount *mp;
2039         struct ufsmount *ump;
2040
2041         if (have_addr) {
2042                 ump = VFSTOUFS((struct mount *)addr);
2043                 db_print_ffs(ump);
2044                 return;
2045         }
2046
2047         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
2048                 if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
2049                         db_print_ffs(VFSTOUFS(mp));
2050         }
2051 }
2052
2053 #endif  /* DDB */