sys/ufs/ffs/ffs_vfsops.c

   1 /*-
   2  * Copyright (c) 1989, 1991, 1993, 1994
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 4. Neither the name of the University nor the names of its contributors
  14  *    may be used to endorse or promote products derived from this software
  15  *    without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  *
  29  *      @(#)ffs_vfsops.c        8.31 (Berkeley) 5/20/95
  30  */
  31
  32 #include <sys/cdefs.h>
  33 __FBSDID("$FreeBSD$");
  34
  35 #include "opt_quota.h"
  36 #include "opt_ufs.h"
  37 #include "opt_ffs.h"
  38 #include "opt_ddb.h"
  39
  40 #include <sys/param.h>
  41 #include <sys/systm.h>
  42 #include <sys/namei.h>
  43 #include <sys/priv.h>
  44 #include <sys/proc.h>
  45 #include <sys/kernel.h>
  46 #include <sys/vnode.h>
  47 #include <sys/mount.h>
  48 #include <sys/bio.h>
  49 #include <sys/buf.h>
  50 #include <sys/conf.h>
  51 #include <sys/fcntl.h>
  52 #include <sys/malloc.h>
  53 #include <sys/mutex.h>
  54
  55 #include <security/mac/mac_framework.h>
  56
  57 #include <ufs/ufs/extattr.h>
  58 #include <ufs/ufs/gjournal.h>
  59 #include <ufs/ufs/quota.h>
  60 #include <ufs/ufs/ufsmount.h>
  61 #include <ufs/ufs/inode.h>
  62 #include <ufs/ufs/ufs_extern.h>
  63
  64 #include <ufs/ffs/fs.h>
  65 #include <ufs/ffs/ffs_extern.h>
  66
  67 #include <vm/vm.h>
  68 #include <vm/uma.h>
  69 #include <vm/vm_page.h>
  70
  71 #include <geom/geom.h>
  72 #include <geom/geom_vfs.h>
  73
  74 #include <ddb/ddb.h>
  75
  76 static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
  77
  78 static int      ffs_reload(struct mount *, struct thread *);
  79 static int      ffs_mountfs(struct vnode *, struct mount *, struct thread *);
  80 static void     ffs_oldfscompat_read(struct fs *, struct ufsmount *,
  81                     ufs2_daddr_t);
  82 static void     ffs_oldfscompat_write(struct fs *, struct ufsmount *);
  83 static void     ffs_ifree(struct ufsmount *ump, struct inode *ip);
  84 static vfs_init_t ffs_init;
  85 static vfs_uninit_t ffs_uninit;
  86 static vfs_extattrctl_t ffs_extattrctl;
  87 static vfs_cmount_t ffs_cmount;
  88 static vfs_unmount_t ffs_unmount;
  89 static vfs_mount_t ffs_mount;
  90 static vfs_statfs_t ffs_statfs;
  91 static vfs_fhtovp_t ffs_fhtovp;
  92 static vfs_sync_t ffs_sync;
  93
  94 static struct vfsops ufs_vfsops = {
  95         .vfs_extattrctl =       ffs_extattrctl,
  96         .vfs_fhtovp =           ffs_fhtovp,
  97         .vfs_init =             ffs_init,
  98         .vfs_mount =            ffs_mount,
  99         .vfs_cmount =           ffs_cmount,
 100         .vfs_quotactl =         ufs_quotactl,
 101         .vfs_root =             ufs_root,
 102         .vfs_statfs =           ffs_statfs,
 103         .vfs_sync =             ffs_sync,
 104         .vfs_uninit =           ffs_uninit,
 105         .vfs_unmount =          ffs_unmount,
 106         .vfs_vget =             ffs_vget,
 107         .vfs_susp_clean =       process_deferred_inactive,
 108 };
 109
 110 VFS_SET(ufs_vfsops, ufs, 0);
 111 MODULE_VERSION(ufs, 1);
 112
 113 static b_strategy_t ffs_geom_strategy;
 114 static b_write_t ffs_bufwrite;
 115
 116 static struct buf_ops ffs_ops = {
 117         .bop_name =     "FFS",
 118         .bop_write =    ffs_bufwrite,
 119         .bop_strategy = ffs_geom_strategy,
 120         .bop_sync =     bufsync,
 121 #ifdef NO_FFS_SNAPSHOT
 122         .bop_bdflush =  bufbdflush,
 123 #else
 124         .bop_bdflush =  ffs_bdflush,
 125 #endif
 126 };
 127
 128 static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
 129     "noclusterw", "noexec", "export", "force", "from", "multilabel",
 130     "snapshot", "nosuid", "suiddir", "nosymfollow", "sync",
 131     "union", NULL };
 132
 133 static int
 134 ffs_mount(struct mount *mp)
 135 {
 136         struct vnode *devvp;
 137         struct thread *td;
 138         struct ufsmount *ump = 0;
 139         struct fs *fs;
 140         int error, flags;
 141         u_int mntorflags, mntandnotflags;
 142         accmode_t accmode;
 143         struct nameidata ndp;
 144         char *fspec;
 145
 146         td = curthread;
 147         if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
 148                 return (EINVAL);
 149         if (uma_inode == NULL) {
 150                 uma_inode = uma_zcreate("FFS inode",
 151                     sizeof(struct inode), NULL, NULL, NULL, NULL,
 152                     UMA_ALIGN_PTR, 0);
 153                 uma_ufs1 = uma_zcreate("FFS1 dinode",
 154                     sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
 155                     UMA_ALIGN_PTR, 0);
 156                 uma_ufs2 = uma_zcreate("FFS2 dinode",
 157                     sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
 158                     UMA_ALIGN_PTR, 0);
 159         }
 160
 161         fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
 162         if (error)
 163                 return (error);
 164
 165         mntorflags = 0;
 166         mntandnotflags = 0;
 167         if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
 168                 mntorflags |= MNT_ACLS;
 169
 170         if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
 171                 mntorflags |= MNT_SNAPSHOT;
 172                 /*
 173                  * Once we have set the MNT_SNAPSHOT flag, do not
 174                  * persist "snapshot" in the options list.
 175                  */
 176                 vfs_deleteopt(mp->mnt_optnew, "snapshot");
 177                 vfs_deleteopt(mp->mnt_opt, "snapshot");
 178         }
 179
 180         MNT_ILOCK(mp);
 181         mp->mnt_flag = (mp->mnt_flag | mntorflags) & ~mntandnotflags;
 182         MNT_IUNLOCK(mp);
 183         /*
 184          * If updating, check whether changing from read-only to
 185          * read/write; if there is no device name, that's all we do.
 186          */
 187         if (mp->mnt_flag & MNT_UPDATE) {
 188                 ump = VFSTOUFS(mp);
 189                 fs = ump->um_fs;
 190                 devvp = ump->um_devvp;
 191                 if (fs->fs_ronly == 0 &&
 192                     vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 193                         /*
 194                          * Flush any dirty data and suspend filesystem.
 195                          */
 196                         if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 197                                 return (error);
 198                         for (;;) {
 199                                 vn_finished_write(mp);
 200                                 if ((error = vfs_write_suspend(mp)) != 0)
 201                                         return (error);
 202                                 MNT_ILOCK(mp);
 203                                 if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
 204                                         /*
 205                                          * Allow the secondary writes
 206                                          * to proceed.
 207                                          */
 208                                         mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
 209                                             MNTK_SUSPEND2);
 210                                         wakeup(&mp->mnt_flag);
 211                                         MNT_IUNLOCK(mp);
 212                                         /*
 213                                          * Allow the curthread to
 214                                          * ignore the suspension to
 215                                          * synchronize on-disk state.
 216                                          */
 217                                         td->td_pflags |= TDP_IGNSUSP;
 218                                         break;
 219                                 }
 220                                 MNT_IUNLOCK(mp);
 221                                 vn_start_write(NULL, &mp, V_WAIT);
 222                         }
 223                         /*
 224                          * Check for and optionally get rid of files open
 225                          * for writing.
 226                          */
 227                         flags = WRITECLOSE;
 228                         if (mp->mnt_flag & MNT_FORCE)
 229                                 flags |= FORCECLOSE;
 230                         if (mp->mnt_flag & MNT_SOFTDEP) {
 231                                 error = softdep_flushfiles(mp, flags, td);
 232                         } else {
 233                                 error = ffs_flushfiles(mp, flags, td);
 234                         }
 235                         if (error) {
 236                                 vfs_write_resume(mp);
 237                                 return (error);
 238                         }
 239                         if (fs->fs_pendingblocks != 0 ||
 240                             fs->fs_pendinginodes != 0) {
 241                                 printf("%s: %s: blocks %jd files %d\n",
 242                                     fs->fs_fsmnt, "update error",
 243                                     (intmax_t)fs->fs_pendingblocks,
 244                                     fs->fs_pendinginodes);
 245                                 fs->fs_pendingblocks = 0;
 246                                 fs->fs_pendinginodes = 0;
 247                         }
 248                         if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
 249                                 fs->fs_clean = 1;
 250                         if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 251                                 fs->fs_ronly = 0;
 252                                 fs->fs_clean = 0;
 253                                 vfs_write_resume(mp);
 254                                 return (error);
 255                         }
 256                         DROP_GIANT();
 257                         g_topology_lock();
 258                         g_access(ump->um_cp, 0, -1, 0);
 259                         g_topology_unlock();
 260                         PICKUP_GIANT();
 261                         fs->fs_ronly = 1;
 262                         MNT_ILOCK(mp);
 263                         mp->mnt_flag |= MNT_RDONLY;
 264                         MNT_IUNLOCK(mp);
 265                         /*
 266                          * Allow the writers to note that filesystem
 267                          * is ro now.
 268                          */
 269                         vfs_write_resume(mp);
 270                 }
 271                 if ((mp->mnt_flag & MNT_RELOAD) &&
 272                     (error = ffs_reload(mp, td)) != 0)
 273                         return (error);
 274                 if (fs->fs_ronly &&
 275                     !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 276                         /*
 277                          * If upgrade to read-write by non-root, then verify
 278                          * that user has necessary permissions on the device.
 279                          */
 280                         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 281                         error = VOP_ACCESS(devvp, VREAD | VWRITE,
 282                             td->td_ucred, td);
 283                         if (error)
 284                                 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 285                         if (error) {
 286                                 VOP_UNLOCK(devvp, 0);
 287                                 return (error);
 288                         }
 289                         VOP_UNLOCK(devvp, 0);
 290                         fs->fs_flags &= ~FS_UNCLEAN;
 291                         if (fs->fs_clean == 0) {
 292                                 fs->fs_flags |= FS_UNCLEAN;
 293                                 if ((mp->mnt_flag & MNT_FORCE) ||
 294                                     ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
 295                                      (fs->fs_flags & FS_DOSOFTDEP))) {
 296                                         printf("WARNING: %s was not %s\n",
 297                                            fs->fs_fsmnt, "properly dismounted");
 298                                 } else {
 299                                         printf(
 300 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 301                                             fs->fs_fsmnt);
 302                                         return (EPERM);
 303                                 }
 304                         }
 305                         DROP_GIANT();
 306                         g_topology_lock();
 307                         /*
 308                          * If we're the root device, we may not have an E count
 309                          * yet, get it now.
 310                          */
 311                         if (ump->um_cp->ace == 0)
 312                                 error = g_access(ump->um_cp, 0, 1, 1);
 313                         else
 314                                 error = g_access(ump->um_cp, 0, 1, 0);
 315                         g_topology_unlock();
 316                         PICKUP_GIANT();
 317                         if (error)
 318                                 return (error);
 319                         if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 320                                 return (error);
 321                         fs->fs_ronly = 0;
 322                         MNT_ILOCK(mp);
 323                         mp->mnt_flag &= ~MNT_RDONLY;
 324                         MNT_IUNLOCK(mp);
 325                         fs->fs_clean = 0;
 326                         if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 327                                 vn_finished_write(mp);
 328                                 return (error);
 329                         }
 330                         /* check to see if we need to start softdep */
 331                         if ((fs->fs_flags & FS_DOSOFTDEP) &&
 332                             (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
 333                                 vn_finished_write(mp);
 334                                 return (error);
 335                         }
 336                         if (fs->fs_snapinum[0] != 0)
 337                                 ffs_snapshot_mount(mp);
 338                         vn_finished_write(mp);
 339                 }
 340                 /*
 341                  * Soft updates is incompatible with "async",
 342                  * so if we are doing softupdates stop the user
 343                  * from setting the async flag in an update.
 344                  * Softdep_mount() clears it in an initial mount
 345                  * or ro->rw remount.
 346                  */
 347                 if (mp->mnt_flag & MNT_SOFTDEP) {
 348                         /* XXX: Reset too late ? */
 349                         MNT_ILOCK(mp);
 350                         mp->mnt_flag &= ~MNT_ASYNC;
 351                         MNT_IUNLOCK(mp);
 352                 }
 353                 /*
 354                  * Keep MNT_ACLS flag if it is stored in superblock.
 355                  */
 356                 if ((fs->fs_flags & FS_ACLS) != 0) {
 357                         /* XXX: Set too late ? */
 358                         MNT_ILOCK(mp);
 359                         mp->mnt_flag |= MNT_ACLS;
 360                         MNT_IUNLOCK(mp);
 361                 }
 362
 363                 /*
 364                  * If this is a snapshot request, take the snapshot.
 365                  */
 366                 if (mp->mnt_flag & MNT_SNAPSHOT)
 367                         return (ffs_snapshot(mp, fspec));
 368         }
 369
 370         /*
 371          * Not an update, or updating the name: look up the name
 372          * and verify that it refers to a sensible disk device.
 373          */
 374         NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
 375         if ((error = namei(&ndp)) != 0)
 376                 return (error);
 377         NDFREE(&ndp, NDF_ONLY_PNBUF);
 378         devvp = ndp.ni_vp;
 379         if (!vn_isdisk(devvp, &error)) {
 380                 vput(devvp);
 381                 return (error);
 382         }
 383
 384         /*
 385          * If mount by non-root, then verify that user has necessary
 386          * permissions on the device.
 387          */
 388         accmode = VREAD;
 389         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 390                 accmode |= VWRITE;
 391         error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
 392         if (error)
 393                 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 394         if (error) {
 395                 vput(devvp);
 396                 return (error);
 397         }
 398
 399         if (mp->mnt_flag & MNT_UPDATE) {
 400                 /*
 401                  * Update only
 402                  *
 403                  * If it's not the same vnode, or at least the same device
 404                  * then it's not correct.
 405                  */
 406
 407                 if (devvp->v_rdev != ump->um_devvp->v_rdev)
 408                         error = EINVAL; /* needs translation */
 409                 vput(devvp);
 410                 if (error)
 411                         return (error);
 412         } else {
 413                 /*
 414                  * New mount
 415                  *
 416                  * We need the name for the mount point (also used for
 417                  * "last mounted on") copied in. If an error occurs,
 418                  * the mount point is discarded by the upper level code.
 419                  * Note that vfs_mount() populates f_mntonname for us.
 420                  */
 421                 if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
 422                         vrele(devvp);
 423                         return (error);
 424                 }
 425         }
 426         vfs_mountedfrom(mp, fspec);
 427         return (0);
 428 }
 429
 430 /*
 431  * Compatibility with old mount system call.
 432  */
 433
 434 static int
 435 ffs_cmount(struct mntarg *ma, void *data, int flags)
 436 {
 437         struct ufs_args args;
 438         int error;
 439
 440         if (data == NULL)
 441                 return (EINVAL);
 442         error = copyin(data, &args, sizeof args);
 443         if (error)
 444                 return (error);
 445
 446         ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
 447         ma = mount_arg(ma, "export", &args.export, sizeof args.export);
 448         error = kernel_mount(ma, flags);
 449
 450         return (error);
 451 }
 452
 453 /*
 454  * Reload all incore data for a filesystem (used after running fsck on
 455  * the root filesystem and finding things to fix). The filesystem must
 456  * be mounted read-only.
 457  *
 458  * Things to do to update the mount:
 459  *      1) invalidate all cached meta-data.
 460  *      2) re-read superblock from disk.
 461  *      3) re-read summary information from disk.
 462  *      4) invalidate all inactive vnodes.
 463  *      5) invalidate all cached file data.
 464  *      6) re-read inode data for all active vnodes.
 465  */
 466 static int
 467 ffs_reload(struct mount *mp, struct thread *td)
 468 {
 469         struct vnode *vp, *mvp, *devvp;
 470         struct inode *ip;
 471         void *space;
 472         struct buf *bp;
 473         struct fs *fs, *newfs;
 474         struct ufsmount *ump;
 475         ufs2_daddr_t sblockloc;
 476         int i, blks, size, error;
 477         int32_t *lp;
 478
 479         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 480                 return (EINVAL);
 481         ump = VFSTOUFS(mp);
 482         /*
 483          * Step 1: invalidate all cached meta-data.
 484          */
 485         devvp = VFSTOUFS(mp)->um_devvp;
 486         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 487         if (vinvalbuf(devvp, 0, 0, 0) != 0)
 488                 panic("ffs_reload: dirty1");
 489         VOP_UNLOCK(devvp, 0);
 490
 491         /*
 492          * Step 2: re-read superblock from disk.
 493          */
 494         fs = VFSTOUFS(mp)->um_fs;
 495         if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
 496             NOCRED, &bp)) != 0)
 497                 return (error);
 498         newfs = (struct fs *)bp->b_data;
 499         if ((newfs->fs_magic != FS_UFS1_MAGIC &&
 500              newfs->fs_magic != FS_UFS2_MAGIC) ||
 501             newfs->fs_bsize > MAXBSIZE ||
 502             newfs->fs_bsize < sizeof(struct fs)) {
 503                         brelse(bp);
 504                         return (EIO);           /* XXX needs translation */
 505         }
 506         /*
 507          * Copy pointer fields back into superblock before copying in   XXX
 508          * new superblock. These should really be in the ufsmount.      XXX
 509          * Note that important parameters (eg fs_ncg) are unchanged.
 510          */
 511         newfs->fs_csp = fs->fs_csp;
 512         newfs->fs_maxcluster = fs->fs_maxcluster;
 513         newfs->fs_contigdirs = fs->fs_contigdirs;
 514         newfs->fs_active = fs->fs_active;
 515         /* The file system is still read-only. */
 516         newfs->fs_ronly = 1;
 517         sblockloc = fs->fs_sblockloc;
 518         bcopy(newfs, fs, (u_int)fs->fs_sbsize);
 519         brelse(bp);
 520         mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 521         ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
 522         UFS_LOCK(ump);
 523         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 524                 printf("%s: reload pending error: blocks %jd files %d\n",
 525                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 526                     fs->fs_pendinginodes);
 527                 fs->fs_pendingblocks = 0;
 528                 fs->fs_pendinginodes = 0;
 529         }
 530         UFS_UNLOCK(ump);
 531
 532         /*
 533          * Step 3: re-read summary information from disk.
 534          */
 535         blks = howmany(fs->fs_cssize, fs->fs_fsize);
 536         space = fs->fs_csp;
 537         for (i = 0; i < blks; i += fs->fs_frag) {
 538                 size = fs->fs_bsize;
 539                 if (i + fs->fs_frag > blks)
 540                         size = (blks - i) * fs->fs_fsize;
 541                 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
 542                     NOCRED, &bp);
 543                 if (error)
 544                         return (error);
 545                 bcopy(bp->b_data, space, (u_int)size);
 546                 space = (char *)space + size;
 547                 brelse(bp);
 548         }
 549         /*
 550          * We no longer know anything about clusters per cylinder group.
 551          */
 552         if (fs->fs_contigsumsize > 0) {
 553                 lp = fs->fs_maxcluster;
 554                 for (i = 0; i < fs->fs_ncg; i++)
 555                         *lp++ = fs->fs_contigsumsize;
 556         }
 557
 558 loop:
 559         MNT_ILOCK(mp);
 560         MNT_VNODE_FOREACH(vp, mp, mvp) {
 561                 VI_LOCK(vp);
 562                 if (vp->v_iflag & VI_DOOMED) {
 563                         VI_UNLOCK(vp);
 564                         continue;
 565                 }
 566                 MNT_IUNLOCK(mp);
 567                 /*
 568                  * Step 4: invalidate all cached file data.
 569                  */
 570                 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
 571                         MNT_VNODE_FOREACH_ABORT(mp, mvp);
 572                         goto loop;
 573                 }
 574                 if (vinvalbuf(vp, 0, 0, 0))
 575                         panic("ffs_reload: dirty2");
 576                 /*
 577                  * Step 5: re-read inode data for all active vnodes.
 578                  */
 579                 ip = VTOI(vp);
 580                 error =
 581                     bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 582                     (int)fs->fs_bsize, NOCRED, &bp);
 583                 if (error) {
 584                         VOP_UNLOCK(vp, 0);
 585                         vrele(vp);
 586                         MNT_VNODE_FOREACH_ABORT(mp, mvp);
 587                         return (error);
 588                 }
 589                 ffs_load_inode(bp, ip, fs, ip->i_number);
 590                 ip->i_effnlink = ip->i_nlink;
 591                 brelse(bp);
 592                 VOP_UNLOCK(vp, 0);
 593                 vrele(vp);
 594                 MNT_ILOCK(mp);
 595         }
 596         MNT_IUNLOCK(mp);
 597         return (0);
 598 }
 599
/*
 * Possible superblock locations ordered from most to least likely.
 * The list is supplied by SBLOCKSEARCH; the mount code walks it in order
 * and stops when it reaches the -1 entry that ends it.
 */
static int sblock_try[] = SBLOCKSEARCH;
 604
 605 /*
 606  * Common code for mount and mountroot
 607  */
 608 static int
 609 ffs_mountfs(devvp, mp, td)
 610         struct vnode *devvp;
 611         struct mount *mp;
 612         struct thread *td;
 613 {
 614         struct ufsmount *ump;
 615         struct buf *bp;
 616         struct fs *fs;
 617         struct cdev *dev;
 618         void *space;
 619         ufs2_daddr_t sblockloc;
 620         int error, i, blks, size, ronly;
 621         int32_t *lp;
 622         struct ucred *cred;
 623         struct g_consumer *cp;
 624         struct mount *nmp;
 625
 626         bp = NULL;
 627         ump = NULL;
 628         cred = td ? td->td_ucred : NOCRED;
 629         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 630
 631         dev = devvp->v_rdev;
 632         dev_ref(dev);
 633         DROP_GIANT();
 634         g_topology_lock();
 635         error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
 636
 637         /*
 638          * If we are a root mount, drop the E flag so fsck can do its magic.
 639          * We will pick it up again when we remount R/W.
 640          */
 641         if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
 642                 error = g_access(cp, 0, 0, -1);
 643         g_topology_unlock();
 644         PICKUP_GIANT();
 645         VOP_UNLOCK(devvp, 0);
 646         if (error)
 647                 goto out;
 648         if (devvp->v_rdev->si_iosize_max != 0)
 649                 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
 650         if (mp->mnt_iosize_max > MAXPHYS)
 651                 mp->mnt_iosize_max = MAXPHYS;
 652
 653         devvp->v_bufobj.bo_private = cp;
 654         devvp->v_bufobj.bo_ops = &ffs_ops;
 655
 656         fs = NULL;
 657         sblockloc = 0;
 658         /*
 659          * Try reading the superblock in each of its possible locations.
 660          */
 661         for (i = 0; sblock_try[i] != -1; i++) {
 662                 if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
 663                         error = EINVAL;
 664                         vfs_mount_error(mp,
 665                             "Invalid sectorsize %d for superblock size %d",
 666                             cp->provider->sectorsize, SBLOCKSIZE);
 667                         goto out;
 668                 }
 669                 if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
 670                     cred, &bp)) != 0)
 671                         goto out;
 672                 fs = (struct fs *)bp->b_data;
 673                 sblockloc = sblock_try[i];
 674                 if ((fs->fs_magic == FS_UFS1_MAGIC ||
 675                      (fs->fs_magic == FS_UFS2_MAGIC &&
 676                       (fs->fs_sblockloc == sblockloc ||
 677                        (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
 678                     fs->fs_bsize <= MAXBSIZE &&
 679                     fs->fs_bsize >= sizeof(struct fs))
 680                         break;
 681                 brelse(bp);
 682                 bp = NULL;
 683         }
 684         if (sblock_try[i] == -1) {
 685                 error = EINVAL;         /* XXX needs translation */
 686                 goto out;
 687         }
 688         fs->fs_fmod = 0;
 689         fs->fs_flags &= ~FS_INDEXDIRS;  /* no support for directory indicies */
 690         fs->fs_flags &= ~FS_UNCLEAN;
 691         if (fs->fs_clean == 0) {
 692                 fs->fs_flags |= FS_UNCLEAN;
 693                 if (ronly || (mp->mnt_flag & MNT_FORCE) ||
 694                     ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
 695                      (fs->fs_flags & FS_DOSOFTDEP))) {
 696                         printf(
 697 "WARNING: %s was not properly dismounted\n",
 698                             fs->fs_fsmnt);
 699                 } else {
 700                         printf(
 701 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 702                             fs->fs_fsmnt);
 703                         error = EPERM;
 704                         goto out;
 705                 }
 706                 if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
 707                     (mp->mnt_flag & MNT_FORCE)) {
 708                         printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
 709                             (intmax_t)fs->fs_pendingblocks,
 710                             fs->fs_pendinginodes);
 711                         fs->fs_pendingblocks = 0;
 712                         fs->fs_pendinginodes = 0;
 713                 }
 714         }
 715         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 716                 printf("%s: mount pending error: blocks %jd files %d\n",
 717                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 718                     fs->fs_pendinginodes);
 719                 fs->fs_pendingblocks = 0;
 720                 fs->fs_pendinginodes = 0;
 721         }
 722         if ((fs->fs_flags & FS_GJOURNAL) != 0) {
 723 #ifdef UFS_GJOURNAL
 724                 /*
 725                  * Get journal provider name.
 726                  */
 727                 size = 1024;
 728                 mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
 729                 if (g_io_getattr("GJOURNAL::provider", cp, &size,
 730                     mp->mnt_gjprovider) == 0) {
 731                         mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
 732                             M_UFSMNT, M_WAITOK);
 733                         MNT_ILOCK(mp);
 734                         mp->mnt_flag |= MNT_GJOURNAL;
 735                         MNT_IUNLOCK(mp);
 736                 } else {
 737                         printf(
 738 "WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
 739                             mp->mnt_stat.f_mntonname);
 740                         free(mp->mnt_gjprovider, M_UFSMNT);
 741                         mp->mnt_gjprovider = NULL;
 742                 }
 743 #else
 744                 printf(
 745 "WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
 746                     mp->mnt_stat.f_mntonname);
 747 #endif
 748         } else {
 749                 mp->mnt_gjprovider = NULL;
 750         }
 751         ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
 752         ump->um_cp = cp;
 753         ump->um_bo = &devvp->v_bufobj;
 754         ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
 755         if (fs->fs_magic == FS_UFS1_MAGIC) {
 756                 ump->um_fstype = UFS1;
 757                 ump->um_balloc = ffs_balloc_ufs1;
 758         } else {
 759                 ump->um_fstype = UFS2;
 760                 ump->um_balloc = ffs_balloc_ufs2;
 761         }
 762         ump->um_blkatoff = ffs_blkatoff;
 763         ump->um_truncate = ffs_truncate;
 764         ump->um_update = ffs_update;
 765         ump->um_valloc = ffs_valloc;
 766         ump->um_vfree = ffs_vfree;
 767         ump->um_ifree = ffs_ifree;
 768         ump->um_rdonly = ffs_rdonly;
 769         mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
 770         bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
 771         if (fs->fs_sbsize < SBLOCKSIZE)
 772                 bp->b_flags |= B_INVAL | B_NOCACHE;
 773         brelse(bp);
 774         bp = NULL;
 775         fs = ump->um_fs;
 776         ffs_oldfscompat_read(fs, ump, sblockloc);
 777         fs->fs_ronly = ronly;
 778         size = fs->fs_cssize;
 779         blks = howmany(size, fs->fs_fsize);
 780         if (fs->fs_contigsumsize > 0)
 781                 size += fs->fs_ncg * sizeof(int32_t);
 782         size += fs->fs_ncg * sizeof(u_int8_t);
 783         space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
 784         fs->fs_csp = space;
 785         for (i = 0; i < blks; i += fs->fs_frag) {
 786                 size = fs->fs_bsize;
 787                 if (i + fs->fs_frag > blks)
 788                         size = (blks - i) * fs->fs_fsize;
 789                 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
 790                     cred, &bp)) != 0) {
 791                         free(fs->fs_csp, M_UFSMNT);
 792                         goto out;
 793                 }
 794                 bcopy(bp->b_data, space, (u_int)size);
 795                 space = (char *)space + size;
 796                 brelse(bp);
 797                 bp = NULL;
 798         }
 799         if (fs->fs_contigsumsize > 0) {
 800                 fs->fs_maxcluster = lp = space;
 801                 for (i = 0; i < fs->fs_ncg; i++)
 802                         *lp++ = fs->fs_contigsumsize;
 803                 space = lp;
 804         }
 805         size = fs->fs_ncg * sizeof(u_int8_t);
 806         fs->fs_contigdirs = (u_int8_t *)space;
 807         bzero(fs->fs_contigdirs, size);
 808         fs->fs_active = NULL;
 809         mp->mnt_data = ump;
 810         mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
 811         mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
 812         nmp = NULL;
 813         if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
 814             (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
 815                 if (nmp)
 816                         vfs_rel(nmp);
 817                 vfs_getnewfsid(mp);
 818         }
 819         mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 820         MNT_ILOCK(mp);
 821         mp->mnt_flag |= MNT_LOCAL;
 822         MNT_IUNLOCK(mp);
 823         if ((fs->fs_flags & FS_MULTILABEL) != 0) {
 824 #ifdef MAC
 825                 MNT_ILOCK(mp);
 826                 mp->mnt_flag |= MNT_MULTILABEL;
 827                 MNT_IUNLOCK(mp);
 828 #else
 829                 printf(
 830 "WARNING: %s: multilabel flag on fs but no MAC support\n",
 831                     mp->mnt_stat.f_mntonname);
 832 #endif
 833         }
 834         if ((fs->fs_flags & FS_ACLS) != 0) {
 835 #ifdef UFS_ACL
 836                 MNT_ILOCK(mp);
 837                 mp->mnt_flag |= MNT_ACLS;
 838                 MNT_IUNLOCK(mp);
 839 #else
 840                 printf(
 841 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
 842                     mp->mnt_stat.f_mntonname);
 843 #endif
 844         }
 845         ump->um_mountp = mp;
 846         ump->um_dev = dev;
 847         ump->um_devvp = devvp;
 848         ump->um_nindir = fs->fs_nindir;
 849         ump->um_bptrtodb = fs->fs_fsbtodb;
 850         ump->um_seqinc = fs->fs_frag;
 851         for (i = 0; i < MAXQUOTAS; i++)
 852                 ump->um_quotas[i] = NULLVP;
 853 #ifdef UFS_EXTATTR
 854         ufs_extattr_uepm_init(&ump->um_extattr);
 855 #endif
 856         /*
 857          * Set FS local "last mounted on" information (NULL pad)
 858          */
 859         bzero(fs->fs_fsmnt, MAXMNTLEN);
 860         strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
 861
 862         if( mp->mnt_flag & MNT_ROOTFS) {
 863                 /*
 864                  * Root mount; update timestamp in mount structure.
 865                  * this will be used by the common root mount code
 866                  * to update the system clock.
 867                  */
 868                 mp->mnt_time = fs->fs_time;
 869         }
 870
 871         if (ronly == 0) {
 872                 if ((fs->fs_flags & FS_DOSOFTDEP) &&
 873                     (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
 874                         free(fs->fs_csp, M_UFSMNT);
 875                         goto out;
 876                 }
 877                 if (fs->fs_snapinum[0] != 0)
 878                         ffs_snapshot_mount(mp);
 879                 fs->fs_fmod = 1;
 880                 fs->fs_clean = 0;
 881                 (void) ffs_sbupdate(ump, MNT_WAIT, 0);
 882         }
 883         /*
 884          * Initialize filesystem stat information in mount struct.
 885          */
 886         MNT_ILOCK(mp);
 887         mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED |
 888             MNTK_EXTENDED_SHARED;
 889         MNT_IUNLOCK(mp);
 890 #ifdef UFS_EXTATTR
 891 #ifdef UFS_EXTATTR_AUTOSTART
 892         /*
 893          *
 894          * Auto-starting does the following:
 895          *      - check for /.attribute in the fs, and extattr_start if so
 896          *      - for each file in .attribute, enable that file with
 897          *        an attribute of the same name.
 898          * Not clear how to report errors -- probably eat them.
 899          * This would all happen while the filesystem was busy/not
 900          * available, so would effectively be "atomic".
 901          */
 902         mp->mnt_stat.f_iosize = fs->fs_bsize;
 903         (void) ufs_extattr_autostart(mp, td);
 904 #endif /* !UFS_EXTATTR_AUTOSTART */
 905 #endif /* !UFS_EXTATTR */
 906         return (0);
 907 out:
 908         if (bp)
 909                 brelse(bp);
 910         if (cp != NULL) {
 911                 DROP_GIANT();
 912                 g_topology_lock();
 913                 g_vfs_close(cp);
 914                 g_topology_unlock();
 915                 PICKUP_GIANT();
 916         }
 917         if (ump) {
 918                 mtx_destroy(UFS_MTX(ump));
 919                 if (mp->mnt_gjprovider != NULL) {
 920                         free(mp->mnt_gjprovider, M_UFSMNT);
 921                         mp->mnt_gjprovider = NULL;
 922                 }
 923                 free(ump->um_fs, M_UFSMNT);
 924                 free(ump, M_UFSMNT);
 925                 mp->mnt_data = NULL;
 926         }
 927         dev_rel(dev);
 928         return (error);
 929 }
 930
 931 #include <sys/sysctl.h>
 /*
  * Debugging knob, exported as a read-write integer named "bigcgs" under
  * the _debug sysctl node.  While it is nonzero, ffs_oldfscompat_read()
  * saves fs_cgsize in fs_save_cgsize and substitutes fs_bsize, and
  * ffs_oldfscompat_write() puts the saved value back.
  */
 932 static int bigcgs = 0;
 933 SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
 934
 935 /*
 936  * Sanity checks for loading old filesystem superblocks.
 937  * See ffs_oldfscompat_write below for unwound actions.
 938  *
 939  * XXX - Parts get retired eventually.
 940  * Unfortunately new bits get added.
 941  */
 942 static void
 943 ffs_oldfscompat_read(fs, ump, sblockloc)
 944         struct fs *fs;
 945         struct ufsmount *ump;
 946         ufs2_daddr_t sblockloc;
 947 {
 948         off_t maxfilesize;
 949
 950         /*
 951          * If not yet done, update fs_flags location and value of fs_sblockloc.
 952          */
 953         if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
 954                 fs->fs_flags = fs->fs_old_flags;
 955                 fs->fs_old_flags |= FS_FLAGS_UPDATED;
 956                 fs->fs_sblockloc = sblockloc;
 957         }
 958         /*
 959          * If not yet done, update UFS1 superblock with new wider fields.
 960          */
 961         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
 962                 fs->fs_maxbsize = fs->fs_bsize;
 963                 fs->fs_time = fs->fs_old_time;
 964                 fs->fs_size = fs->fs_old_size;
 965                 fs->fs_dsize = fs->fs_old_dsize;
 966                 fs->fs_csaddr = fs->fs_old_csaddr;
 967                 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
 968                 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
 969                 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
 970                 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
 971         }
 972         if (fs->fs_magic == FS_UFS1_MAGIC &&
 973             fs->fs_old_inodefmt < FS_44INODEFMT) {
 974                 fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
 975                 fs->fs_qbmask = ~fs->fs_bmask;
 976                 fs->fs_qfmask = ~fs->fs_fmask;
 977         }
 978         if (fs->fs_magic == FS_UFS1_MAGIC) {
 979                 ump->um_savedmaxfilesize = fs->fs_maxfilesize;
 980                 maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
 981                 if (fs->fs_maxfilesize > maxfilesize)
 982                         fs->fs_maxfilesize = maxfilesize;
 983         }
 984         /* Compatibility for old filesystems */
 985         if (fs->fs_avgfilesize <= 0)
 986                 fs->fs_avgfilesize = AVFILESIZ;
 987         if (fs->fs_avgfpdir <= 0)
 988                 fs->fs_avgfpdir = AFPDIR;
 989         if (bigcgs) {
 990                 fs->fs_save_cgsize = fs->fs_cgsize;
 991                 fs->fs_cgsize = fs->fs_bsize;
 992         }
 993 }
 994
 995 /*
 996  * Unwinding superblock updates for old filesystems.
 997  * See ffs_oldfscompat_read above for details.
 998  *
 999  * XXX - Parts get retired eventually.
1000  * Unfortunately new bits get added.
1001  */
1002 static void
1003 ffs_oldfscompat_write(fs, ump)
1004         struct fs *fs;
1005         struct ufsmount *ump;
1006 {
1007
1008         /*
1009          * Copy back UFS2 updated fields that UFS1 inspects.
1010          */
1011         if (fs->fs_magic == FS_UFS1_MAGIC) {
1012                 fs->fs_old_time = fs->fs_time;
1013                 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1014                 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1015                 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1016                 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1017                 fs->fs_maxfilesize = ump->um_savedmaxfilesize;
1018         }
1019         if (bigcgs) {
1020                 fs->fs_cgsize = fs->fs_save_cgsize;
1021                 fs->fs_save_cgsize = 0;
1022         }
1023 }
1024
1025 /*
1026  * unmount system call
1027  */
1028 static int
1029 ffs_unmount(mp, mntflags)
1030         struct mount *mp;
1031         int mntflags;
1032 {
1033         struct thread *td;
1034         struct ufsmount *ump = VFSTOUFS(mp);
1035         struct fs *fs;
1036         int error, flags, susp;
1037 #ifdef UFS_EXTATTR
1038         int e_restart;
1039 #endif
1040
1041         flags = 0;
1042         td = curthread;
1043         fs = ump->um_fs;
1044         if (mntflags & MNT_FORCE) {
1045                 flags |= FORCECLOSE;
1046                 susp = fs->fs_ronly != 0;
1047         } else
1048                 susp = 0;
1049 #ifdef UFS_EXTATTR
1050         if ((error = ufs_extattr_stop(mp, td))) {
1051                 if (error != EOPNOTSUPP)
1052                         printf("ffs_unmount: ufs_extattr_stop returned %d\n",
1053                             error);
1054                 e_restart = 0;
1055         } else {
1056                 ufs_extattr_uepm_destroy(&ump->um_extattr);
1057                 e_restart = 1;
1058         }
1059 #endif
1060         if (susp) {
1061                 /*
1062                  * dounmount already called vn_start_write().
1063                  */
1064                 for (;;) {
1065                         vn_finished_write(mp);
1066                         if ((error = vfs_write_suspend(mp)) != 0)
1067                                 return (error);
1068                         MNT_ILOCK(mp);
1069                         if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
1070                                 mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
1071                                     MNTK_SUSPEND2);
1072                                 wakeup(&mp->mnt_flag);
1073                                 MNT_IUNLOCK(mp);
1074                                 td->td_pflags |= TDP_IGNSUSP;
1075                                 break;
1076                         }
1077                         MNT_IUNLOCK(mp);
1078                         vn_start_write(NULL, &mp, V_WAIT);
1079                 }
1080         }
1081         if (mp->mnt_flag & MNT_SOFTDEP)
1082                 error = softdep_flushfiles(mp, flags, td);
1083         else
1084                 error = ffs_flushfiles(mp, flags, td);
1085         if (error != 0 && error != ENXIO)
1086                 goto fail;
1087
1088         UFS_LOCK(ump);
1089         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1090                 printf("%s: unmount pending error: blocks %jd files %d\n",
1091                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
1092                     fs->fs_pendinginodes);
1093                 fs->fs_pendingblocks = 0;
1094                 fs->fs_pendinginodes = 0;
1095         }
1096         UFS_UNLOCK(ump);
1097         if (fs->fs_ronly == 0) {
1098                 fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
1099                 error = ffs_sbupdate(ump, MNT_WAIT, 0);
1100                 if (error && error != ENXIO) {
1101                         fs->fs_clean = 0;
1102                         goto fail;
1103                 }
1104         }
1105         if (susp) {
1106                 vfs_write_resume(mp);
1107                 vn_start_write(NULL, &mp, V_WAIT);
1108         }
1109         DROP_GIANT();
1110         g_topology_lock();
1111         g_vfs_close(ump->um_cp);
1112         g_topology_unlock();
1113         PICKUP_GIANT();
1114         vrele(ump->um_devvp);
1115         dev_rel(ump->um_dev);
1116         mtx_destroy(UFS_MTX(ump));
1117         if (mp->mnt_gjprovider != NULL) {
1118                 free(mp->mnt_gjprovider, M_UFSMNT);
1119                 mp->mnt_gjprovider = NULL;
1120         }
1121         free(fs->fs_csp, M_UFSMNT);
1122         free(fs, M_UFSMNT);
1123         free(ump, M_UFSMNT);
1124         mp->mnt_data = NULL;
1125         MNT_ILOCK(mp);
1126         mp->mnt_flag &= ~MNT_LOCAL;
1127         MNT_IUNLOCK(mp);
1128         return (error);
1129
1130 fail:
1131         if (susp) {
1132                 vfs_write_resume(mp);
1133                 vn_start_write(NULL, &mp, V_WAIT);
1134         }
1135 #ifdef UFS_EXTATTR
1136         if (e_restart) {
1137                 ufs_extattr_uepm_init(&ump->um_extattr);
1138 #ifdef UFS_EXTATTR_AUTOSTART
1139                 (void) ufs_extattr_autostart(mp, td);
1140 #endif
1141         }
1142 #endif
1143
1144         return (error);
1145 }
1146
1147 /*
1148  * Flush out all the files in a filesystem.
1149  */
1150 int
1151 ffs_flushfiles(mp, flags, td)
1152         struct mount *mp;
1153         int flags;
1154         struct thread *td;
1155 {
1156         struct ufsmount *ump;
1157         int error;
1158
1159         ump = VFSTOUFS(mp);
1160 #ifdef QUOTA
1161         if (mp->mnt_flag & MNT_QUOTA) {
1162                 int i;
1163                 error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1164                 if (error)
1165                         return (error);
1166                 for (i = 0; i < MAXQUOTAS; i++) {
1167                         quotaoff(td, mp, i);
1168                 }
1169                 /*
1170                  * Here we fall through to vflush again to ensure
1171                  * that we have gotten rid of all the system vnodes.
1172                  */
1173         }
1174 #endif
1175         ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1176         if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1177                 if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1178                         return (error);
1179                 ffs_snapshot_unmount(mp);
1180                 flags |= FORCECLOSE;
1181                 /*
1182                  * Here we fall through to vflush again to ensure
1183                  * that we have gotten rid of all the system vnodes.
1184                  */
1185         }
1186         /*
1187          * Flush all the files.
1188          */
1189         if ((error = vflush(mp, 0, flags, td)) != 0)
1190                 return (error);
1191         /*
1192          * Flush filesystem metadata.
1193          */
1194         vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1195         error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1196         VOP_UNLOCK(ump->um_devvp, 0);
1197         return (error);
1198 }
1199
1200 /*
1201  * Get filesystem statistics.
1202  */
1203 static int
1204 ffs_statfs(mp, sbp)
1205         struct mount *mp;
1206         struct statfs *sbp;
1207 {
1208         struct ufsmount *ump;
1209         struct fs *fs;
1210
1211         ump = VFSTOUFS(mp);
1212         fs = ump->um_fs;
1213         if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1214                 panic("ffs_statfs");
1215         sbp->f_version = STATFS_VERSION;
1216         sbp->f_bsize = fs->fs_fsize;
1217         sbp->f_iosize = fs->fs_bsize;
1218         sbp->f_blocks = fs->fs_dsize;
1219         UFS_LOCK(ump);
1220         sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1221             fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1222         sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1223             dbtofsb(fs, fs->fs_pendingblocks);
1224         sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1225         sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1226         UFS_UNLOCK(ump);
1227         sbp->f_namemax = NAME_MAX;
1228         return (0);
1229 }
1230
1231 /*
1232  * Go through the disk queues to initiate sandbagged IO;
1233  * go through the inodes to write those that have been modified;
1234  * initiate the writing of the super block if it has been modified.
1235  *
1236  * Note: we are always called with the filesystem marked `MPBUSY'.
      *
      * mp        filesystem to sync
      * waitfor   MNT_WAIT: take each vnode lock without LK_NOWAIT and keep
      *           rescanning until a pass leaves nothing to flush;
      *           MNT_SUSPEND: handled as MNT_WAIT and, once the device
      *           vnode has no output in flight and no dirty buffers and
      *           softdep_check_suspend() returns 0, the mount is marked
      *           MNTK_SUSPEND2 | MNTK_SUSPENDED;
      *           any other value: vnode locks are tried with LK_NOWAIT,
      *           and MNT_LAZY additionally skips the device vnode fsync.
      *
      * Returns 0, or the most recent error recorded while syncing.
      * Panics if a read-only filesystem has fs_fmod set.
1237  */
1238 static int
1239 ffs_sync(mp, waitfor)
1240         struct mount *mp;
1241         int waitfor;
1242 {
1243         struct vnode *mvp, *vp, *devvp;
1244         struct thread *td;
1245         struct inode *ip;
1246         struct ufsmount *ump = VFSTOUFS(mp);
1247         struct fs *fs;
1248         int error, count, wait, lockreq, allerror = 0;
1249         int suspend;
1250         int suspended;
1251         int secondary_writes;
1252         int secondary_accwrites;
1253         int softdep_deps;
1254         int softdep_accdeps;
1255         struct bufobj *bo;
1256
1257         td = curthread;
1258         fs = ump->um_fs;
1259         if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {            /* XXX */
1260                 printf("fs = %s\n", fs->fs_fsmnt);
1261                 panic("ffs_sync: rofs mod");
1262         }
1263         /*
1264          * Write back each (modified) inode.
1265          */
             /*
              * NOTE(review): `wait' is assigned here and below but is never
              * read anywhere in this function.
              */
1266         wait = 0;
1267         suspend = 0;
1268         suspended = 0;
1269         lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1270         if (waitfor == MNT_SUSPEND) {
1271                 suspend = 1;
1272                 waitfor = MNT_WAIT;
1273         }
1274         if (waitfor == MNT_WAIT) {
1275                 wait = 1;
1276                 lockreq = LK_EXCLUSIVE;
1277         }
             /* vget() below is called with the vnode interlock still held. */
1278         lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
1279         MNT_ILOCK(mp);
             /*
              * Every jump to `loop' is made with the mount interlock held;
              * the counter snapshots below are retaken on each pass.
              */
1280 loop:
1281         /* Grab snapshot of secondary write counts */
1282         secondary_writes = mp->mnt_secondary_writes;
1283         secondary_accwrites = mp->mnt_secondary_accwrites;
1284
1285         /* Grab snapshot of softdep dependency counts */
1286         MNT_IUNLOCK(mp);
1287         softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
1288         MNT_ILOCK(mp);
1289
1290         MNT_VNODE_FOREACH(vp, mp, mvp) {
1291                 /*
1292                  * Depend on the mntvnode_slock to keep things stable enough
1293                  * for a quick test.  Since there might be hundreds of
1294                  * thousands of vnodes, we cannot afford even a subroutine
1295                  * call unless there's a good chance that we have work to do.
1296                  */
1297                 VI_LOCK(vp);
1298                 if (vp->v_iflag & VI_DOOMED) {
1299                         VI_UNLOCK(vp);
1300                         continue;
1301                 }
1302                 ip = VTOI(vp);
                     /*
                      * Skip vnodes with no type, and vnodes that have neither
                      * pending inode flag updates nor dirty buffers.
                      */
1303                 if (vp->v_type == VNON || ((ip->i_flag &
1304                     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1305                     vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1306                         VI_UNLOCK(vp);
1307                         continue;
1308                 }
1309                 MNT_IUNLOCK(mp);
1310                 if ((error = vget(vp, lockreq, td)) != 0) {
1311                         MNT_ILOCK(mp);
                             /*
                              * ENOENT or ENOLCK: abandon this traversal and
                              * rescan from the top.  Any other error only
                              * skips this vnode.
                              */
1312                         if (error == ENOENT || error == ENOLCK) {
1313                                 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1314                                 goto loop;
1315                         }
1316                         continue;
1317                 }
1318                 if ((error = ffs_syncvnode(vp, waitfor)) != 0)
1319                         allerror = error;
1320                 vput(vp);
1321                 MNT_ILOCK(mp);
1322         }
1323         MNT_IUNLOCK(mp);
1324         /*
1325          * Force stale filesystem control information to be flushed.
1326          */
1327         if (waitfor == MNT_WAIT) {
1328                 if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1329                         allerror = error;
1330                 /* Flushed work items may create new vnodes to clean */
1331                 if (allerror == 0 && count) {
1332                         MNT_ILOCK(mp);
1333                         goto loop;
1334                 }
1335         }
1336 #ifdef QUOTA
1337         qsync(mp);
1338 #endif
1339         devvp = ump->um_devvp;
1340         bo = &devvp->v_bufobj;
1341         BO_LOCK(bo);
             /*
              * If the device vnode still has output in flight or dirty
              * buffers (and this is not a lazy sync), fsync it; for
              * MNT_WAIT with no error so far, rescan afterwards.
              */
1342         if (waitfor != MNT_LAZY &&
1343             (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1344                 BO_UNLOCK(bo);
1345                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
1346                 if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
1347                         allerror = error;
1348                 VOP_UNLOCK(devvp, 0);
1349                 if (allerror == 0 && waitfor == MNT_WAIT) {
1350                         MNT_ILOCK(mp);
1351                         goto loop;
1352                 }
1353         } else if (suspend != 0) {
                     /*
                      * NOTE(review): softdep_check_suspend() is entered with
                      * the bufobj lock held and, judging by the goto and
                      * the mtx_assert() below, appears to return with the
                      * mount interlock held instead -- confirm against its
                      * definition.
                      */
1354                 if (softdep_check_suspend(mp,
1355                                           devvp,
1356                                           softdep_deps,
1357                                           softdep_accdeps,
1358                                           secondary_writes,
1359                                           secondary_accwrites) != 0)
1360                         goto loop;      /* More work needed */
1361                 mtx_assert(MNT_MTX(mp), MA_OWNED);
1362                 mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
1363                 MNT_IUNLOCK(mp);
1364                 suspended = 1;
1365         } else
1366                 BO_UNLOCK(bo);
1367         /*
1368          * Write back modified superblock.
1369          */
1370         if (fs->fs_fmod != 0 &&
1371             (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
1372                 allerror = error;
1373         return (allerror);
1374 }
1375
1376 int
1377 ffs_vget(mp, ino, flags, vpp)
1378         struct mount *mp;
1379         ino_t ino;
1380         int flags;
1381         struct vnode **vpp;
1382 {
1383         return (ffs_vgetf(mp, ino, flags, vpp, 0));
1384 }
1385
/*
 * Return in *vpp the vnode for inode number ino on mp, building a new
 * vnode/inode pair from the on-disk inode when the vfs hash has none.
 *
 * mp           filesystem the inode lives on
 * ino          inode number
 * flags        lock request passed to the vfs hash; a shared request is
 *              promoted to exclusive when a new vnode has to be created
 * vpp          receives the vnode, or NULL on the error paths that set it
 * ffs_flags    FFSV_FORCEINSMQ sets VV_FORCEINSMQ on the vnode for the
 *              duration of the insmntque() call
 *
 * Returns 0 on success or an errno value.
 */
1386 int
1387 ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
1388         struct mount *mp;
1389         ino_t ino;
1390         int flags;
1391         struct vnode **vpp;
1392         int ffs_flags;
1393 {
1394         struct fs *fs;
1395         struct inode *ip;
1396         struct ufsmount *ump;
1397         struct buf *bp;
1398         struct vnode *vp;
1399         struct cdev *dev;
1400         int error;
1401
             /* Fast path: hand back whatever the hash lookup produced. */
1402         error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
1403         if (error || *vpp != NULL)
1404                 return (error);
1405
1406         /*
1407          * We must promote to an exclusive lock for vnode creation.  This
1408          * can happen if lookup is passed LOCKSHARED.
1409          */
1410         if ((flags & LK_TYPE_MASK) == LK_SHARED) {
1411                 flags &= ~LK_TYPE_MASK;
1412                 flags |= LK_EXCLUSIVE;
1413         }
1414
1415         /*
1416          * We do not lock vnode creation as it is believed to be too
1417          * expensive for such rare case as simultaneous creation of vnode
1418          * for same ino by different processes. We just allow them to race
1419          * and check later to decide who wins. Let the race begin!
1420          */
1421
1422         ump = VFSTOUFS(mp);
1423         dev = ump->um_dev;
1424         fs = ump->um_fs;
1425
1426         /*
1427          * If this malloc() is performed after the getnewvnode()
1428          * it might block, leaving a vnode with a NULL v_data to be
1429          * found by ffs_sync() if a sync happens to fire right then,
1430          * which will cause a panic because ffs_sync() blindly
1431          * dereferences vp->v_data (as well it should).
1432          */
1433         ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
1434
1435         /* Allocate a new vnode/inode. */
1436         if (fs->fs_magic == FS_UFS1_MAGIC)
1437                 error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
1438         else
1439                 error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
1440         if (error) {
1441                 *vpp = NULL;
1442                 uma_zfree(uma_inode, ip);
1443                 return (error);
1444         }
1445         /*
1446          * FFS supports recursive locking.
1447          */
1448         VN_LOCK_AREC(vp);
             /* Tie the vnode and the in-core inode together. */
1449         vp->v_data = ip;
1450         vp->v_bufobj.bo_bsize = fs->fs_bsize;
1451         ip->i_vnode = vp;
1452         ip->i_ump = ump;
1453         ip->i_fs = fs;
1454         ip->i_dev = dev;
1455         ip->i_number = ino;
1456         ip->i_ea_refs = 0;
1457 #ifdef QUOTA
1458         {
1459                 int i;
1460                 for (i = 0; i < MAXQUOTAS; i++)
1461                         ip->i_dquot[i] = NODQUOT;
1462         }
1463 #endif
1464
             /* Lock the new vnode before it is put on the mount's list. */
1465         lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
1466         if (ffs_flags & FFSV_FORCEINSMQ)
1467                 vp->v_vflag |= VV_FORCEINSMQ;
1468         error = insmntque(vp, mp);
1469         if (error != 0) {
                     /*
                      * NOTE(review): only the inode is freed here; presumably
                      * insmntque() disposes of the vnode itself when it
                      * fails -- confirm.
                      */
1470                 uma_zfree(uma_inode, ip);
1471                 *vpp = NULL;
1472                 return (error);
1473         }
1474         vp->v_vflag &= ~VV_FORCEINSMQ;
             /*
              * Publish the vnode in the hash.  A non-NULL *vpp means another
              * vnode for this inode was found there; it is returned to the
              * caller instead of ours.
              */
1475         error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
1476         if (error || *vpp != NULL)
1477                 return (error);
1478
1479         /* Read in the disk contents for the inode, copy into the inode. */
1480         error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1481             (int)fs->fs_bsize, NOCRED, &bp);
1482         if (error) {
1483                 /*
1484                  * The inode does not contain anything useful, so it would
1485                  * be misleading to leave it on its hash chain. With mode
1486                  * still zero, it will be unlinked and returned to the free
1487                  * list by vput().
1488                  */
1489                 brelse(bp);
1490                 vput(vp);
1491                 *vpp = NULL;
1492                 return (error);
1493         }
             /* Allocate the dinode of the matching format and fill it in. */
1494         if (ip->i_ump->um_fstype == UFS1)
1495                 ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1496         else
1497                 ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1498         ffs_load_inode(bp, ip, fs, ino);
1499         if (DOINGSOFTDEP(vp))
1500                 softdep_load_inodeblock(ip);
1501         else
1502                 ip->i_effnlink = ip->i_nlink;
1503         bqrelse(bp);
1504
1505         /*
1506          * Initialize the vnode from the inode, check for aliases.
1507          * Note that the underlying vnode may have changed.
1508          */
1509         if (ip->i_ump->um_fstype == UFS1)
1510                 error = ufs_vinit(mp, &ffs_fifoops1, &vp);
1511         else
1512                 error = ufs_vinit(mp, &ffs_fifoops2, &vp);
1513         if (error) {
1514                 vput(vp);
1515                 *vpp = NULL;
1516                 return (error);
1517         }
1518
1519         /*
1520          * Finish inode initialization.
1521          */
1522         if (vp->v_type != VFIFO) {
1523                 /* FFS supports shared locking for all files except fifos. */
1524                 VN_LOCK_ASHARE(vp);
1525         }
1526
1527         /*
1528          * Set up a generation number for this inode if it does not
1529          * already have one. This should only happen on old filesystems.
1530          */
1531         if (ip->i_gen == 0) {
1532                 ip->i_gen = arc4random() / 2 + 1;
                     /* Only push the new number to the dinode if writable. */
1533                 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1534                         ip->i_flag |= IN_MODIFIED;
1535                         DIP_SET(ip, i_gen, ip->i_gen);
1536                 }
1537         }
1538         /*
1539          * Ensure that uid and gid are correct. This is a temporary
1540          * fix until fsck has been changed to do the update.
1541          */
1542         if (fs->fs_magic == FS_UFS1_MAGIC &&            /* XXX */
1543             fs->fs_old_inodefmt < FS_44INODEFMT) {      /* XXX */
1544                 ip->i_uid = ip->i_din1->di_ouid;        /* XXX */
1545                 ip->i_gid = ip->i_din1->di_ogid;        /* XXX */
1546         }                                               /* XXX */
1547
1548 #ifdef MAC
1549         if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1550                 /*
1551                  * If this vnode is already allocated, and we're running
1552                  * multi-label, attempt to perform a label association
1553                  * from the extended attributes on the inode.
1554                  */
1555                 error = mac_vnode_associate_extattr(mp, vp);
1556                 if (error) {
1557                         /* ufs_inactive will release ip->i_devvp ref. */
1558                         vput(vp);
1559                         *vpp = NULL;
1560                         return (error);
1561                 }
1562         }
1563 #endif
1564
1565         *vpp = vp;
1566         return (0);
1567 }
1568
1569 /*
1570  * File handle to vnode
1571  *
1572  * Have to be really careful about stale file handles:
1573  * - check that the inode number is valid
1574  * - call ffs_vget() to get the locked inode
1575  * - check for an unallocated inode (i_mode == 0)
1576  * - check that the given client host has export rights and return
1577  *   those rights via. exflagsp and credanonp
1578  */
1579 static int
1580 ffs_fhtovp(mp, fhp, vpp)
1581         struct mount *mp;
1582         struct fid *fhp;
1583         struct vnode **vpp;
1584 {
1585         struct ufid *ufhp;
1586         struct fs *fs;
1587
1588         ufhp = (struct ufid *)fhp;
1589         fs = VFSTOUFS(mp)->um_fs;
1590         if (ufhp->ufid_ino < ROOTINO ||
1591             ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1592                 return (ESTALE);
1593         return (ufs_fhtovp(mp, ufhp, vpp));
1594 }
1595
/*
 * Initialize the filesystem.
 *
 * Calls softdep_initialize() first, then passes vfsp to ufs_init() and
 * returns whatever ufs_init() returns.
 */
static int
ffs_init(struct vfsconf *vfsp)
{

	softdep_initialize();
	return (ufs_init(vfsp));
}
1607
/*
 * Undo the work of ffs_init().
 *
 * The teardown runs in the reverse order of ffs_init(): ufs_uninit() is
 * called first and softdep_uninitialize() afterwards.  The value returned
 * is the one produced by ufs_uninit().
 */
static int
ffs_uninit(struct vfsconf *vfsp)
{
	int ret;

	ret = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (ret);
}
1621
1622 /*
1623  * Write a superblock and associated information back to disk.
1624  */
1625 int
1626 ffs_sbupdate(mp, waitfor, suspended)
1627         struct ufsmount *mp;
1628         int waitfor;
1629         int suspended;
1630 {
1631         struct fs *fs = mp->um_fs;
1632         struct buf *sbbp;
1633         struct buf *bp;
1634         int blks;
1635         void *space;
1636         int i, size, error, allerror = 0;
1637
1638         if (fs->fs_ronly == 1 &&
1639             (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1640             (MNT_RDONLY | MNT_UPDATE))
1641                 panic("ffs_sbupdate: write read-only filesystem");
1642         /*
1643          * We use the superblock's buf to serialize calls to ffs_sbupdate().
1644          */
1645         sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1646             0, 0, 0);
1647         /*
1648          * First write back the summary information.
1649          */
1650         blks = howmany(fs->fs_cssize, fs->fs_fsize);
1651         space = fs->fs_csp;
1652         for (i = 0; i < blks; i += fs->fs_frag) {
1653                 size = fs->fs_bsize;
1654                 if (i + fs->fs_frag > blks)
1655                         size = (blks - i) * fs->fs_fsize;
1656                 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1657                     size, 0, 0, 0);
1658                 bcopy(space, bp->b_data, (u_int)size);
1659                 space = (char *)space + size;
1660                 if (suspended)
1661                         bp->b_flags |= B_VALIDSUSPWRT;
1662                 if (waitfor != MNT_WAIT)
1663                         bawrite(bp);
1664                 else if ((error = bwrite(bp)) != 0)
1665                         allerror = error;
1666         }
1667         /*
1668          * Now write back the superblock itself. If any errors occurred
1669          * up to this point, then fail so that the superblock avoids
1670          * being written out as clean.
1671          */
1672         if (allerror) {
1673                 brelse(sbbp);
1674                 return (allerror);
1675         }
1676         bp = sbbp;
1677         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1678             (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1679                 printf("%s: correcting fs_sblockloc from %jd to %d\n",
1680                     fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1681                 fs->fs_sblockloc = SBLOCK_UFS1;
1682         }
1683         if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1684             (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1685                 printf("%s: correcting fs_sblockloc from %jd to %d\n",
1686                     fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1687                 fs->fs_sblockloc = SBLOCK_UFS2;
1688         }
1689         fs->fs_fmod = 0;
1690         fs->fs_time = time_second;
1691         bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1692         ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1693         if (suspended)
1694                 bp->b_flags |= B_VALIDSUSPWRT;
1695         if (waitfor != MNT_WAIT)
1696                 bawrite(bp);
1697         else if ((error = bwrite(bp)) != 0)
1698                 allerror = error;
1699         return (allerror);
1700 }
1701
/*
 * Extended attribute control entry point.
 *
 * Forwards all arguments to ufs_extattrctl() when the kernel is built
 * with UFS_EXTATTR, and to vfs_stdextattrctl() otherwise; the callee's
 * return value is passed back unchanged.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace, attrname);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname);
#endif
	return (error);
}
1715
1716 static void
1717 ffs_ifree(struct ufsmount *ump, struct inode *ip)
1718 {
1719
1720         if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1721                 uma_zfree(uma_ufs1, ip->i_din1);
1722         else if (ip->i_din2 != NULL)
1723                 uma_zfree(uma_ufs2, ip->i_din2);
1724         uma_zfree(uma_inode, ip);
1725 }
1726
1727 static int dobkgrdwrite = 1;
1728 SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
1729     "Do background writes (honoring the BV_BKGRDWRITE flag)?");
1730
1731 /*
1732  * Complete a background write started from bwrite.
1733  */
1734 static void
1735 ffs_backgroundwritedone(struct buf *bp)
1736 {
1737         struct bufobj *bufobj;
1738         struct buf *origbp;
1739
1740         /*
1741          * Find the original buffer that we are writing.
1742          */
1743         bufobj = bp->b_bufobj;
1744         BO_LOCK(bufobj);
1745         if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
1746                 panic("backgroundwritedone: lost buffer");
1747         /* Grab an extra reference to be dropped by the bufdone() below. */
1748         bufobj_wrefl(bufobj);
1749         BO_UNLOCK(bufobj);
1750         /*
1751          * Process dependencies then return any unfinished ones.
1752          */
1753         if (!LIST_EMPTY(&bp->b_dep))
1754                 buf_complete(bp);
1755 #ifdef SOFTUPDATES
1756         if (!LIST_EMPTY(&bp->b_dep))
1757                 softdep_move_dependencies(bp, origbp);
1758 #endif
1759         /*
1760          * This buffer is marked B_NOCACHE so when it is released
1761          * by biodone it will be tossed.
1762          */
1763         bp->b_flags |= B_NOCACHE;
1764         bp->b_flags &= ~B_CACHE;
1765         bufdone(bp);
1766         BO_LOCK(bufobj);
1767         /*
1768          * Clear the BV_BKGRDINPROG flag in the original buffer
1769          * and awaken it if it is waiting for the write to complete.
1770          * If BV_BKGRDINPROG is not set in the original buffer it must
1771          * have been released and re-instantiated - which is not legal.
1772          */
1773         KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
1774             ("backgroundwritedone: lost buffer2"));
1775         origbp->b_vflags &= ~BV_BKGRDINPROG;
1776         if (origbp->b_vflags & BV_BKGRDWAIT) {
1777                 origbp->b_vflags &= ~BV_BKGRDWAIT;
1778                 wakeup(&origbp->b_xflags);
1779         }
1780         BO_UNLOCK(bufobj);
1781 }
1782
1783
1784 /*
1785  * Write, release buffer on completion.  (Done by iodone
1786  * if async).  Do not bother writing anything if the buffer
1787  * is invalid.
1788  *
1789  * Note that we set B_CACHE here, indicating that buffer is
1790  * fully valid and thus cacheable.  This is true even of NFS
1791  * now so we set it generally.  This could be set either here
1792  * or in biodone() since the I/O is synchronous.  We put it
1793  * here.
1794  */
1795 static int
1796 ffs_bufwrite(struct buf *bp)
1797 {
1798         int oldflags, s;
1799         struct buf *newbp;
1800
1801         CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
1802         if (bp->b_flags & B_INVAL) {
1803                 brelse(bp);
1804                 return (0);
1805         }
1806
1807         oldflags = bp->b_flags;
1808
1809         if (!BUF_ISLOCKED(bp))
1810                 panic("bufwrite: buffer is not busy???");
1811         s = splbio();
1812         /*
1813          * If a background write is already in progress, delay
1814          * writing this block if it is asynchronous. Otherwise
1815          * wait for the background write to complete.
1816          */
1817         BO_LOCK(bp->b_bufobj);
1818         if (bp->b_vflags & BV_BKGRDINPROG) {
1819                 if (bp->b_flags & B_ASYNC) {
1820                         BO_UNLOCK(bp->b_bufobj);
1821                         splx(s);
1822                         bdwrite(bp);
1823                         return (0);
1824                 }
1825                 bp->b_vflags |= BV_BKGRDWAIT;
1826                 msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
1827                 if (bp->b_vflags & BV_BKGRDINPROG)
1828                         panic("bufwrite: still writing");
1829         }
1830         BO_UNLOCK(bp->b_bufobj);
1831
1832         /* Mark the buffer clean */
1833         bundirty(bp);
1834
1835         /*
1836          * If this buffer is marked for background writing and we
1837          * do not have to wait for it, make a copy and write the
1838          * copy so as to leave this buffer ready for further use.
1839          *
1840          * This optimization eats a lot of memory.  If we have a page
1841          * or buffer shortfall we can't do it.
1842          */
1843         if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
1844             (bp->b_flags & B_ASYNC) &&
1845             !vm_page_count_severe() &&
1846             !buf_dirty_count_severe()) {
1847                 KASSERT(bp->b_iodone == NULL,
1848                     ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
1849
1850                 /* get a new block */
1851                 newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
1852                 if (newbp == NULL)
1853                         goto normal_write;
1854
1855                 /*
1856                  * set it to be identical to the old block.  We have to
1857                  * set b_lblkno and BKGRDMARKER before calling bgetvp()
1858                  * to avoid confusing the splay tree and gbincore().
1859                  */
1860                 memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
1861                 newbp->b_lblkno = bp->b_lblkno;
1862                 newbp->b_xflags |= BX_BKGRDMARKER;
1863                 BO_LOCK(bp->b_bufobj);
1864                 bp->b_vflags |= BV_BKGRDINPROG;
1865                 bgetvp(bp->b_vp, newbp);
1866                 BO_UNLOCK(bp->b_bufobj);
1867                 newbp->b_bufobj = &bp->b_vp->v_bufobj;
1868                 newbp->b_blkno = bp->b_blkno;
1869                 newbp->b_offset = bp->b_offset;
1870                 newbp->b_iodone = ffs_backgroundwritedone;
1871                 newbp->b_flags |= B_ASYNC;
1872                 newbp->b_flags &= ~B_INVAL;
1873
1874 #ifdef SOFTUPDATES
1875                 /* move over the dependencies */
1876                 if (!LIST_EMPTY(&bp->b_dep))
1877                         softdep_move_dependencies(bp, newbp);
1878 #endif
1879
1880                 /*
1881                  * Initiate write on the copy, release the original to
1882                  * the B_LOCKED queue so that it cannot go away until
1883                  * the background write completes. If not locked it could go
1884                  * away and then be reconstituted while it was being written.
1885                  * If the reconstituted buffer were written, we could end up
1886                  * with two background copies being written at the same time.
1887                  */
1888                 bqrelse(bp);
1889                 bp = newbp;
1890         }
1891
1892         /* Let the normal bufwrite do the rest for us */
1893 normal_write:
1894         return (bufwrite(bp));
1895 }
1896
1897
1898 static void
1899 ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
1900 {
1901         struct vnode *vp;
1902         int error;
1903         struct buf *tbp;
1904
1905         vp = bo->__bo_vnode;
1906         if (bp->b_iocmd == BIO_WRITE) {
1907                 if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
1908                     bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
1909                     (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
1910                         panic("ffs_geom_strategy: bad I/O");
1911                 bp->b_flags &= ~B_VALIDSUSPWRT;
1912                 if ((vp->v_vflag & VV_COPYONWRITE) &&
1913                     vp->v_rdev->si_snapdata != NULL) {
1914                         if ((bp->b_flags & B_CLUSTER) != 0) {
1915                                 runningbufwakeup(bp);
1916                                 TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1917                                               b_cluster.cluster_entry) {
1918                                         error = ffs_copyonwrite(vp, tbp);
1919                                         if (error != 0 &&
1920                                             error != EOPNOTSUPP) {
1921                                                 bp->b_error = error;
1922                                                 bp->b_ioflags |= BIO_ERROR;
1923                                                 bufdone(bp);
1924                                                 return;
1925                                         }
1926                                 }
1927                                 bp->b_runningbufspace = bp->b_bufsize;
1928                                 atomic_add_long(&runningbufspace,
1929                                                bp->b_runningbufspace);
1930                         } else {
1931                                 error = ffs_copyonwrite(vp, bp);
1932                                 if (error != 0 && error != EOPNOTSUPP) {
1933                                         bp->b_error = error;
1934                                         bp->b_ioflags |= BIO_ERROR;
1935                                         bufdone(bp);
1936                                         return;
1937                                 }
1938                         }
1939                 }
1940 #ifdef SOFTUPDATES
1941                 if ((bp->b_flags & B_CLUSTER) != 0) {
1942                         TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1943                                       b_cluster.cluster_entry) {
1944                                 if (!LIST_EMPTY(&tbp->b_dep))
1945                                         buf_start(tbp);
1946                         }
1947                 } else {
1948                         if (!LIST_EMPTY(&bp->b_dep))
1949                                 buf_start(bp);
1950                 }
1951
1952 #endif
1953         }
1954         g_vfs_strategy(bo, bp);
1955 }
1956
1957 #ifdef  DDB
1958
1959 static void
1960 db_print_ffs(struct ufsmount *ump)
1961 {
1962         db_printf("mp %p %s devvp %p fs %p su_wl %d su_wl_in %d su_deps %d "
1963                   "su_req %d\n",
1964             ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
1965             ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
1966             ump->softdep_on_worklist_inprogress, ump->softdep_deps,
1967             ump->softdep_req);
1968 }
1969
1970 DB_SHOW_COMMAND(ffs, db_show_ffs)
1971 {
1972         struct mount *mp;
1973         struct ufsmount *ump;
1974
1975         if (have_addr) {
1976                 ump = VFSTOUFS((struct mount *)addr);
1977                 db_print_ffs(ump);
1978                 return;
1979         }
1980
1981         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
1982                 if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
1983                         db_print_ffs(VFSTOUFS(mp));
1984         }
1985 }
1986
1987 #endif  /* DDB */