sys/ufs/ffs/ffs_vfsops.c

   1 /*-
   2  * Copyright (c) 1989, 1991, 1993, 1994
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 4. Neither the name of the University nor the names of its contributors
  14  *    may be used to endorse or promote products derived from this software
  15  *    without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  *
  29  *      @(#)ffs_vfsops.c        8.31 (Berkeley) 5/20/95
  30  */
  31
  32 #include <sys/cdefs.h>
  33 __FBSDID("$FreeBSD$");
  34
  35 #include "opt_mac.h"
  36 #include "opt_quota.h"
  37 #include "opt_ufs.h"
  38 #include "opt_ffs.h"
  39 #include "opt_ddb.h"
  40
  41 #include <sys/param.h>
  42 #include <sys/systm.h>
  43 #include <sys/namei.h>
  44 #include <sys/priv.h>
  45 #include <sys/proc.h>
  46 #include <sys/kernel.h>
  47 #include <sys/vnode.h>
  48 #include <sys/mount.h>
  49 #include <sys/bio.h>
  50 #include <sys/buf.h>
  51 #include <sys/conf.h>
  52 #include <sys/fcntl.h>
  53 #include <sys/malloc.h>
  54 #include <sys/mutex.h>
  55
  56 #include <security/mac/mac_framework.h>
  57
  58 #include <ufs/ufs/extattr.h>
  59 #include <ufs/ufs/gjournal.h>
  60 #include <ufs/ufs/quota.h>
  61 #include <ufs/ufs/ufsmount.h>
  62 #include <ufs/ufs/inode.h>
  63 #include <ufs/ufs/ufs_extern.h>
  64
  65 #include <ufs/ffs/fs.h>
  66 #include <ufs/ffs/ffs_extern.h>
  67
  68 #include <vm/vm.h>
  69 #include <vm/uma.h>
  70 #include <vm/vm_page.h>
  71
  72 #include <geom/geom.h>
  73 #include <geom/geom_vfs.h>
  74
  75 #include <ddb/ddb.h>
  76
  77 static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
  78
  79 static int      ffs_reload(struct mount *, struct thread *);
  80 static int      ffs_mountfs(struct vnode *, struct mount *, struct thread *);
  81 static void     ffs_oldfscompat_read(struct fs *, struct ufsmount *,
  82                     ufs2_daddr_t);
  83 static void     ffs_oldfscompat_write(struct fs *, struct ufsmount *);
  84 static void     ffs_ifree(struct ufsmount *ump, struct inode *ip);
  85 static vfs_init_t ffs_init;
  86 static vfs_uninit_t ffs_uninit;
  87 static vfs_extattrctl_t ffs_extattrctl;
  88 static vfs_cmount_t ffs_cmount;
  89 static vfs_unmount_t ffs_unmount;
  90 static vfs_mount_t ffs_mount;
  91 static vfs_statfs_t ffs_statfs;
  92 static vfs_fhtovp_t ffs_fhtovp;
  93 static vfs_sync_t ffs_sync;
  94
  95 static struct vfsops ufs_vfsops = {
  96         .vfs_extattrctl =       ffs_extattrctl,
  97         .vfs_fhtovp =           ffs_fhtovp,
  98         .vfs_init =             ffs_init,
  99         .vfs_mount =            ffs_mount,
 100         .vfs_cmount =           ffs_cmount,
 101         .vfs_quotactl =         ufs_quotactl,
 102         .vfs_root =             ufs_root,
 103         .vfs_statfs =           ffs_statfs,
 104         .vfs_sync =             ffs_sync,
 105         .vfs_uninit =           ffs_uninit,
 106         .vfs_unmount =          ffs_unmount,
 107         .vfs_vget =             ffs_vget,
 108         .vfs_susp_clean =       process_deferred_inactive,
 109 };
 110
 111 VFS_SET(ufs_vfsops, ufs, 0);
 112 MODULE_VERSION(ufs, 1);
 113
 114 static b_strategy_t ffs_geom_strategy;
 115 static b_write_t ffs_bufwrite;
 116
 117 static struct buf_ops ffs_ops = {
 118         .bop_name =     "FFS",
 119         .bop_write =    ffs_bufwrite,
 120         .bop_strategy = ffs_geom_strategy,
 121         .bop_sync =     bufsync,
 122 #ifdef NO_FFS_SNAPSHOT
 123         .bop_bdflush =  bufbdflush,
 124 #else
 125         .bop_bdflush =  ffs_bdflush,
 126 #endif
 127 };
 128
 129 static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
 130     "noclusterw", "noexec", "export", "force", "from", "multilabel",
 131     "snapshot", "nosuid", "suiddir", "nosymfollow", "sync",
 132     "union", NULL };
 133
     /*
      * Mount entry point for UFS/FFS: handles new mounts as well as
      * MNT_UPDATE requests on an existing mount.
      *
      * For MNT_UPDATE the function can, in this order, downgrade the
      * filesystem from read-write to read-only, reload in-core state
      * (MNT_RELOAD), upgrade it from read-only to read-write, and take a
      * snapshot (MNT_SNAPSHOT).  In every other case it looks up the
      * device named by the "from" option, checks the caller's access to
      * it, and either verifies that an update refers to the same device
      * or calls ffs_mountfs() for a new mount.
      *
      * Returns 0 on success and an errno value on failure.
      */
 134 static int
 135 ffs_mount(struct mount *mp, struct thread *td)
 136 {
 137         struct vnode *devvp;
 138         struct ufsmount *ump = 0;
 139         struct fs *fs;
 140         int error, flags;
 141         u_int mntorflags, mntandnotflags;
 142         mode_t accessmode;
 143         struct nameidata ndp;
 144         char *fspec;
 145
             /* Fail with EINVAL when vfs_filteropt() rejects the new options. */
 146         if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
 147                 return (EINVAL);
             /*
              * The UMA zones for struct inode and the UFS1/UFS2 dinodes
              * are created here the first time through, i.e. while
              * uma_inode is still NULL.
              */
 148         if (uma_inode == NULL) {
 149                 uma_inode = uma_zcreate("FFS inode",
 150                     sizeof(struct inode), NULL, NULL, NULL, NULL,
 151                     UMA_ALIGN_PTR, 0);
 152                 uma_ufs1 = uma_zcreate("FFS1 dinode",
 153                     sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
 154                     UMA_ALIGN_PTR, 0);
 155                 uma_ufs2 = uma_zcreate("FFS2 dinode",
 156                     sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
 157                     UMA_ALIGN_PTR, 0);
 158         }
 159
             /*
              * The "from" option is mandatory.  Its value is used below as
              * the path handed to namei() and as the argument given to
              * ffs_snapshot().
              */
 160         fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
 161         if (error)
 162                 return (error);
 163
             /*
              * Collect the mount flags to set (mntorflags) and to clear
              * (mntandnotflags) from the options; nothing in this function
              * adds bits to mntandnotflags.
              */
 164         mntorflags = 0;
 165         mntandnotflags = 0;
 166         if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
 167                 mntorflags |= MNT_ACLS;
 168
 169         if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
 170                 mntorflags |= MNT_SNAPSHOT;
 171                 /*
 172                  * Once we have set the MNT_SNAPSHOT flag, do not
 173                  * persist "snapshot" in the options list.
 174                  */
 175                 vfs_deleteopt(mp->mnt_optnew, "snapshot");
 176                 vfs_deleteopt(mp->mnt_opt, "snapshot");
 177         }
 178
             /* mnt_flag is modified with MNT_ILOCK held throughout this function. */
 179         MNT_ILOCK(mp);
 180         mp->mnt_flag = (mp->mnt_flag | mntorflags) & ~mntandnotflags;
 181         MNT_IUNLOCK(mp);
 182         /*
 183          * If updating, check whether changing from read-only to
 184          * read/write; if there is no device name, that's all we do.
 185          */
 186         if (mp->mnt_flag & MNT_UPDATE) {
 187                 ump = VFSTOUFS(mp);
 188                 fs = ump->um_fs;
 189                 devvp = ump->um_devvp;
                     /*
                      * Downgrade read-write -> read-only: sync, flush files
                      * open for writing, write the superblock (marked clean
                      * when neither FS_UNCLEAN nor FS_NEEDSFSCK is set), and
                      * only then adjust the GEOM access counts and flags.
                      */
 190                 if (fs->fs_ronly == 0 &&
 191                     vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 192                         if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 193                                 return (error);
 194                         /*
 195                          * Flush any dirty data.
 196                          */
 197                         if ((error = ffs_sync(mp, MNT_WAIT, td)) != 0) {
 198                                 vn_finished_write(mp);
 199                                 return (error);
 200                         }
 201                         /*
 202                          * Check for and optionally get rid of files open
 203                          * for writing.
 204                          */
 205                         flags = WRITECLOSE;
 206                         if (mp->mnt_flag & MNT_FORCE)
 207                                 flags |= FORCECLOSE;
 208                         if (mp->mnt_flag & MNT_SOFTDEP) {
 209                                 error = softdep_flushfiles(mp, flags, td);
 210                         } else {
 211                                 error = ffs_flushfiles(mp, flags, td);
 212                         }
 213                         if (error) {
 214                                 vn_finished_write(mp);
 215                                 return (error);
 216                         }
                             /*
                              * Nonzero pending counts at this point are
                              * reported and then reset.
                              */
 217                         if (fs->fs_pendingblocks != 0 ||
 218                             fs->fs_pendinginodes != 0) {
 219                                 printf("%s: %s: blocks %jd files %d\n",
 220                                     fs->fs_fsmnt, "update error",
 221                                     (intmax_t)fs->fs_pendingblocks,
 222                                     fs->fs_pendinginodes);
 223                                 fs->fs_pendingblocks = 0;
 224                                 fs->fs_pendinginodes = 0;
 225                         }
 226                         if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
 227                                 fs->fs_clean = 1;
                             /*
                              * If the superblock cannot be written, stay
                              * read-write and not clean.
                              */
 228                         if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 229                                 fs->fs_ronly = 0;
 230                                 fs->fs_clean = 0;
 231                                 vn_finished_write(mp);
 232                                 return (error);
 233                         }
 234                         vn_finished_write(mp);
                             /*
                              * Decrement the write access count on the GEOM
                              * consumer (second delta argument of g_access()).
                              */
 235                         DROP_GIANT();
 236                         g_topology_lock();
 237                         g_access(ump->um_cp, 0, -1, 0);
 238                         g_topology_unlock();
 239                         PICKUP_GIANT();
 240                         fs->fs_ronly = 1;
 241                         MNT_ILOCK(mp);
 242                         mp->mnt_flag |= MNT_RDONLY;
 243                         MNT_IUNLOCK(mp);
 244                 }
 245                 if ((mp->mnt_flag & MNT_RELOAD) &&
 246                     (error = ffs_reload(mp, td)) != 0)
 247                         return (error);
                     /* Upgrade read-only -> read-write when "ro" is not requested. */
 248                 if (fs->fs_ronly &&
 249                     !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 250                         /*
 251                          * If upgrade to read-write by non-root, then verify
 252                          * that user has necessary permissions on the device.
 253                          */
 254                         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 255                         error = VOP_ACCESS(devvp, VREAD | VWRITE,
 256                             td->td_ucred, td);
 257                         if (error)
 258                                 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 259                         if (error) {
 260                                 VOP_UNLOCK(devvp, 0);
 261                                 return (error);
 262                         }
 263                         VOP_UNLOCK(devvp, 0);
                             /*
                              * Recompute FS_UNCLEAN from fs_clean.  An unclean
                              * filesystem is only allowed to go read-write with
                              * MNT_FORCE, or when FS_DOSOFTDEP is set and
                              * FS_NEEDSFSCK is not; otherwise fail with EPERM.
                              */
 264                         fs->fs_flags &= ~FS_UNCLEAN;
 265                         if (fs->fs_clean == 0) {
 266                                 fs->fs_flags |= FS_UNCLEAN;
 267                                 if ((mp->mnt_flag & MNT_FORCE) ||
 268                                     ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
 269                                      (fs->fs_flags & FS_DOSOFTDEP))) {
 270                                         printf("WARNING: %s was not %s\n",
 271                                            fs->fs_fsmnt, "properly dismounted");
 272                                 } else {
 273                                         printf(
 274 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 275                                             fs->fs_fsmnt);
 276                                         return (EPERM);
 277                                 }
 278                         }
 279                         DROP_GIANT();
 280                         g_topology_lock();
 281                         /*
 282                          * If we're the root device, we may not have an E count
 283                          * yet, get it now.
 284                          */
 285                         if (ump->um_cp->ace == 0)
 286                                 error = g_access(ump->um_cp, 0, 1, 1);
 287                         else
 288                                 error = g_access(ump->um_cp, 0, 1, 0);
 289                         g_topology_unlock();
 290                         PICKUP_GIANT();
 291                         if (error)
 292                                 return (error);
                             /*
                              * NOTE(review): the error returns from here to the
                              * end of this branch do not undo the g_access()
                              * request made above, and the later ones also
                              * leave fs_ronly and MNT_RDONLY already cleared -
                              * confirm whether that is intended.
                              */
 293                         if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 294                                 return (error);
 295                         fs->fs_ronly = 0;
 296                         MNT_ILOCK(mp);
 297                         mp->mnt_flag &= ~MNT_RDONLY;
 298                         MNT_IUNLOCK(mp);
                             /* Write the superblock out marked as not clean. */
 299                         fs->fs_clean = 0;
 300                         if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 301                                 vn_finished_write(mp);
 302                                 return (error);
 303                         }
 304                         /* check to see if we need to start softdep */
 305                         if ((fs->fs_flags & FS_DOSOFTDEP) &&
 306                             (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
 307                                 vn_finished_write(mp);
 308                                 return (error);
 309                         }
 310                         if (fs->fs_snapinum[0] != 0)
 311                                 ffs_snapshot_mount(mp);
 312                         vn_finished_write(mp);
 313                 }
 314                 /*
 315                  * Soft updates is incompatible with "async",
 316                  * so if we are doing softupdates stop the user
 317                  * from setting the async flag in an update.
 318                  * Softdep_mount() clears it in an initial mount
 319                  * or ro->rw remount.
 320                  */
 321                 if (mp->mnt_flag & MNT_SOFTDEP) {
 322                         /* XXX: Reset too late ? */
 323                         MNT_ILOCK(mp);
 324                         mp->mnt_flag &= ~MNT_ASYNC;
 325                         MNT_IUNLOCK(mp);
 326                 }
 327                 /*
 328                  * Keep MNT_ACLS flag if it is stored in superblock.
 329                  */
 330                 if ((fs->fs_flags & FS_ACLS) != 0) {
 331                         /* XXX: Set too late ? */
 332                         MNT_ILOCK(mp);
 333                         mp->mnt_flag |= MNT_ACLS;
 334                         MNT_IUNLOCK(mp);
 335                 }
 336
 337                 /*
 338                  * If this is a snapshot request, take the snapshot.
 339                  */
 340                 if (mp->mnt_flag & MNT_SNAPSHOT)
 341                         return (ffs_snapshot(mp, fspec));
 342         }
 343
 344         /*
 345          * Not an update, or updating the name: look up the name
 346          * and verify that it refers to a sensible disk device.
 347          */
 348         NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
 349         if ((error = namei(&ndp)) != 0)
 350                 return (error);
 351         NDFREE(&ndp, NDF_ONLY_PNBUF);
 352         devvp = ndp.ni_vp;
 353         if (!vn_isdisk(devvp, &error)) {
 354                 vput(devvp);
 355                 return (error);
 356         }
 357
 358         /*
 359          * If mount by non-root, then verify that user has necessary
 360          * permissions on the device.
 361          */
 362         accessmode = VREAD;
 363         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 364                 accessmode |= VWRITE;
 365         error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td);
 366         if (error)
 367                 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 368         if (error) {
 369                 vput(devvp);
 370                 return (error);
 371         }
 372
 373         if (mp->mnt_flag & MNT_UPDATE) {
 374                 /*
 375                  * Update only
 376                  *
 377                  * If it's not the same vnode, or at least the same device
 378                  * then it's not correct.
 379                  */
 380
 381                 if (devvp->v_rdev != ump->um_devvp->v_rdev)
 382                         error = EINVAL; /* needs translation */
                     /* The looked-up vnode is released whether or not it matched. */
 383                 vput(devvp);
 384                 if (error)
 385                         return (error);
 386         } else {
 387                 /*
 388                  * New mount
 389                  *
 390                  * We need the name for the mount point (also used for
 391                  * "last mounted on") copied in. If an error occurs,
 392                  * the mount point is discarded by the upper level code.
 393                  * Note that vfs_mount() populates f_mntonname for us.
 394                  */
 395                 if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
                             /*
                              * ffs_mountfs() calls VOP_UNLOCK() on devvp before
                              * any of its error returns, so only the reference
                              * is dropped here.
                              */
 396                         vrele(devvp);
 397                         return (error);
 398                 }
 399         }
             /* Pass the device name to vfs_mountedfrom() for this mount. */
 400         vfs_mountedfrom(mp, fspec);
 401         return (0);
 402 }
 403
 404 /*
 405  * Compatibility with old mount system call.
 406  */
 407
 408 static int
 409 ffs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
 410 {
 411         struct ufs_args args;
 412         int error;
 413
 414         if (data == NULL)
 415                 return (EINVAL);
 416         error = copyin(data, &args, sizeof args);
 417         if (error)
 418                 return (error);
 419
 420         ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
 421         ma = mount_arg(ma, "export", &args.export, sizeof args.export);
 422         error = kernel_mount(ma, flags);
 423
 424         return (error);
 425 }
 426
 427 /*
 428  * Reload all incore data for a filesystem (used after running fsck on
 429  * the root filesystem and finding things to fix). The filesystem must
 430  * be mounted read-only.
 431  *
 432  * Things to do to update the mount:
 433  *      1) invalidate all cached meta-data.
 434  *      2) re-read superblock from disk.
 435  *      3) re-read summary information from disk.
 436  *      4) invalidate all inactive vnodes.
 437  *      5) invalidate all cached file data.
 438  *      6) re-read inode data for all active vnodes.
 439  */
 440 static int
 441 ffs_reload(struct mount *mp, struct thread *td)
 442 {
 443         struct vnode *vp, *mvp, *devvp;
 444         struct inode *ip;
 445         void *space;
 446         struct buf *bp;
 447         struct fs *fs, *newfs;
 448         struct ufsmount *ump;
 449         ufs2_daddr_t sblockloc;
 450         int i, blks, size, error;
 451         int32_t *lp;
 452
 453         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 454                 return (EINVAL);
 455         ump = VFSTOUFS(mp);
 456         /*
 457          * Step 1: invalidate all cached meta-data.
 458          */
 459         devvp = VFSTOUFS(mp)->um_devvp;
 460         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 461         if (vinvalbuf(devvp, 0, td, 0, 0) != 0)
 462                 panic("ffs_reload: dirty1");
 463         VOP_UNLOCK(devvp, 0);
 464
 465         /*
 466          * Step 2: re-read superblock from disk.
 467          */
 468         fs = VFSTOUFS(mp)->um_fs;
 469         if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
 470             NOCRED, &bp)) != 0)
 471                 return (error);
 472         newfs = (struct fs *)bp->b_data;
 473         if ((newfs->fs_magic != FS_UFS1_MAGIC &&
 474              newfs->fs_magic != FS_UFS2_MAGIC) ||
 475             newfs->fs_bsize > MAXBSIZE ||
 476             newfs->fs_bsize < sizeof(struct fs)) {
 477                         brelse(bp);
 478                         return (EIO);           /* XXX needs translation */
 479         }
 480         /*
 481          * Copy pointer fields back into superblock before copying in   XXX
 482          * new superblock. These should really be in the ufsmount.      XXX
 483          * Note that important parameters (eg fs_ncg) are unchanged.
 484          */
 485         newfs->fs_csp = fs->fs_csp;
 486         newfs->fs_maxcluster = fs->fs_maxcluster;
 487         newfs->fs_contigdirs = fs->fs_contigdirs;
 488         newfs->fs_active = fs->fs_active;
 489         /* The file system is still read-only. */
 490         newfs->fs_ronly = 1;
 491         sblockloc = fs->fs_sblockloc;
 492         bcopy(newfs, fs, (u_int)fs->fs_sbsize);
 493         brelse(bp);
 494         mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 495         ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
 496         UFS_LOCK(ump);
 497         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 498                 printf("%s: reload pending error: blocks %jd files %d\n",
 499                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 500                     fs->fs_pendinginodes);
 501                 fs->fs_pendingblocks = 0;
 502                 fs->fs_pendinginodes = 0;
 503         }
 504         UFS_UNLOCK(ump);
 505
 506         /*
 507          * Step 3: re-read summary information from disk.
 508          */
 509         blks = howmany(fs->fs_cssize, fs->fs_fsize);
 510         space = fs->fs_csp;
 511         for (i = 0; i < blks; i += fs->fs_frag) {
 512                 size = fs->fs_bsize;
 513                 if (i + fs->fs_frag > blks)
 514                         size = (blks - i) * fs->fs_fsize;
 515                 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
 516                     NOCRED, &bp);
 517                 if (error)
 518                         return (error);
 519                 bcopy(bp->b_data, space, (u_int)size);
 520                 space = (char *)space + size;
 521                 brelse(bp);
 522         }
 523         /*
 524          * We no longer know anything about clusters per cylinder group.
 525          */
 526         if (fs->fs_contigsumsize > 0) {
 527                 lp = fs->fs_maxcluster;
 528                 for (i = 0; i < fs->fs_ncg; i++)
 529                         *lp++ = fs->fs_contigsumsize;
 530         }
 531
 532 loop:
 533         MNT_ILOCK(mp);
 534         MNT_VNODE_FOREACH(vp, mp, mvp) {
 535                 VI_LOCK(vp);
 536                 if (vp->v_iflag & VI_DOOMED) {
 537                         VI_UNLOCK(vp);
 538                         continue;
 539                 }
 540                 MNT_IUNLOCK(mp);
 541                 /*
 542                  * Step 4: invalidate all cached file data.
 543                  */
 544                 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
 545                         MNT_VNODE_FOREACH_ABORT(mp, mvp);
 546                         goto loop;
 547                 }
 548                 if (vinvalbuf(vp, 0, td, 0, 0))
 549                         panic("ffs_reload: dirty2");
 550                 /*
 551                  * Step 5: re-read inode data for all active vnodes.
 552                  */
 553                 ip = VTOI(vp);
 554                 error =
 555                     bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 556                     (int)fs->fs_bsize, NOCRED, &bp);
 557                 if (error) {
 558                         VOP_UNLOCK(vp, 0);
 559                         vrele(vp);
 560                         MNT_VNODE_FOREACH_ABORT(mp, mvp);
 561                         return (error);
 562                 }
 563                 ffs_load_inode(bp, ip, fs, ip->i_number);
 564                 ip->i_effnlink = ip->i_nlink;
 565                 brelse(bp);
 566                 VOP_UNLOCK(vp, 0);
 567                 vrele(vp);
 568                 MNT_ILOCK(mp);
 569         }
 570         MNT_IUNLOCK(mp);
 571         return (0);
 572 }
 573
 574 /*
 575  * Possible superblock locations ordered from most to least likely.
      * The list comes from SBLOCKSEARCH; ffs_mountfs() tries each entry in
      * turn and stops when it reaches an entry equal to -1.
 576  */
 577 static int sblock_try[] = SBLOCKSEARCH;
 578
 579 /*
 580  * Common code for mount and mountroot
 581  */
 582 static int
 583 ffs_mountfs(devvp, mp, td)
 584         struct vnode *devvp;
 585         struct mount *mp;
 586         struct thread *td;
 587 {
 588         struct ufsmount *ump;
 589         struct buf *bp;
 590         struct fs *fs;
 591         struct cdev *dev;
 592         void *space;
 593         ufs2_daddr_t sblockloc;
 594         int error, i, blks, size, ronly;
 595         int32_t *lp;
 596         struct ucred *cred;
 597         struct g_consumer *cp;
 598         struct mount *nmp;
 599
 600         dev = devvp->v_rdev;
 601         cred = td ? td->td_ucred : NOCRED;
 602
 603         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 604         DROP_GIANT();
 605         g_topology_lock();
 606         error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
 607
 608         /*
 609          * If we are a root mount, drop the E flag so fsck can do its magic.
 610          * We will pick it up again when we remount R/W.
 611          */
 612         if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
 613                 error = g_access(cp, 0, 0, -1);
 614         g_topology_unlock();
 615         PICKUP_GIANT();
 616         VOP_UNLOCK(devvp, 0);
 617         if (error)
 618                 return (error);
 619         if (devvp->v_rdev->si_iosize_max != 0)
 620                 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
 621         if (mp->mnt_iosize_max > MAXPHYS)
 622                 mp->mnt_iosize_max = MAXPHYS;
 623
 624         devvp->v_bufobj.bo_private = cp;
 625         devvp->v_bufobj.bo_ops = &ffs_ops;
 626
 627         bp = NULL;
 628         ump = NULL;
 629         fs = NULL;
 630         sblockloc = 0;
 631         /*
 632          * Try reading the superblock in each of its possible locations.
 633          */
 634         for (i = 0; sblock_try[i] != -1; i++) {
 635                 if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
 636                         error = EINVAL;
 637                         vfs_mount_error(mp,
 638                             "Invalid sectorsize %d for superblock size %d",
 639                             cp->provider->sectorsize, SBLOCKSIZE);
 640                         goto out;
 641                 }
 642                 if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
 643                     cred, &bp)) != 0)
 644                         goto out;
 645                 fs = (struct fs *)bp->b_data;
 646                 sblockloc = sblock_try[i];
 647                 if ((fs->fs_magic == FS_UFS1_MAGIC ||
 648                      (fs->fs_magic == FS_UFS2_MAGIC &&
 649                       (fs->fs_sblockloc == sblockloc ||
 650                        (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
 651                     fs->fs_bsize <= MAXBSIZE &&
 652                     fs->fs_bsize >= sizeof(struct fs))
 653                         break;
 654                 brelse(bp);
 655                 bp = NULL;
 656         }
 657         if (sblock_try[i] == -1) {
 658                 error = EINVAL;         /* XXX needs translation */
 659                 goto out;
 660         }
 661         fs->fs_fmod = 0;
 662         fs->fs_flags &= ~FS_INDEXDIRS;  /* no support for directory indicies */
 663         fs->fs_flags &= ~FS_UNCLEAN;
 664         if (fs->fs_clean == 0) {
 665                 fs->fs_flags |= FS_UNCLEAN;
 666                 if (ronly || (mp->mnt_flag & MNT_FORCE) ||
 667                     ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
 668                      (fs->fs_flags & FS_DOSOFTDEP))) {
 669                         printf(
 670 "WARNING: %s was not properly dismounted\n",
 671                             fs->fs_fsmnt);
 672                 } else {
 673                         printf(
 674 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 675                             fs->fs_fsmnt);
 676                         error = EPERM;
 677                         goto out;
 678                 }
 679                 if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
 680                     (mp->mnt_flag & MNT_FORCE)) {
 681                         printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
 682                             (intmax_t)fs->fs_pendingblocks,
 683                             fs->fs_pendinginodes);
 684                         fs->fs_pendingblocks = 0;
 685                         fs->fs_pendinginodes = 0;
 686                 }
 687         }
 688         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 689                 printf("%s: mount pending error: blocks %jd files %d\n",
 690                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 691                     fs->fs_pendinginodes);
 692                 fs->fs_pendingblocks = 0;
 693                 fs->fs_pendinginodes = 0;
 694         }
 695         if ((fs->fs_flags & FS_GJOURNAL) != 0) {
 696 #ifdef UFS_GJOURNAL
 697                 /*
 698                  * Get journal provider name.
 699                  */
 700                 size = 1024;
 701                 mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
 702                 if (g_io_getattr("GJOURNAL::provider", cp, &size,
 703                     mp->mnt_gjprovider) == 0) {
 704                         mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
 705                             M_UFSMNT, M_WAITOK);
 706                         MNT_ILOCK(mp);
 707                         mp->mnt_flag |= MNT_GJOURNAL;
 708                         MNT_IUNLOCK(mp);
 709                 } else {
 710                         printf(
 711 "WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
 712                             mp->mnt_stat.f_mntonname);
 713                         free(mp->mnt_gjprovider, M_UFSMNT);
 714                         mp->mnt_gjprovider = NULL;
 715                 }
 716 #else
 717                 printf(
 718 "WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
 719                     mp->mnt_stat.f_mntonname);
 720 #endif
 721         } else {
 722                 mp->mnt_gjprovider = NULL;
 723         }
 724         ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
 725         ump->um_cp = cp;
 726         ump->um_bo = &devvp->v_bufobj;
 727         ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
 728         if (fs->fs_magic == FS_UFS1_MAGIC) {
 729                 ump->um_fstype = UFS1;
 730                 ump->um_balloc = ffs_balloc_ufs1;
 731         } else {
 732                 ump->um_fstype = UFS2;
 733                 ump->um_balloc = ffs_balloc_ufs2;
 734         }
 735         ump->um_blkatoff = ffs_blkatoff;
 736         ump->um_truncate = ffs_truncate;
 737         ump->um_update = ffs_update;
 738         ump->um_valloc = ffs_valloc;
 739         ump->um_vfree = ffs_vfree;
 740         ump->um_ifree = ffs_ifree;
 741         ump->um_rdonly = ffs_rdonly;
 742         mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
 743         bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
 744         if (fs->fs_sbsize < SBLOCKSIZE)
 745                 bp->b_flags |= B_INVAL | B_NOCACHE;
 746         brelse(bp);
 747         bp = NULL;
 748         fs = ump->um_fs;
 749         ffs_oldfscompat_read(fs, ump, sblockloc);
 750         fs->fs_ronly = ronly;
 751         size = fs->fs_cssize;
 752         blks = howmany(size, fs->fs_fsize);
 753         if (fs->fs_contigsumsize > 0)
 754                 size += fs->fs_ncg * sizeof(int32_t);
 755         size += fs->fs_ncg * sizeof(u_int8_t);
 756         space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
 757         fs->fs_csp = space;
 758         for (i = 0; i < blks; i += fs->fs_frag) {
 759                 size = fs->fs_bsize;
 760                 if (i + fs->fs_frag > blks)
 761                         size = (blks - i) * fs->fs_fsize;
 762                 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
 763                     cred, &bp)) != 0) {
 764                         free(fs->fs_csp, M_UFSMNT);
 765                         goto out;
 766                 }
 767                 bcopy(bp->b_data, space, (u_int)size);
 768                 space = (char *)space + size;
 769                 brelse(bp);
 770                 bp = NULL;
 771         }
 772         if (fs->fs_contigsumsize > 0) {
 773                 fs->fs_maxcluster = lp = space;
 774                 for (i = 0; i < fs->fs_ncg; i++)
 775                         *lp++ = fs->fs_contigsumsize;
 776                 space = lp;
 777         }
 778         size = fs->fs_ncg * sizeof(u_int8_t);
 779         fs->fs_contigdirs = (u_int8_t *)space;
 780         bzero(fs->fs_contigdirs, size);
 781         fs->fs_active = NULL;
 782         mp->mnt_data = ump;
 783         mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
 784         mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
 785         nmp = NULL;
 786         if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
 787             (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
 788                 if (nmp)
 789                         vfs_rel(nmp);
 790                 vfs_getnewfsid(mp);
 791         }
 792         mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 793         MNT_ILOCK(mp);
 794         mp->mnt_flag |= MNT_LOCAL;
 795         MNT_IUNLOCK(mp);
 796         if ((fs->fs_flags & FS_MULTILABEL) != 0) {
 797 #ifdef MAC
 798                 MNT_ILOCK(mp);
 799                 mp->mnt_flag |= MNT_MULTILABEL;
 800                 MNT_IUNLOCK(mp);
 801 #else
 802                 printf(
 803 "WARNING: %s: multilabel flag on fs but no MAC support\n",
 804                     mp->mnt_stat.f_mntonname);
 805 #endif
 806         }
 807         if ((fs->fs_flags & FS_ACLS) != 0) {
 808 #ifdef UFS_ACL
 809                 MNT_ILOCK(mp);
 810                 mp->mnt_flag |= MNT_ACLS;
 811                 MNT_IUNLOCK(mp);
 812 #else
 813                 printf(
 814 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
 815                     mp->mnt_stat.f_mntonname);
 816 #endif
 817         }
 818         ump->um_mountp = mp;
 819         ump->um_dev = dev;
 820         ump->um_devvp = devvp;
 821         ump->um_nindir = fs->fs_nindir;
 822         ump->um_bptrtodb = fs->fs_fsbtodb;
 823         ump->um_seqinc = fs->fs_frag;
 824         for (i = 0; i < MAXQUOTAS; i++)
 825                 ump->um_quotas[i] = NULLVP;
 826 #ifdef UFS_EXTATTR
 827         ufs_extattr_uepm_init(&ump->um_extattr);
 828 #endif
 829         /*
 830          * Set FS local "last mounted on" information (NULL pad)
 831          */
 832         bzero(fs->fs_fsmnt, MAXMNTLEN);
 833         strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
 834
 835         if( mp->mnt_flag & MNT_ROOTFS) {
 836                 /*
 837                  * Root mount; update timestamp in mount structure.
 838                  * this will be used by the common root mount code
 839                  * to update the system clock.
 840                  */
 841                 mp->mnt_time = fs->fs_time;
 842         }
 843
 844         if (ronly == 0) {
 845                 if ((fs->fs_flags & FS_DOSOFTDEP) &&
 846                     (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
 847                         free(fs->fs_csp, M_UFSMNT);
 848                         goto out;
 849                 }
 850                 if (fs->fs_snapinum[0] != 0)
 851                         ffs_snapshot_mount(mp);
 852                 fs->fs_fmod = 1;
 853                 fs->fs_clean = 0;
 854                 (void) ffs_sbupdate(ump, MNT_WAIT, 0);
 855         }
 856         /*
 857          * Initialize filesystem stat information in mount struct.
 858          */
 859         MNT_ILOCK(mp);
 860         mp->mnt_kern_flag |= MNTK_MPSAFE;
 861         MNT_IUNLOCK(mp);
 862 #ifdef UFS_EXTATTR
 863 #ifdef UFS_EXTATTR_AUTOSTART
 864         /*
 865          *
 866          * Auto-starting does the following:
 867          *      - check for /.attribute in the fs, and extattr_start if so
 868          *      - for each file in .attribute, enable that file with
 869          *        an attribute of the same name.
 870          * Not clear how to report errors -- probably eat them.
 871          * This would all happen while the filesystem was busy/not
 872          * available, so would effectively be "atomic".
 873          */
 874         mp->mnt_stat.f_iosize = fs->fs_bsize;
 875         (void) ufs_extattr_autostart(mp, td);
 876 #endif /* !UFS_EXTATTR_AUTOSTART */
 877 #endif /* !UFS_EXTATTR */
 878         return (0);
 879 out:
 880         if (bp)
 881                 brelse(bp);
 882         if (cp != NULL) {
 883                 DROP_GIANT();
 884                 g_topology_lock();
 885                 g_vfs_close(cp, td);
 886                 g_topology_unlock();
 887                 PICKUP_GIANT();
 888         }
 889         if (ump) {
 890                 mtx_destroy(UFS_MTX(ump));
 891                 if (mp->mnt_gjprovider != NULL) {
 892                         free(mp->mnt_gjprovider, M_UFSMNT);
 893                         mp->mnt_gjprovider = NULL;
 894                 }
 895                 free(ump->um_fs, M_UFSMNT);
 896                 free(ump, M_UFSMNT);
 897                 mp->mnt_data = NULL;
 898         }
 899         return (error);
 900 }
 901
 902 #include <sys/sysctl.h>
/*
 * Debug knob exported as a read/write integer sysctl named "bigcgs" under
 * the _debug node; it defaults to 0.  While it is nonzero,
 * ffs_oldfscompat_read() stashes fs_cgsize in fs_save_cgsize and replaces
 * it with fs_bsize, and ffs_oldfscompat_write() puts the stashed value back.
 */
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
 905
 906 /*
 907  * Sanity checks for loading old filesystem superblocks.
 908  * See ffs_oldfscompat_write below for unwound actions.
 909  *
 910  * XXX - Parts get retired eventually.
 911  * Unfortunately new bits get added.
 912  */
 913 static void
 914 ffs_oldfscompat_read(fs, ump, sblockloc)
 915         struct fs *fs;
 916         struct ufsmount *ump;
 917         ufs2_daddr_t sblockloc;
 918 {
 919         off_t maxfilesize;
 920
 921         /*
 922          * If not yet done, update fs_flags location and value of fs_sblockloc.
 923          */
 924         if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
 925                 fs->fs_flags = fs->fs_old_flags;
 926                 fs->fs_old_flags |= FS_FLAGS_UPDATED;
 927                 fs->fs_sblockloc = sblockloc;
 928         }
 929         /*
 930          * If not yet done, update UFS1 superblock with new wider fields.
 931          */
 932         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
 933                 fs->fs_maxbsize = fs->fs_bsize;
 934                 fs->fs_time = fs->fs_old_time;
 935                 fs->fs_size = fs->fs_old_size;
 936                 fs->fs_dsize = fs->fs_old_dsize;
 937                 fs->fs_csaddr = fs->fs_old_csaddr;
 938                 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
 939                 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
 940                 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
 941                 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
 942         }
 943         if (fs->fs_magic == FS_UFS1_MAGIC &&
 944             fs->fs_old_inodefmt < FS_44INODEFMT) {
 945                 fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
 946                 fs->fs_qbmask = ~fs->fs_bmask;
 947                 fs->fs_qfmask = ~fs->fs_fmask;
 948         }
 949         if (fs->fs_magic == FS_UFS1_MAGIC) {
 950                 ump->um_savedmaxfilesize = fs->fs_maxfilesize;
 951                 maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
 952                 if (fs->fs_maxfilesize > maxfilesize)
 953                         fs->fs_maxfilesize = maxfilesize;
 954         }
 955         /* Compatibility for old filesystems */
 956         if (fs->fs_avgfilesize <= 0)
 957                 fs->fs_avgfilesize = AVFILESIZ;
 958         if (fs->fs_avgfpdir <= 0)
 959                 fs->fs_avgfpdir = AFPDIR;
 960         if (bigcgs) {
 961                 fs->fs_save_cgsize = fs->fs_cgsize;
 962                 fs->fs_cgsize = fs->fs_bsize;
 963         }
 964 }
 965
 966 /*
 967  * Unwinding superblock updates for old filesystems.
 968  * See ffs_oldfscompat_read above for details.
 969  *
 970  * XXX - Parts get retired eventually.
 971  * Unfortunately new bits get added.
 972  */
 973 static void
 974 ffs_oldfscompat_write(fs, ump)
 975         struct fs *fs;
 976         struct ufsmount *ump;
 977 {
 978
 979         /*
 980          * Copy back UFS2 updated fields that UFS1 inspects.
 981          */
 982         if (fs->fs_magic == FS_UFS1_MAGIC) {
 983                 fs->fs_old_time = fs->fs_time;
 984                 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
 985                 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
 986                 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
 987                 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
 988                 fs->fs_maxfilesize = ump->um_savedmaxfilesize;
 989         }
 990         if (bigcgs) {
 991                 fs->fs_cgsize = fs->fs_save_cgsize;
 992                 fs->fs_save_cgsize = 0;
 993         }
 994 }
 995
 996 /*
 997  * unmount system call
 998  */
 999 static int
1000 ffs_unmount(mp, mntflags, td)
1001         struct mount *mp;
1002         int mntflags;
1003         struct thread *td;
1004 {
1005         struct ufsmount *ump = VFSTOUFS(mp);
1006         struct fs *fs;
1007         int error, flags;
1008
1009         flags = 0;
1010         if (mntflags & MNT_FORCE) {
1011                 flags |= FORCECLOSE;
1012         }
1013 #ifdef UFS_EXTATTR
1014         if ((error = ufs_extattr_stop(mp, td))) {
1015                 if (error != EOPNOTSUPP)
1016                         printf("ffs_unmount: ufs_extattr_stop returned %d\n",
1017                             error);
1018         } else {
1019                 ufs_extattr_uepm_destroy(&ump->um_extattr);
1020         }
1021 #endif
1022         if (mp->mnt_flag & MNT_SOFTDEP) {
1023                 if ((error = softdep_flushfiles(mp, flags, td)) != 0)
1024                         return (error);
1025         } else {
1026                 if ((error = ffs_flushfiles(mp, flags, td)) != 0)
1027                         return (error);
1028         }
1029         fs = ump->um_fs;
1030         UFS_LOCK(ump);
1031         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1032                 printf("%s: unmount pending error: blocks %jd files %d\n",
1033                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
1034                     fs->fs_pendinginodes);
1035                 fs->fs_pendingblocks = 0;
1036                 fs->fs_pendinginodes = 0;
1037         }
1038         UFS_UNLOCK(ump);
1039         if (fs->fs_ronly == 0) {
1040                 fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
1041                 error = ffs_sbupdate(ump, MNT_WAIT, 0);
1042                 if (error) {
1043                         fs->fs_clean = 0;
1044                         return (error);
1045                 }
1046         }
1047         DROP_GIANT();
1048         g_topology_lock();
1049         g_vfs_close(ump->um_cp, td);
1050         g_topology_unlock();
1051         PICKUP_GIANT();
1052         vrele(ump->um_devvp);
1053         mtx_destroy(UFS_MTX(ump));
1054         if (mp->mnt_gjprovider != NULL) {
1055                 free(mp->mnt_gjprovider, M_UFSMNT);
1056                 mp->mnt_gjprovider = NULL;
1057         }
1058         free(fs->fs_csp, M_UFSMNT);
1059         free(fs, M_UFSMNT);
1060         free(ump, M_UFSMNT);
1061         mp->mnt_data = NULL;
1062         MNT_ILOCK(mp);
1063         mp->mnt_flag &= ~MNT_LOCAL;
1064         MNT_IUNLOCK(mp);
1065         return (error);
1066 }
1067
1068 /*
1069  * Flush out all the files in a filesystem.
1070  */
1071 int
1072 ffs_flushfiles(mp, flags, td)
1073         struct mount *mp;
1074         int flags;
1075         struct thread *td;
1076 {
1077         struct ufsmount *ump;
1078         int error;
1079
1080         ump = VFSTOUFS(mp);
1081 #ifdef QUOTA
1082         if (mp->mnt_flag & MNT_QUOTA) {
1083                 int i;
1084                 error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1085                 if (error)
1086                         return (error);
1087                 for (i = 0; i < MAXQUOTAS; i++) {
1088                         quotaoff(td, mp, i);
1089                 }
1090                 /*
1091                  * Here we fall through to vflush again to ensure
1092                  * that we have gotten rid of all the system vnodes.
1093                  */
1094         }
1095 #endif
1096         ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1097         if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1098                 if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1099                         return (error);
1100                 ffs_snapshot_unmount(mp);
1101                 flags |= FORCECLOSE;
1102                 /*
1103                  * Here we fall through to vflush again to ensure
1104                  * that we have gotten rid of all the system vnodes.
1105                  */
1106         }
1107         /*
1108          * Flush all the files.
1109          */
1110         if ((error = vflush(mp, 0, flags, td)) != 0)
1111                 return (error);
1112         /*
1113          * Flush filesystem metadata.
1114          */
1115         vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1116         error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1117         VOP_UNLOCK(ump->um_devvp, 0);
1118         return (error);
1119 }
1120
1121 /*
1122  * Get filesystem statistics.
1123  */
1124 static int
1125 ffs_statfs(mp, sbp, td)
1126         struct mount *mp;
1127         struct statfs *sbp;
1128         struct thread *td;
1129 {
1130         struct ufsmount *ump;
1131         struct fs *fs;
1132
1133         ump = VFSTOUFS(mp);
1134         fs = ump->um_fs;
1135         if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1136                 panic("ffs_statfs");
1137         sbp->f_version = STATFS_VERSION;
1138         sbp->f_bsize = fs->fs_fsize;
1139         sbp->f_iosize = fs->fs_bsize;
1140         sbp->f_blocks = fs->fs_dsize;
1141         UFS_LOCK(ump);
1142         sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1143             fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1144         sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1145             dbtofsb(fs, fs->fs_pendingblocks);
1146         sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1147         sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1148         UFS_UNLOCK(ump);
1149         sbp->f_namemax = NAME_MAX;
1150         return (0);
1151 }
1152
1153 /*
1154  * Go through the disk queues to initiate sandbagged IO;
1155  * go through the inodes to write those that have been modified;
1156  * initiate the writing of the super block if it has been modified.
1157  *
1158  * Note: we are always called with the filesystem marked `MPBUSY'.
1159  */
1160 static int
1161 ffs_sync(mp, waitfor, td)
1162         struct mount *mp;
1163         int waitfor;
1164         struct thread *td;
1165 {
1166         struct vnode *mvp, *vp, *devvp;
1167         struct inode *ip;
1168         struct ufsmount *ump = VFSTOUFS(mp);
1169         struct fs *fs;
1170         int error, count, wait, lockreq, allerror = 0;
1171         int suspend;
1172         int suspended;
1173         int secondary_writes;
1174         int secondary_accwrites;
1175         int softdep_deps;
1176         int softdep_accdeps;
1177         struct bufobj *bo;
1178
1179         fs = ump->um_fs;
1180         if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {            /* XXX */
1181                 printf("fs = %s\n", fs->fs_fsmnt);
1182                 panic("ffs_sync: rofs mod");
1183         }
1184         /*
1185          * Write back each (modified) inode.
1186          */
1187         wait = 0;
1188         suspend = 0;
1189         suspended = 0;
1190         lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1191         if (waitfor == MNT_SUSPEND) {
1192                 suspend = 1;
1193                 waitfor = MNT_WAIT;
1194         }
1195         if (waitfor == MNT_WAIT) {
1196                 wait = 1;
1197                 lockreq = LK_EXCLUSIVE;
1198         }
1199         lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
1200         MNT_ILOCK(mp);
1201 loop:
1202         /* Grab snapshot of secondary write counts */
1203         secondary_writes = mp->mnt_secondary_writes;
1204         secondary_accwrites = mp->mnt_secondary_accwrites;
1205
1206         /* Grab snapshot of softdep dependency counts */
1207         MNT_IUNLOCK(mp);
1208         softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
1209         MNT_ILOCK(mp);
1210
1211         MNT_VNODE_FOREACH(vp, mp, mvp) {
1212                 /*
1213                  * Depend on the mntvnode_slock to keep things stable enough
1214                  * for a quick test.  Since there might be hundreds of
1215                  * thousands of vnodes, we cannot afford even a subroutine
1216                  * call unless there's a good chance that we have work to do.
1217                  */
1218                 VI_LOCK(vp);
1219                 if (vp->v_iflag & VI_DOOMED) {
1220                         VI_UNLOCK(vp);
1221                         continue;
1222                 }
1223                 ip = VTOI(vp);
1224                 if (vp->v_type == VNON || ((ip->i_flag &
1225                     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1226                     vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1227                         VI_UNLOCK(vp);
1228                         continue;
1229                 }
1230                 MNT_IUNLOCK(mp);
1231                 if ((error = vget(vp, lockreq, td)) != 0) {
1232                         MNT_ILOCK(mp);
1233                         if (error == ENOENT || error == ENOLCK) {
1234                                 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1235                                 goto loop;
1236                         }
1237                         continue;
1238                 }
1239                 if ((error = ffs_syncvnode(vp, waitfor)) != 0)
1240                         allerror = error;
1241                 vput(vp);
1242                 MNT_ILOCK(mp);
1243         }
1244         MNT_IUNLOCK(mp);
1245         /*
1246          * Force stale filesystem control information to be flushed.
1247          */
1248         if (waitfor == MNT_WAIT) {
1249                 if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1250                         allerror = error;
1251                 /* Flushed work items may create new vnodes to clean */
1252                 if (allerror == 0 && count) {
1253                         MNT_ILOCK(mp);
1254                         goto loop;
1255                 }
1256         }
1257 #ifdef QUOTA
1258         qsync(mp);
1259 #endif
1260         devvp = ump->um_devvp;
1261         bo = &devvp->v_bufobj;
1262         BO_LOCK(bo);
1263         if (waitfor != MNT_LAZY &&
1264             (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1265                 BO_UNLOCK(bo);
1266                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
1267                 if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
1268                         allerror = error;
1269                 VOP_UNLOCK(devvp, 0);
1270                 if (allerror == 0 && waitfor == MNT_WAIT) {
1271                         MNT_ILOCK(mp);
1272                         goto loop;
1273                 }
1274         } else if (suspend != 0) {
1275                 if (softdep_check_suspend(mp,
1276                                           devvp,
1277                                           softdep_deps,
1278                                           softdep_accdeps,
1279                                           secondary_writes,
1280                                           secondary_accwrites) != 0)
1281                         goto loop;      /* More work needed */
1282                 mtx_assert(MNT_MTX(mp), MA_OWNED);
1283                 mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
1284                 MNT_IUNLOCK(mp);
1285                 suspended = 1;
1286         } else
1287                 BO_UNLOCK(bo);
1288         /*
1289          * Write back modified superblock.
1290          */
1291         if (fs->fs_fmod != 0 &&
1292             (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
1293                 allerror = error;
1294         return (allerror);
1295 }
1296
1297 int
1298 ffs_vget(mp, ino, flags, vpp)
1299         struct mount *mp;
1300         ino_t ino;
1301         int flags;
1302         struct vnode **vpp;
1303 {
1304         return (ffs_vgetf(mp, ino, flags, vpp, 0));
1305 }
1306
/*
 * Return the vnode for inode "ino" of mount "mp", creating and loading it
 * from disk if it is not already in the vfs hash.
 *
 * mp        - mount the inode belongs to.
 * ino       - inode number.
 * flags     - lock flags; a shared request is promoted to exclusive when
 *             a new vnode has to be created.
 * vpp       - receives the vnode.
 * ffs_flags - FFSV_FORCEINSMQ sets VV_FORCEINSMQ on the new vnode for the
 *             duration of the insmntque() call.
 *
 * Returns 0 on success.  On the failure paths handled explicitly below
 * *vpp is set to NULL; the two early returns after vfs_hash_get() and
 * vfs_hash_insert() leave *vpp as those helpers set it.
 */
int
ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
	int ffs_flags;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct cdev *dev;
	int error;

	/* Fast path: the hash lookup either failed or produced a vnode. */
	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/*
	 * We must promote to an exclusive lock for vnode creation.  This
	 * can happen if lookup is passed LOCKSHARED.
	 */
	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
		flags &= ~LK_TYPE_MASK;
		flags |= LK_EXCLUSIVE;
	}

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes. We just allow them to race
	 * and check later to decide who wins. Let the race begin!
	 */

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	fs = ump->um_fs;

	/*
	 * If this MALLOC() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);

	/* Allocate a new vnode/inode. */
	if (fs->fs_magic == FS_UFS1_MAGIC)
		error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
	else
		error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
	if (error) {
		*vpp = NULL;
		uma_zfree(uma_inode, ip);
		return (error);
	}
	/*
	 * FFS supports recursive and shared locking.
	 */
	VN_LOCK_AREC(vp);
	VN_LOCK_ASHARE(vp);
	/* Cross-link the vnode and the (still empty) in-core inode. */
	vp->v_data = ip;
	vp->v_bufobj.bo_bsize = fs->fs_bsize;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif

	/* Lock the new vnode before it is put on the mount's vnode list. */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	if (ffs_flags & FFSV_FORCEINSMQ)
		vp->v_vflag |= VV_FORCEINSMQ;
	error = insmntque(vp, mp);
	if (error != 0) {
		/*
		 * Only the inode is freed here.
		 * NOTE(review): presumably insmntque() disposes of the
		 * vnode itself on failure -- confirm.
		 */
		uma_zfree(uma_inode, ip);
		*vpp = NULL;
		return (error);
	}
	vp->v_vflag &= ~VV_FORCEINSMQ;
	/*
	 * Publish the vnode in the hash.  A non-NULL *vpp afterwards means
	 * a vnode was handed back instead of ours (the creation race
	 * described above); return it.
	 * NOTE(review): presumably vfs_hash_insert() has released our vp in
	 * that case and on error -- confirm.
	 */
	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/* Allocate the on-disk inode image matching the filesystem type. */
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	if (ip->i_ump->um_fstype == UFS1)
		error = ufs_vinit(mp, &ffs_fifoops1, &vp);
	else
		error = ufs_vinit(mp, &ffs_fifoops2, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Finish inode initialization.
	 */

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 * The new number is only pushed into the on-disk image (and the
	 * inode marked modified) when the mount is not read-only.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = arc4random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			ip->i_flag |= IN_MODIFIED;
			DIP_SET(ip, i_gen, ip->i_gen);
		}
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
	}						/* XXX */

#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_vnode_associate_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	*vpp = vp;
	return (0);
}
1485
1486 /*
1487  * File handle to vnode
1488  *
1489  * Have to be really careful about stale file handles:
1490  * - check that the inode number is valid
1491  * - call ffs_vget() to get the locked inode
1492  * - check for an unallocated inode (i_mode == 0)
1493  * - check that the given client host has export rights and return
1494  *   those rights via. exflagsp and credanonp
1495  */
1496 static int
1497 ffs_fhtovp(mp, fhp, vpp)
1498         struct mount *mp;
1499         struct fid *fhp;
1500         struct vnode **vpp;
1501 {
1502         struct ufid *ufhp;
1503         struct fs *fs;
1504
1505         ufhp = (struct ufid *)fhp;
1506         fs = VFSTOUFS(mp)->um_fs;
1507         if (ufhp->ufid_ino < ROOTINO ||
1508             ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1509                 return (ESTALE);
1510         return (ufs_fhtovp(mp, ufhp, vpp));
1511 }
1512
/*
 * Filesystem initialization hook: set up soft updates first, then run the
 * UFS initialization and return its result.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1524
/*
 * Reverse ffs_init(): tear down the UFS layer, then soft updates.
 * The value returned is the result of ufs_uninit().
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	error = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (error);
}
1538
1539 /*
1540  * Write a superblock and associated information back to disk.
1541  */
1542 int
1543 ffs_sbupdate(mp, waitfor, suspended)
1544         struct ufsmount *mp;
1545         int waitfor;
1546         int suspended;
1547 {
1548         struct fs *fs = mp->um_fs;
1549         struct buf *sbbp;
1550         struct buf *bp;
1551         int blks;
1552         void *space;
1553         int i, size, error, allerror = 0;
1554
1555         if (fs->fs_ronly == 1 &&
1556             (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1557             (MNT_RDONLY | MNT_UPDATE))
1558                 panic("ffs_sbupdate: write read-only filesystem");
1559         /*
1560          * We use the superblock's buf to serialize calls to ffs_sbupdate().
1561          */
1562         sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1563             0, 0, 0);
1564         /*
1565          * First write back the summary information.
1566          */
1567         blks = howmany(fs->fs_cssize, fs->fs_fsize);
1568         space = fs->fs_csp;
1569         for (i = 0; i < blks; i += fs->fs_frag) {
1570                 size = fs->fs_bsize;
1571                 if (i + fs->fs_frag > blks)
1572                         size = (blks - i) * fs->fs_fsize;
1573                 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1574                     size, 0, 0, 0);
1575                 bcopy(space, bp->b_data, (u_int)size);
1576                 space = (char *)space + size;
1577                 if (suspended)
1578                         bp->b_flags |= B_VALIDSUSPWRT;
1579                 if (waitfor != MNT_WAIT)
1580                         bawrite(bp);
1581                 else if ((error = bwrite(bp)) != 0)
1582                         allerror = error;
1583         }
1584         /*
1585          * Now write back the superblock itself. If any errors occurred
1586          * up to this point, then fail so that the superblock avoids
1587          * being written out as clean.
1588          */
1589         if (allerror) {
1590                 brelse(sbbp);
1591                 return (allerror);
1592         }
1593         bp = sbbp;
1594         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1595             (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1596                 printf("%s: correcting fs_sblockloc from %jd to %d\n",
1597                     fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1598                 fs->fs_sblockloc = SBLOCK_UFS1;
1599         }
1600         if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1601             (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1602                 printf("%s: correcting fs_sblockloc from %jd to %d\n",
1603                     fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1604                 fs->fs_sblockloc = SBLOCK_UFS2;
1605         }
1606         fs->fs_fmod = 0;
1607         fs->fs_time = time_second;
1608         bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1609         ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1610         if (suspended)
1611                 bp->b_flags |= B_VALIDSUSPWRT;
1612         if (waitfor != MNT_WAIT)
1613                 bawrite(bp);
1614         else if ((error = bwrite(bp)) != 0)
1615                 allerror = error;
1616         return (allerror);
1617 }
1618
/*
 * Extended attribute control entry point.  The request is handed to
 * ufs_extattrctl() when the kernel is built with UFS_EXTATTR and to
 * vfs_stdextattrctl() otherwise; the callee's result is returned as is.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
        int attrnamespace, const char *attrname, struct thread *td)
{
        int error;

#ifdef UFS_EXTATTR
        error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
            attrname, td);
#else
        error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
            attrname, td);
#endif
        return (error);
}
1632
1633 static void
1634 ffs_ifree(struct ufsmount *ump, struct inode *ip)
1635 {
1636
1637         if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1638                 uma_zfree(uma_ufs1, ip->i_din1);
1639         else if (ip->i_din2 != NULL)
1640                 uma_zfree(uma_ufs2, ip->i_din2);
1641         uma_zfree(uma_inode, ip);
1642 }
1643
/*
 * Switch consulted by ffs_bufwrite(): when it is zero, no background copy
 * is made and the buffer itself is written.  It defaults to 1 and is
 * exported read/write under the _debug sysctl node.
 *
 * NOTE(review): the description string below names BV_BKGRDWRITE, while
 * the flag ffs_bufwrite() actually tests is BX_BKGRDWRITE in b_xflags.
 */
static int dobkgrdwrite = 1;
SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
    "Do background writes (honoring the BV_BKGRDWRITE flag)?");
1647
1648 /*
1649  * Complete a background write started from bwrite.
1650  */
1651 static void
1652 ffs_backgroundwritedone(struct buf *bp)
1653 {
1654         struct bufobj *bufobj;
1655         struct buf *origbp;
1656
1657         /*
1658          * Find the original buffer that we are writing.
1659          */
1660         bufobj = bp->b_bufobj;
1661         BO_LOCK(bufobj);
1662         if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
1663                 panic("backgroundwritedone: lost buffer");
1664         /* Grab an extra reference to be dropped by the bufdone() below. */
1665         bufobj_wrefl(bufobj);
1666         BO_UNLOCK(bufobj);
1667         /*
1668          * Process dependencies then return any unfinished ones.
1669          */
1670         if (!LIST_EMPTY(&bp->b_dep))
1671                 buf_complete(bp);
1672 #ifdef SOFTUPDATES
1673         if (!LIST_EMPTY(&bp->b_dep))
1674                 softdep_move_dependencies(bp, origbp);
1675 #endif
1676         /*
1677          * This buffer is marked B_NOCACHE so when it is released
1678          * by biodone it will be tossed.
1679          */
1680         bp->b_flags |= B_NOCACHE;
1681         bp->b_flags &= ~B_CACHE;
1682         bufdone(bp);
1683         BO_LOCK(bufobj);
1684         /*
1685          * Clear the BV_BKGRDINPROG flag in the original buffer
1686          * and awaken it if it is waiting for the write to complete.
1687          * If BV_BKGRDINPROG is not set in the original buffer it must
1688          * have been released and re-instantiated - which is not legal.
1689          */
1690         KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
1691             ("backgroundwritedone: lost buffer2"));
1692         origbp->b_vflags &= ~BV_BKGRDINPROG;
1693         if (origbp->b_vflags & BV_BKGRDWAIT) {
1694                 origbp->b_vflags &= ~BV_BKGRDWAIT;
1695                 wakeup(&origbp->b_xflags);
1696         }
1697         BO_UNLOCK(bufobj);
1698 }
1699
1700
1701 /*
1702  * Write, release buffer on completion.  (Done by iodone
1703  * if async).  Do not bother writing anything if the buffer
1704  * is invalid.
1705  *
1706  * Note that we set B_CACHE here, indicating that buffer is
1707  * fully valid and thus cacheable.  This is true even of NFS
1708  * now so we set it generally.  This could be set either here
1709  * or in biodone() since the I/O is synchronous.  We put it
1710  * here.
1711  */
1712 static int
1713 ffs_bufwrite(struct buf *bp)
1714 {
1715         int oldflags, s;
1716         struct buf *newbp;
1717
1718         CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
1719         if (bp->b_flags & B_INVAL) {
1720                 brelse(bp);
1721                 return (0);
1722         }
1723
1724         oldflags = bp->b_flags;
1725
1726         if (!BUF_ISLOCKED(bp))
1727                 panic("bufwrite: buffer is not busy???");
1728         s = splbio();
1729         /*
1730          * If a background write is already in progress, delay
1731          * writing this block if it is asynchronous. Otherwise
1732          * wait for the background write to complete.
1733          */
1734         BO_LOCK(bp->b_bufobj);
1735         if (bp->b_vflags & BV_BKGRDINPROG) {
1736                 if (bp->b_flags & B_ASYNC) {
1737                         BO_UNLOCK(bp->b_bufobj);
1738                         splx(s);
1739                         bdwrite(bp);
1740                         return (0);
1741                 }
1742                 bp->b_vflags |= BV_BKGRDWAIT;
1743                 msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
1744                 if (bp->b_vflags & BV_BKGRDINPROG)
1745                         panic("bufwrite: still writing");
1746         }
1747         BO_UNLOCK(bp->b_bufobj);
1748
1749         /* Mark the buffer clean */
1750         bundirty(bp);
1751
1752         /*
1753          * If this buffer is marked for background writing and we
1754          * do not have to wait for it, make a copy and write the
1755          * copy so as to leave this buffer ready for further use.
1756          *
1757          * This optimization eats a lot of memory.  If we have a page
1758          * or buffer shortfall we can't do it.
1759          */
1760         if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
1761             (bp->b_flags & B_ASYNC) &&
1762             !vm_page_count_severe() &&
1763             !buf_dirty_count_severe()) {
1764                 KASSERT(bp->b_iodone == NULL,
1765                     ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
1766
1767                 /* get a new block */
1768                 newbp = geteblk(bp->b_bufsize);
1769
1770                 /*
1771                  * set it to be identical to the old block.  We have to
1772                  * set b_lblkno and BKGRDMARKER before calling bgetvp()
1773                  * to avoid confusing the splay tree and gbincore().
1774                  */
1775                 memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
1776                 newbp->b_lblkno = bp->b_lblkno;
1777                 newbp->b_xflags |= BX_BKGRDMARKER;
1778                 BO_LOCK(bp->b_bufobj);
1779                 bp->b_vflags |= BV_BKGRDINPROG;
1780                 bgetvp(bp->b_vp, newbp);
1781                 BO_UNLOCK(bp->b_bufobj);
1782                 newbp->b_bufobj = &bp->b_vp->v_bufobj;
1783                 newbp->b_blkno = bp->b_blkno;
1784                 newbp->b_offset = bp->b_offset;
1785                 newbp->b_iodone = ffs_backgroundwritedone;
1786                 newbp->b_flags |= B_ASYNC;
1787                 newbp->b_flags &= ~B_INVAL;
1788
1789 #ifdef SOFTUPDATES
1790                 /* move over the dependencies */
1791                 if (!LIST_EMPTY(&bp->b_dep))
1792                         softdep_move_dependencies(bp, newbp);
1793 #endif
1794
1795                 /*
1796                  * Initiate write on the copy, release the original to
1797                  * the B_LOCKED queue so that it cannot go away until
1798                  * the background write completes. If not locked it could go
1799                  * away and then be reconstituted while it was being written.
1800                  * If the reconstituted buffer were written, we could end up
1801                  * with two background copies being written at the same time.
1802                  */
1803                 bqrelse(bp);
1804                 bp = newbp;
1805         }
1806
1807         /* Let the normal bufwrite do the rest for us */
1808         return (bufwrite(bp));
1809 }
1810
1811
1812 static void
1813 ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
1814 {
1815         struct vnode *vp;
1816         int error;
1817         struct buf *tbp;
1818
1819         vp = bo->__bo_vnode;
1820         if (bp->b_iocmd == BIO_WRITE) {
1821                 if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
1822                     bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
1823                     (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
1824                         panic("ffs_geom_strategy: bad I/O");
1825                 bp->b_flags &= ~B_VALIDSUSPWRT;
1826                 if ((vp->v_vflag & VV_COPYONWRITE) &&
1827                     vp->v_rdev->si_snapdata != NULL) {
1828                         if ((bp->b_flags & B_CLUSTER) != 0) {
1829                                 runningbufwakeup(bp);
1830                                 TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1831                                               b_cluster.cluster_entry) {
1832                                         error = ffs_copyonwrite(vp, tbp);
1833                                         if (error != 0 &&
1834                                             error != EOPNOTSUPP) {
1835                                                 bp->b_error = error;
1836                                                 bp->b_ioflags |= BIO_ERROR;
1837                                                 bufdone(bp);
1838                                                 return;
1839                                         }
1840                                 }
1841                                 bp->b_runningbufspace = bp->b_bufsize;
1842                                 atomic_add_int(&runningbufspace,
1843                                                bp->b_runningbufspace);
1844                         } else {
1845                                 error = ffs_copyonwrite(vp, bp);
1846                                 if (error != 0 && error != EOPNOTSUPP) {
1847                                         bp->b_error = error;
1848                                         bp->b_ioflags |= BIO_ERROR;
1849                                         bufdone(bp);
1850                                         return;
1851                                 }
1852                         }
1853                 }
1854 #ifdef SOFTUPDATES
1855                 if ((bp->b_flags & B_CLUSTER) != 0) {
1856                         TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1857                                       b_cluster.cluster_entry) {
1858                                 if (!LIST_EMPTY(&tbp->b_dep))
1859                                         buf_start(tbp);
1860                         }
1861                 } else {
1862                         if (!LIST_EMPTY(&bp->b_dep))
1863                                 buf_start(bp);
1864                 }
1865
1866 #endif
1867         }
1868         g_vfs_strategy(bo, bp);
1869 }
1870
1871 #ifdef  DDB
1872
1873 static void
1874 db_print_ffs(struct ufsmount *ump)
1875 {
1876         db_printf("mp %p %s devvp %p fs %p su_wl %d su_wl_in %d su_deps %d "
1877                   "su_req %d\n",
1878             ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
1879             ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
1880             ump->softdep_on_worklist_inprogress, ump->softdep_deps,
1881             ump->softdep_req);
1882 }
1883
1884 DB_SHOW_COMMAND(ffs, db_show_ffs)
1885 {
1886         struct mount *mp;
1887         struct ufsmount *ump;
1888
1889         if (have_addr) {
1890                 ump = VFSTOUFS((struct mount *)addr);
1891                 db_print_ffs(ump);
1892                 return;
1893         }
1894
1895         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
1896                 if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
1897                         db_print_ffs(VFSTOUFS(mp));
1898         }
1899 }
1900
1901 #endif  /* DDB */