sys/ufs/ffs/ffs_vfsops.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-3-Clause
   3  *
   4  * Copyright (c) 1989, 1991, 1993, 1994
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  * 3. Neither the name of the University nor the names of its contributors
  16  *    may be used to endorse or promote products derived from this software
  17  *    without specific prior written permission.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  29  * SUCH DAMAGE.
  30  *
  31  *      @(#)ffs_vfsops.c        8.31 (Berkeley) 5/20/95
  32  */
  33
  34 #include <sys/cdefs.h>
  35 __FBSDID("$FreeBSD$");
  36
  37 #include "opt_quota.h"
  38 #include "opt_ufs.h"
  39 #include "opt_ffs.h"
  40 #include "opt_ddb.h"
  41
  42 #include <sys/param.h>
  43 #include <sys/systm.h>
  44 #include <sys/namei.h>
  45 #include <sys/priv.h>
  46 #include <sys/proc.h>
  47 #include <sys/taskqueue.h>
  48 #include <sys/kernel.h>
  49 #include <sys/vnode.h>
  50 #include <sys/mount.h>
  51 #include <sys/bio.h>
  52 #include <sys/buf.h>
  53 #include <sys/conf.h>
  54 #include <sys/fcntl.h>
  55 #include <sys/ioccom.h>
  56 #include <sys/malloc.h>
  57 #include <sys/mutex.h>
  58 #include <sys/rwlock.h>
  59 #include <sys/vmmeter.h>
  60
  61 #include <security/mac/mac_framework.h>
  62
  63 #include <ufs/ufs/dir.h>
  64 #include <ufs/ufs/extattr.h>
  65 #include <ufs/ufs/gjournal.h>
  66 #include <ufs/ufs/quota.h>
  67 #include <ufs/ufs/ufsmount.h>
  68 #include <ufs/ufs/inode.h>
  69 #include <ufs/ufs/ufs_extern.h>
  70
  71 #include <ufs/ffs/fs.h>
  72 #include <ufs/ffs/ffs_extern.h>
  73
  74 #include <vm/vm.h>
  75 #include <vm/uma.h>
  76 #include <vm/vm_page.h>
  77
  78 #include <geom/geom.h>
  79 #include <geom/geom_vfs.h>
  80
  81 #include <ddb/ddb.h>
   82
      /*
       * UMA zones for in-core inodes and for UFS1/UFS2 on-disk inode images.
       * They start out unset and are created by ffs_mount() the first time it
       * finds uma_inode == NULL.
       */
   83 static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
   84
      /* Forward declarations of file-local helpers. */
   85 static int      ffs_mountfs(struct vnode *, struct mount *, struct thread *);
   86 static void     ffs_oldfscompat_read(struct fs *, struct ufsmount *,
   87                     ufs2_daddr_t);
   88 static void     ffs_ifree(struct ufsmount *ump, struct inode *ip);
   89 static int      ffs_sync_lazy(struct mount *mp);
   90
      /*
       * File-local VFS operations, declared through the vfs_*_t function
       * typedefs; all of them are installed in ufs_vfsops below.
       */
   91 static vfs_init_t ffs_init;
   92 static vfs_uninit_t ffs_uninit;
   93 static vfs_extattrctl_t ffs_extattrctl;
   94 static vfs_cmount_t ffs_cmount;
   95 static vfs_unmount_t ffs_unmount;
   96 static vfs_mount_t ffs_mount;
   97 static vfs_statfs_t ffs_statfs;
   98 static vfs_fhtovp_t ffs_fhtovp;
   99 static vfs_sync_t ffs_sync;
  100
      /*
       * VFS operations vector for this filesystem.  Entries named ffs_* are
       * the file-local functions declared above (ffs_vget excepted, which is
       * not declared in this file); ufs_quotactl, ufs_root, ffs_vget and
       * process_deferred_inactive are declared elsewhere.
       */
  101 static struct vfsops ufs_vfsops = {
  102         .vfs_extattrctl =       ffs_extattrctl,
  103         .vfs_fhtovp =           ffs_fhtovp,
  104         .vfs_init =             ffs_init,
  105         .vfs_mount =            ffs_mount,
  106         .vfs_cmount =           ffs_cmount,
  107         .vfs_quotactl =         ufs_quotactl,
  108         .vfs_root =             ufs_root,
  109         .vfs_statfs =           ffs_statfs,
  110         .vfs_sync =             ffs_sync,
  111         .vfs_uninit =           ffs_uninit,
  112         .vfs_unmount =          ffs_unmount,
  113         .vfs_vget =             ffs_vget,
  114         .vfs_susp_clean =       process_deferred_inactive,
  115 };
  116
      /* Register the vector under the filesystem name "ufs", module version 1. */
  117 VFS_SET(ufs_vfsops, ufs, 0);
  118 MODULE_VERSION(ufs, 1);
  119
      /* Buffer-layer callbacks implemented later in this file. */
  120 static b_strategy_t ffs_geom_strategy;
  121 static b_write_t ffs_bufwrite;
  122
      /*
       * Buffer operations vector named "FFS".  The bdflush hook is the generic
       * bufbdflush when the kernel is built with NO_FFS_SNAPSHOT and
       * ffs_bdflush otherwise.
       */
  123 static struct buf_ops ffs_ops = {
  124         .bop_name =     "FFS",
  125         .bop_write =    ffs_bufwrite,
  126         .bop_strategy = ffs_geom_strategy,
  127         .bop_sync =     bufsync,
  128 #ifdef NO_FFS_SNAPSHOT
  129         .bop_bdflush =  bufbdflush,
  130 #else
  131         .bop_bdflush =  ffs_bdflush,
  132 #endif
  133 };
 134
  135 /*
       * Option names accepted by ffs_mount().  The NULL-terminated list is
       * passed to vfs_filteropt(); if that call returns non-zero the mount
       * request fails with EINVAL.
       *
  136  * Note that userquota and groupquota options are not currently used
  137  * by UFS/FFS code and generally mount(8) does not pass those options
  138  * from userland, but they can be passed by loader(8) via
  139  * vfs.root.mountfrom.options.
       * ffs_mount() deletes both of them from the new option list right after
       * the filter check.
  140  */
  141 static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
  142     "noclusterw", "noexec", "export", "force", "from", "groupquota",
  143     "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir",
  144     "nosymfollow", "sync", "union", "userquota", NULL };
  145
      /*
       * Mount entry point (installed as .vfs_mount in ufs_vfsops).
       *
       * Parses the options in mp->mnt_optnew and then handles either a new
       * mount or, when MNT_UPDATE is set, an update of an existing one.  An
       * update may, in this order: end a checker (fsck) session, downgrade
       * read-write to read-only, reload in-core data (MNT_RELOAD), upgrade
       * read-only to read-write, start a checker session for the pid given
       * by the "fsckpid" option, and take a snapshot (MNT_SNAPSHOT, which
       * returns directly).  Afterwards the "from" path is looked up, checked
       * to be a disk device the caller may access, and either compared with
       * the mounted device (update) or handed to ffs_mountfs() (new mount).
       *
       * Returns 0 on success or an errno value.
       */
  146 static int
  147 ffs_mount(struct mount *mp)
  148 {
  149         struct vnode *devvp;
  150         struct thread *td;
  151         struct ufsmount *ump = NULL;
  152         struct fs *fs;
  153         pid_t fsckpid = 0;
  154         int error, error1, flags;
  155         uint64_t mntorflags;
  156         accmode_t accmode;
  157         struct nameidata ndp;
  158         char *fspec;
  159
  160         td = curthread;
  161         if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
  162                 return (EINVAL);
              /* Create the three UMA zones the first time through. */
  163         if (uma_inode == NULL) {
  164                 uma_inode = uma_zcreate("FFS inode",
  165                     sizeof(struct inode), NULL, NULL, NULL, NULL,
  166                     UMA_ALIGN_PTR, 0);
  167                 uma_ufs1 = uma_zcreate("FFS1 dinode",
  168                     sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
  169                     UMA_ALIGN_PTR, 0);
  170                 uma_ufs2 = uma_zcreate("FFS2 dinode",
  171                     sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
  172                     UMA_ALIGN_PTR, 0);
  173         }
  174
  175         vfs_deleteopt(mp->mnt_optnew, "groupquota");
  176         vfs_deleteopt(mp->mnt_optnew, "userquota");
  177
  178         fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
  179         if (error)
  180                 return (error);
  181
              /* Collect MNT_* bits implied by options; applied to mp below. */
  182         mntorflags = 0;
  183         if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
  184                 mntorflags |= MNT_ACLS;
  185
  186         if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
  187                 mntorflags |= MNT_SNAPSHOT;
  188                 /*
  189                  * Once we have set the MNT_SNAPSHOT flag, do not
  190                  * persist "snapshot" in the options list.
  191                  */
  192                 vfs_deleteopt(mp->mnt_optnew, "snapshot");
  193                 vfs_deleteopt(mp->mnt_opt, "snapshot");
  194         }
  195
  196         if (vfs_getopt(mp->mnt_optnew, "fsckpid", NULL, NULL) == 0 &&
  197             vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) {
  198                 /*
  199                  * Once we have set the restricted PID, do not
  200                  * persist "fsckpid" in the options list.
  201                  */
  202                 vfs_deleteopt(mp->mnt_optnew, "fsckpid");
  203                 vfs_deleteopt(mp->mnt_opt, "fsckpid");
  204                 if (mp->mnt_flag & MNT_UPDATE) {
  205                         if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 &&
  206                              vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) {
  207                                 vfs_mount_error(mp,
  208                                     "Checker enable: Must be read-only");
  209                                 return (EINVAL);
  210                         }
  211                 } else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) {
  212                         vfs_mount_error(mp,
  213                             "Checker enable: Must be read-only");
  214                         return (EINVAL);
  215                 }
  216                 /* Set to -1 if we are done */
  217                 if (fsckpid == 0)
  218                         fsckpid = -1;
  219         }
  220
  221         if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) {
  222                 if (mntorflags & MNT_ACLS) {
  223                         vfs_mount_error(mp,
  224                             "\"acls\" and \"nfsv4acls\" options "
  225                             "are mutually exclusive");
  226                         return (EINVAL);
  227                 }
  228                 mntorflags |= MNT_NFS4ACLS;
  229         }
  230
  231         MNT_ILOCK(mp);
  232         mp->mnt_flag |= mntorflags;
  233         MNT_IUNLOCK(mp);
  234         /*
  235          * If updating, check whether changing from read-only to
  236          * read/write; if there is no device name, that's all we do.
  237          */
  238         if (mp->mnt_flag & MNT_UPDATE) {
  239                 ump = VFSTOUFS(mp);
  240                 fs = ump->um_fs;
  241                 devvp = ump->um_devvp;
                      /* fsckpid == -1 means "fsckpid=0" was given: checker done. */
  242                 if (fsckpid == -1 && ump->um_fsckpid > 0) {
  243                         if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 ||
  244                             (error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0)
  245                                 return (error);
  246                         g_topology_lock();
  247                         /*
  248                          * Return to normal read-only mode.
  249                          */
                              /*
                               * NOTE(review): the value stored in error here is
                               * never examined; every later use of error is
                               * preceded by a new assignment.
                               */
  250                         error = g_access(ump->um_cp, 0, -1, 0);
  251                         g_topology_unlock();
  252                         ump->um_fsckpid = 0;
  253                 }
                      /* Downgrade: currently read-write and "ro" was requested. */
  254                 if (fs->fs_ronly == 0 &&
  255                     vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
  256                         /*
  257                          * Flush any dirty data and suspend filesystem.
  258                          */
  259                         if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
  260                                 return (error);
  261                         error = vfs_write_suspend_umnt(mp);
  262                         if (error != 0)
  263                                 return (error);
  264                         /*
  265                          * Check for and optionally get rid of files open
  266                          * for writing.
  267                          */
  268                         flags = WRITECLOSE;
  269                         if (mp->mnt_flag & MNT_FORCE)
  270                                 flags |= FORCECLOSE;
  271                         if (MOUNTEDSOFTDEP(mp)) {
  272                                 error = softdep_flushfiles(mp, flags, td);
  273                         } else {
  274                                 error = ffs_flushfiles(mp, flags, td);
  275                         }
  276                         if (error) {
  277                                 vfs_write_resume(mp, 0);
  278                                 return (error);
  279                         }
  280                         if (fs->fs_pendingblocks != 0 ||
  281                             fs->fs_pendinginodes != 0) {
  282                                 printf("WARNING: %s Update error: blocks %jd "
  283                                     "files %d\n", fs->fs_fsmnt,
  284                                     (intmax_t)fs->fs_pendingblocks,
  285                                     fs->fs_pendinginodes);
  286                                 fs->fs_pendingblocks = 0;
  287                                 fs->fs_pendinginodes = 0;
  288                         }
  289                         if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
  290                                 fs->fs_clean = 1;
  291                         if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
  292                                 fs->fs_ronly = 0;
  293                                 fs->fs_clean = 0;
  294                                 vfs_write_resume(mp, 0);
  295                                 return (error);
  296                         }
  297                         if (MOUNTEDSOFTDEP(mp))
  298                                 softdep_unmount(mp);
  299                         g_topology_lock();
  300                         /*
  301                          * Drop our write and exclusive access.
  302                          */
                              /* The g_access() result is not checked here. */
  303                         g_access(ump->um_cp, 0, -1, -1);
  304                         g_topology_unlock();
  305                         fs->fs_ronly = 1;
  306                         MNT_ILOCK(mp);
  307                         mp->mnt_flag |= MNT_RDONLY;
  308                         MNT_IUNLOCK(mp);
  309                         /*
  310                          * Allow the writers to note that filesystem
  311                          * is ro now.
  312                          */
  313                         vfs_write_resume(mp, 0);
  314                 }
  315                 if ((mp->mnt_flag & MNT_RELOAD) &&
  316                     (error = ffs_reload(mp, td, 0)) != 0)
  317                         return (error);
                      /* Upgrade: currently read-only and "ro" was not requested. */
  318                 if (fs->fs_ronly &&
  319                     !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
  320                         /*
  321                          * If we are running a checker, do not allow upgrade.
  322                          */
  323                         if (ump->um_fsckpid > 0) {
  324                                 vfs_mount_error(mp,
  325                                     "Active checker, cannot upgrade to write");
  326                                 return (EINVAL);
  327                         }
  328                         /*
  329                          * If upgrade to read-write by non-root, then verify
  330                          * that user has necessary permissions on the device.
  331                          */
  332                         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
  333                         error = VOP_ACCESS(devvp, VREAD | VWRITE,
  334                             td->td_ucred, td);
  335                         if (error)
  336                                 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
  337                         if (error) {
  338                                 VOP_UNLOCK(devvp, 0);
  339                                 return (error);
  340                         }
  341                         VOP_UNLOCK(devvp, 0);
  342                         fs->fs_flags &= ~FS_UNCLEAN;
  343                         if (fs->fs_clean == 0) {
  344                                 fs->fs_flags |= FS_UNCLEAN;
  345                                 if ((mp->mnt_flag & MNT_FORCE) ||
  346                                     ((fs->fs_flags &
  347                                      (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
  348                                      (fs->fs_flags & FS_DOSOFTDEP))) {
  349                                         printf("WARNING: %s was not properly "
  350                                            "dismounted\n", fs->fs_fsmnt);
  351                                 } else {
  352                                         vfs_mount_error(mp,
  353                                            "R/W mount of %s denied. %s.%s",
  354                                            fs->fs_fsmnt,
  355                                            "Filesystem is not clean - run fsck",
  356                                            (fs->fs_flags & FS_SUJ) == 0 ? "" :
  357                                            " Forced mount will invalidate"
  358                                            " journal contents");
  359                                         return (EPERM);
  360                                 }
  361                         }
  362                         g_topology_lock();
  363                         /*
  364                          * Request exclusive write access.
  365                          */
  366                         error = g_access(ump->um_cp, 0, 1, 1);
  367                         g_topology_unlock();
  368                         if (error)
  369                                 return (error);
                              /*
                               * NOTE(review): the error returns from here to the
                               * end of this branch do not call g_access() again
                               * to give back the access acquired just above, and
                               * the two returns after fs_ronly is cleared leave
                               * fs_ronly == 0 and MNT_RDONLY cleared.
                               */
  370                         if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
  371                                 return (error);
  372                         fs->fs_ronly = 0;
  373                         MNT_ILOCK(mp);
  374                         mp->mnt_flag &= ~MNT_RDONLY;
  375                         MNT_IUNLOCK(mp);
  376                         fs->fs_mtime = time_second;
  377                         /* check to see if we need to start softdep */
  378                         if ((fs->fs_flags & FS_DOSOFTDEP) &&
  379                             (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
  380                                 vn_finished_write(mp);
  381                                 return (error);
  382                         }
  383                         fs->fs_clean = 0;
  384                         if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
  385                                 vn_finished_write(mp);
  386                                 return (error);
  387                         }
  388                         if (fs->fs_snapinum[0] != 0)
  389                                 ffs_snapshot_mount(mp);
  390                         vn_finished_write(mp);
  391                 }
  392                 /*
  393                  * Soft updates is incompatible with "async",
  394                  * so if we are doing softupdates stop the user
  395                  * from setting the async flag in an update.
  396                  * Softdep_mount() clears it in an initial mount
  397                  * or ro->rw remount.
  398                  */
  399                 if (MOUNTEDSOFTDEP(mp)) {
  400                         /* XXX: Reset too late ? */
  401                         MNT_ILOCK(mp);
  402                         mp->mnt_flag &= ~MNT_ASYNC;
  403                         MNT_IUNLOCK(mp);
  404                 }
  405                 /*
  406                  * Keep MNT_ACLS flag if it is stored in superblock.
  407                  */
  408                 if ((fs->fs_flags & FS_ACLS) != 0) {
  409                         /* XXX: Set too late ? */
  410                         MNT_ILOCK(mp);
  411                         mp->mnt_flag |= MNT_ACLS;
  412                         MNT_IUNLOCK(mp);
  413                 }
  414
                      /* Likewise keep MNT_NFS4ACLS if the superblock has it. */
  415                 if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
  416                         /* XXX: Set too late ? */
  417                         MNT_ILOCK(mp);
  418                         mp->mnt_flag |= MNT_NFS4ACLS;
  419                         MNT_IUNLOCK(mp);
  420                 }
  421                 /*
  422                  * If this is a request from fsck to clean up the filesystem,
  423                  * then allow the specified pid to proceed.
  424                  */
  425                 if (fsckpid > 0) {
  426                         if (ump->um_fsckpid != 0) {
  427                                 vfs_mount_error(mp,
  428                                     "Active checker already running on %s",
  429                                     fs->fs_fsmnt);
  430                                 return (EINVAL);
  431                         }
  432                         KASSERT(MOUNTEDSOFTDEP(mp) == 0,
  433                             ("soft updates enabled on read-only file system"));
  434                         g_topology_lock();
  435                         /*
  436                          * Request write access.
  437                          */
  438                         error = g_access(ump->um_cp, 0, 1, 0);
  439                         g_topology_unlock();
  440                         if (error) {
  441                                 vfs_mount_error(mp,
  442                                     "Checker activation failed on %s",
  443                                     fs->fs_fsmnt);
  444                                 return (error);
  445                         }
  446                         ump->um_fsckpid = fsckpid;
  447                         if (fs->fs_snapinum[0] != 0)
  448                                 ffs_snapshot_mount(mp);
  449                         fs->fs_mtime = time_second;
  450                         fs->fs_fmod = 1;
  451                         fs->fs_clean = 0;
  452                         (void) ffs_sbupdate(ump, MNT_WAIT, 0);
  453                 }
  454
  455                 /*
  456                  * If this is a snapshot request, take the snapshot.
  457                  */
  458                 if (mp->mnt_flag & MNT_SNAPSHOT)
  459                         return (ffs_snapshot(mp, fspec));
  460
  461                 /*
  462                  * Must not call namei() while owning busy ref.
  463                  */
  464                 vfs_unbusy(mp);
  465         }
  466
  467         /*
  468          * Not an update, or updating the name: look up the name
  469          * and verify that it refers to a sensible disk device.
  470          */
  471         NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
  472         error = namei(&ndp);
  473         if ((mp->mnt_flag & MNT_UPDATE) != 0) {
  474                 /*
  475                  * Unmount does not start if MNT_UPDATE is set.  Mount
  476                  * update busies mp before setting MNT_UPDATE.  We
  477                  * must be able to retain our busy ref successfully,
  478                  * without sleep.
  479                  */
                      /* Re-take the busy ref before acting on the namei() result. */
  480                 error1 = vfs_busy(mp, MBF_NOWAIT);
  481                 MPASS(error1 == 0);
  482         }
  483         if (error != 0)
  484                 return (error);
  485         NDFREE(&ndp, NDF_ONLY_PNBUF);
  486         devvp = ndp.ni_vp;
  487         if (!vn_isdisk(devvp, &error)) {
  488                 vput(devvp);
  489                 return (error);
  490         }
  491
  492         /*
  493          * If mount by non-root, then verify that user has necessary
  494          * permissions on the device.
  495          */
  496         accmode = VREAD;
  497         if ((mp->mnt_flag & MNT_RDONLY) == 0)
  498                 accmode |= VWRITE;
  499         error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
  500         if (error)
  501                 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
  502         if (error) {
  503                 vput(devvp);
  504                 return (error);
  505         }
  506
  507         if (mp->mnt_flag & MNT_UPDATE) {
  508                 /*
  509                  * Update only
  510                  *
  511                  * If it's not the same vnode, or at least the same device
  512                  * then it's not correct.
  513                  */
  514
  515                 if (devvp->v_rdev != ump->um_devvp->v_rdev)
  516                         error = EINVAL; /* needs translation */
  517                 vput(devvp);
  518                 if (error)
  519                         return (error);
  520         } else {
  521                 /*
  522                  * New mount
  523                  *
  524                  * We need the name for the mount point (also used for
  525                  * "last mounted on") copied in. If an error occurs,
  526                  * the mount point is discarded by the upper level code.
  527                  * Note that vfs_mount_alloc() populates f_mntonname for us.
  528                  */
  529                 if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
  530                         vrele(devvp);
  531                         return (error);
  532                 }
                      /*
                       * New mount with a checker pid: unlike the update case
                       * above, a g_access() failure here is only logged and the
                       * mount still succeeds.
                       */
  533                 if (fsckpid > 0) {
  534                         KASSERT(MOUNTEDSOFTDEP(mp) == 0,
  535                             ("soft updates enabled on read-only file system"));
  536                         ump = VFSTOUFS(mp);
  537                         fs = ump->um_fs;
  538                         g_topology_lock();
  539                         /*
  540                          * Request write access.
  541                          */
  542                         error = g_access(ump->um_cp, 0, 1, 0);
  543                         g_topology_unlock();
  544                         if (error) {
  545                                 printf("WARNING: %s: Checker activation "
  546                                     "failed\n", fs->fs_fsmnt);
  547                         } else {
  548                                 ump->um_fsckpid = fsckpid;
  549                                 if (fs->fs_snapinum[0] != 0)
  550                                         ffs_snapshot_mount(mp);
  551                                 fs->fs_mtime = time_second;
  552                                 fs->fs_clean = 0;
  553                                 (void) ffs_sbupdate(ump, MNT_WAIT, 0);
  554                         }
  555                 }
  556         }
  557         vfs_mountedfrom(mp, fspec);
  558         return (0);
  559 }
 560
 561 /*
 562  * Compatibility with old mount system call.
 563  */
 564
 565 static int
 566 ffs_cmount(struct mntarg *ma, void *data, uint64_t flags)
 567 {
 568         struct ufs_args args;
 569         struct export_args exp;
 570         int error;
 571
 572         if (data == NULL)
 573                 return (EINVAL);
 574         error = copyin(data, &args, sizeof args);
 575         if (error)
 576                 return (error);
 577         vfs_oexport_conv(&args.export, &exp);
 578
 579         ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
 580         ma = mount_arg(ma, "export", &exp, sizeof(exp));
 581         error = kernel_mount(ma, flags);
 582
 583         return (error);
 584 }
 585
  586 /*
  587  * Reload all incore data for a filesystem (used after running fsck on
  588  * the root filesystem and finding things to fix). If the 'force' flag
  589  * is 0, the filesystem must be mounted read-only.
  590  *
  591  * Things to do to update the mount:
  592  *      1) invalidate all cached meta-data.
  593  *      2) re-read superblock from disk.
  594  *      3) re-read summary information from disk.
  595  *      4) invalidate all inactive vnodes.
  596  *      5) clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags, allowing secondary
  597  *         writers, if requested.
  598  *      6) invalidate all cached file data.
  599  *      7) re-read inode data for all active vnodes.
       *
       * 'flags' is tested for FFSR_FORCE (skip the read-only requirement) and
       * FFSR_UNSUSPEND (perform step 5).
       *
       * Returns 0 on success, EINVAL if the mount is read-write and FFSR_FORCE
       * is not set, EIO if the re-read superblock fails the magic/block-size
       * checks, or the error from a failed bread().  Panics if vinvalbuf()
       * fails on the device vnode or on a file vnode.
       *
       * NOTE(review): no code in this function is labelled as step 4; the
       * vnode loop at the end handles every vnode it is given in the same way.
  600  */
  601 int
  602 ffs_reload(struct mount *mp, struct thread *td, int flags)
  603 {
  604         struct vnode *vp, *mvp, *devvp;
  605         struct inode *ip;
  606         void *space;
  607         struct buf *bp;
  608         struct fs *fs, *newfs;
  609         struct ufsmount *ump;
  610         ufs2_daddr_t sblockloc;
  611         int i, blks, error;
  612         u_long size;
  613         int32_t *lp;
  614
  615         ump = VFSTOUFS(mp);
  616
  617         MNT_ILOCK(mp);
  618         if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) {
  619                 MNT_IUNLOCK(mp);
  620                 return (EINVAL);
  621         }
  622         MNT_IUNLOCK(mp);
  623
  624         /*
  625          * Step 1: invalidate all cached meta-data.
  626          */
  627         devvp = VFSTOUFS(mp)->um_devvp;
  628         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
  629         if (vinvalbuf(devvp, 0, 0, 0) != 0)
  630                 panic("ffs_reload: dirty1");
  631         VOP_UNLOCK(devvp, 0);
  632
  633         /*
  634          * Step 2: re-read superblock from disk.
  635          */
  636         fs = VFSTOUFS(mp)->um_fs;
  637         if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
  638             NOCRED, &bp)) != 0)
  639                 return (error);
  640         newfs = (struct fs *)bp->b_data;
              /* Reject a superblock with a bad magic number or block size. */
  641         if ((newfs->fs_magic != FS_UFS1_MAGIC &&
  642              newfs->fs_magic != FS_UFS2_MAGIC) ||
  643             newfs->fs_bsize > MAXBSIZE ||
  644             newfs->fs_bsize < sizeof(struct fs)) {
  645                         brelse(bp);
  646                         return (EIO);           /* XXX needs translation */
  647         }
  648         /*
  649          * Copy pointer fields back into superblock before copying in   XXX
  650          * new superblock. These should really be in the ufsmount.      XXX
  651          * Note that important parameters (eg fs_ncg) are unchanged.
  652          */
  653         newfs->fs_csp = fs->fs_csp;
  654         newfs->fs_maxcluster = fs->fs_maxcluster;
  655         newfs->fs_contigdirs = fs->fs_contigdirs;
  656         newfs->fs_active = fs->fs_active;
  657         newfs->fs_ronly = fs->fs_ronly;
  658         sblockloc = fs->fs_sblockloc;
              /* The copy length is the in-core (old) fs_sbsize. */
  659         bcopy(newfs, fs, (u_int)fs->fs_sbsize);
  660         brelse(bp);
  661         mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
  662         ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
  663         UFS_LOCK(ump);
  664         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
  665                 printf("WARNING: %s: reload pending error: blocks %jd "
  666                     "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
  667                     fs->fs_pendinginodes);
  668                 fs->fs_pendingblocks = 0;
  669                 fs->fs_pendinginodes = 0;
  670         }
  671         UFS_UNLOCK(ump);
  672
  673         /*
  674          * Step 3: re-read summary information from disk.
  675          */
              /*
               * One allocation holds, in order: fs_cssize bytes of summary
               * data, fs_ncg int32_t cluster counts (only when
               * fs_contigsumsize > 0), and fs_ncg u_int8_t contigdirs entries.
               */
  676         size = fs->fs_cssize;
  677         blks = howmany(size, fs->fs_fsize);
  678         if (fs->fs_contigsumsize > 0)
  679                 size += fs->fs_ncg * sizeof(int32_t);
  680         size += fs->fs_ncg * sizeof(u_int8_t);
  681         free(fs->fs_csp, M_UFSMNT);
  682         space = malloc(size, M_UFSMNT, M_WAITOK);
  683         fs->fs_csp = space;
  684         for (i = 0; i < blks; i += fs->fs_frag) {
  685                 size = fs->fs_bsize;
  686                 if (i + fs->fs_frag > blks)
  687                         size = (blks - i) * fs->fs_fsize;
  688                 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
  689                     NOCRED, &bp);
                      /*
                       * NOTE(review): returning here leaves fs_csp only partly
                       * filled, and fs_maxcluster/fs_contigdirs still hold the
                       * values copied from before the reload; they are only
                       * re-pointed into the new allocation after this loop.
                       * If they pointed into the old fs_csp allocation (as
                       * they are set up below) they now refer to freed
                       * memory - confirm.
                       */
  690                 if (error)
  691                         return (error);
  692                 bcopy(bp->b_data, space, (u_int)size);
  693                 space = (char *)space + size;
  694                 brelse(bp);
  695         }
  696         /*
  697          * We no longer know anything about clusters per cylinder group.
  698          */
  699         if (fs->fs_contigsumsize > 0) {
  700                 fs->fs_maxcluster = lp = space;
  701                 for (i = 0; i < fs->fs_ncg; i++)
  702                         *lp++ = fs->fs_contigsumsize;
  703                 space = lp;
  704         }
  705         size = fs->fs_ncg * sizeof(u_int8_t);
  706         fs->fs_contigdirs = (u_int8_t *)space;
  707         bzero(fs->fs_contigdirs, size);
              /*
               * Step 5: clear the suspension flags and wake up sleepers on
               * &mp->mnt_flag, if the caller asked for it.
               */
  708         if ((flags & FFSR_UNSUSPEND) != 0) {
  709                 MNT_ILOCK(mp);
  710                 mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2);
  711                 wakeup(&mp->mnt_flag);
  712                 MNT_IUNLOCK(mp);
  713         }
  714
  715 loop:
  716         MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
  717                 /*
  718                  * Skip syncer vnode.
  719                  */
  720                 if (vp->v_type == VNON) {
  721                         VI_UNLOCK(vp);
  722                         continue;
  723                 }
  724                 /*
  725                  * Step 6: invalidate all cached file data.
  726                  */
                      /* If vget() fails, abort the iteration and start over. */
  727                 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
  728                         MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
  729                         goto loop;
  730                 }
  731                 if (vinvalbuf(vp, 0, 0, 0))
  732                         panic("ffs_reload: dirty2");
  733                 /*
  734                  * Step 7: re-read inode data for all active vnodes.
  735                  */
  736                 ip = VTOI(vp);
  737                 error =
  738                     bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
  739                     (int)fs->fs_bsize, NOCRED, &bp);
  740                 if (error) {
  741                         VOP_UNLOCK(vp, 0);
  742                         vrele(vp);
  743                         MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
  744                         return (error);
  745                 }
  746                 ffs_load_inode(bp, ip, fs, ip->i_number);
  747                 ip->i_effnlink = ip->i_nlink;
  748                 brelse(bp);
  749                 VOP_UNLOCK(vp, 0);
  750                 vrele(vp);
  751         }
  752         return (0);
  753 }
 754
/*
 * Possible superblock locations ordered from most to least likely.
 * The list is supplied by SBLOCKSEARCH and ends with a -1 sentinel;
 * ffs_mountfs() walks it in order, converting each entry with btodb()
 * before reading, and stops at the first candidate that passes its checks.
 */
static int sblock_try[] = SBLOCKSEARCH;
 759
 760 /*
 761  * Common code for mount and mountroot
 762  */
 763 static int
 764 ffs_mountfs(devvp, mp, td)
 765         struct vnode *devvp;
 766         struct mount *mp;
 767         struct thread *td;
 768 {
 769         struct ufsmount *ump;
 770         struct buf *bp;
 771         struct fs *fs;
 772         struct cdev *dev;
 773         void *space;
 774         ufs2_daddr_t sblockloc;
 775         int error, i, blks, len, ronly;
 776         u_long size;
 777         int32_t *lp;
 778         struct ucred *cred;
 779         struct g_consumer *cp;
 780         struct mount *nmp;
 781
 782         bp = NULL;
 783         ump = NULL;
 784         cred = td ? td->td_ucred : NOCRED;
 785         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 786
 787         KASSERT(devvp->v_type == VCHR, ("reclaimed devvp"));
 788         dev = devvp->v_rdev;
 789         if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0,
 790             (uintptr_t)mp) == 0) {
 791                 VOP_UNLOCK(devvp, 0);
 792                 return (EBUSY);
 793         }
 794         g_topology_lock();
 795         error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
 796         g_topology_unlock();
 797         if (error != 0) {
 798                 atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
 799                 VOP_UNLOCK(devvp, 0);
 800                 return (error);
 801         }
 802         dev_ref(dev);
 803         devvp->v_bufobj.bo_ops = &ffs_ops;
 804         VOP_UNLOCK(devvp, 0);
 805         if (dev->si_iosize_max != 0)
 806                 mp->mnt_iosize_max = dev->si_iosize_max;
 807         if (mp->mnt_iosize_max > MAXPHYS)
 808                 mp->mnt_iosize_max = MAXPHYS;
 809
 810         fs = NULL;
 811         sblockloc = 0;
 812         /*
 813          * Try reading the superblock in each of its possible locations.
 814          */
 815         for (i = 0; sblock_try[i] != -1; i++) {
 816                 if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
 817                         error = EINVAL;
 818                         vfs_mount_error(mp,
 819                             "Invalid sectorsize %d for superblock size %d",
 820                             cp->provider->sectorsize, SBLOCKSIZE);
 821                         goto out;
 822                 }
 823                 if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
 824                     cred, &bp)) != 0)
 825                         goto out;
 826                 fs = (struct fs *)bp->b_data;
 827                 sblockloc = sblock_try[i];
 828                 if ((fs->fs_magic == FS_UFS1_MAGIC ||
 829                      (fs->fs_magic == FS_UFS2_MAGIC &&
 830                       (fs->fs_sblockloc == sblockloc ||
 831                        (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
 832                     fs->fs_bsize <= MAXBSIZE &&
 833                     fs->fs_bsize >= sizeof(struct fs))
 834                         break;
 835                 brelse(bp);
 836                 bp = NULL;
 837         }
 838         if (sblock_try[i] == -1) {
 839                 error = EINVAL;         /* XXX needs translation */
 840                 goto out;
 841         }
 842         fs->fs_fmod = 0;
 843         /* none of these types of check-hashes are maintained */
 844         fs->fs_metackhash &= ~(CK_SUPERBLOCK | CK_INODE | CK_INDIR | CK_DIR);
 845         /* no support for directory indices or any other undefined flags */
 846         fs->fs_flags &= ~FS_INDEXDIRS;
 847         fs->fs_flags &= FS_SUPPORTED;
 848         fs->fs_flags &= ~FS_UNCLEAN;
 849         if (fs->fs_clean == 0) {
 850                 fs->fs_flags |= FS_UNCLEAN;
 851                 if (ronly || (mp->mnt_flag & MNT_FORCE) ||
 852                     ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
 853                      (fs->fs_flags & FS_DOSOFTDEP))) {
 854                         printf("WARNING: %s was not properly dismounted\n",
 855                             fs->fs_fsmnt);
 856                 } else {
 857                         vfs_mount_error(mp, "R/W mount of %s denied. %s%s",
 858                             fs->fs_fsmnt, "Filesystem is not clean - run fsck.",
 859                             (fs->fs_flags & FS_SUJ) == 0 ? "" :
 860                             " Forced mount will invalidate journal contents");
 861                         error = EPERM;
 862                         goto out;
 863                 }
 864                 if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
 865                     (mp->mnt_flag & MNT_FORCE)) {
 866                         printf("WARNING: %s: lost blocks %jd files %d\n",
 867                             fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 868                             fs->fs_pendinginodes);
 869                         fs->fs_pendingblocks = 0;
 870                         fs->fs_pendinginodes = 0;
 871                 }
 872         }
 873         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 874                 printf("WARNING: %s: mount pending error: blocks %jd "
 875                     "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 876                     fs->fs_pendinginodes);
 877                 fs->fs_pendingblocks = 0;
 878                 fs->fs_pendinginodes = 0;
 879         }
 880         if ((fs->fs_flags & FS_GJOURNAL) != 0) {
 881 #ifdef UFS_GJOURNAL
 882                 /*
 883                  * Get journal provider name.
 884                  */
 885                 len = 1024;
 886                 mp->mnt_gjprovider = malloc((u_long)len, M_UFSMNT, M_WAITOK);
 887                 if (g_io_getattr("GJOURNAL::provider", cp, &len,
 888                     mp->mnt_gjprovider) == 0) {
 889                         mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, len,
 890                             M_UFSMNT, M_WAITOK);
 891                         MNT_ILOCK(mp);
 892                         mp->mnt_flag |= MNT_GJOURNAL;
 893                         MNT_IUNLOCK(mp);
 894                 } else {
 895                         printf("WARNING: %s: GJOURNAL flag on fs "
 896                             "but no gjournal provider below\n",
 897                             mp->mnt_stat.f_mntonname);
 898                         free(mp->mnt_gjprovider, M_UFSMNT);
 899                         mp->mnt_gjprovider = NULL;
 900                 }
 901 #else
 902                 printf("WARNING: %s: GJOURNAL flag on fs but no "
 903                     "UFS_GJOURNAL support\n", mp->mnt_stat.f_mntonname);
 904 #endif
 905         } else {
 906                 mp->mnt_gjprovider = NULL;
 907         }
 908         ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
 909         ump->um_cp = cp;
 910         ump->um_bo = &devvp->v_bufobj;
 911         ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
 912         if (fs->fs_magic == FS_UFS1_MAGIC) {
 913                 ump->um_fstype = UFS1;
 914                 ump->um_balloc = ffs_balloc_ufs1;
 915         } else {
 916                 ump->um_fstype = UFS2;
 917                 ump->um_balloc = ffs_balloc_ufs2;
 918         }
 919         ump->um_blkatoff = ffs_blkatoff;
 920         ump->um_truncate = ffs_truncate;
 921         ump->um_update = ffs_update;
 922         ump->um_valloc = ffs_valloc;
 923         ump->um_vfree = ffs_vfree;
 924         ump->um_ifree = ffs_ifree;
 925         ump->um_rdonly = ffs_rdonly;
 926         ump->um_snapgone = ffs_snapgone;
 927         mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
 928         bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
 929         if (fs->fs_sbsize < SBLOCKSIZE)
 930                 bp->b_flags |= B_INVAL | B_NOCACHE;
 931         brelse(bp);
 932         bp = NULL;
 933         fs = ump->um_fs;
 934         ffs_oldfscompat_read(fs, ump, sblockloc);
 935         fs->fs_ronly = ronly;
 936         size = fs->fs_cssize;
 937         blks = howmany(size, fs->fs_fsize);
 938         if (fs->fs_contigsumsize > 0)
 939                 size += fs->fs_ncg * sizeof(int32_t);
 940         size += fs->fs_ncg * sizeof(u_int8_t);
 941         space = malloc(size, M_UFSMNT, M_WAITOK);
 942         fs->fs_csp = space;
 943         for (i = 0; i < blks; i += fs->fs_frag) {
 944                 size = fs->fs_bsize;
 945                 if (i + fs->fs_frag > blks)
 946                         size = (blks - i) * fs->fs_fsize;
 947                 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
 948                     cred, &bp)) != 0) {
 949                         free(fs->fs_csp, M_UFSMNT);
 950                         goto out;
 951                 }
 952                 bcopy(bp->b_data, space, (u_int)size);
 953                 space = (char *)space + size;
 954                 brelse(bp);
 955                 bp = NULL;
 956         }
 957         if (fs->fs_contigsumsize > 0) {
 958                 fs->fs_maxcluster = lp = space;
 959                 for (i = 0; i < fs->fs_ncg; i++)
 960                         *lp++ = fs->fs_contigsumsize;
 961                 space = lp;
 962         }
 963         size = fs->fs_ncg * sizeof(u_int8_t);
 964         fs->fs_contigdirs = (u_int8_t *)space;
 965         bzero(fs->fs_contigdirs, size);
 966         fs->fs_active = NULL;
 967         mp->mnt_data = ump;
 968         mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
 969         mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
 970         nmp = NULL;
 971         if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
 972             (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
 973                 if (nmp)
 974                         vfs_rel(nmp);
 975                 vfs_getnewfsid(mp);
 976         }
 977         mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 978         MNT_ILOCK(mp);
 979         mp->mnt_flag |= MNT_LOCAL;
 980         MNT_IUNLOCK(mp);
 981         if ((fs->fs_flags & FS_MULTILABEL) != 0) {
 982 #ifdef MAC
 983                 MNT_ILOCK(mp);
 984                 mp->mnt_flag |= MNT_MULTILABEL;
 985                 MNT_IUNLOCK(mp);
 986 #else
 987                 printf("WARNING: %s: multilabel flag on fs but "
 988                     "no MAC support\n", mp->mnt_stat.f_mntonname);
 989 #endif
 990         }
 991         if ((fs->fs_flags & FS_ACLS) != 0) {
 992 #ifdef UFS_ACL
 993                 MNT_ILOCK(mp);
 994
 995                 if (mp->mnt_flag & MNT_NFS4ACLS)
 996                         printf("WARNING: %s: ACLs flag on fs conflicts with "
 997                             "\"nfsv4acls\" mount option; option ignored\n",
 998                             mp->mnt_stat.f_mntonname);
 999                 mp->mnt_flag &= ~MNT_NFS4ACLS;
1000                 mp->mnt_flag |= MNT_ACLS;
1001
1002                 MNT_IUNLOCK(mp);
1003 #else
1004                 printf("WARNING: %s: ACLs flag on fs but no ACLs support\n",
1005                     mp->mnt_stat.f_mntonname);
1006 #endif
1007         }
1008         if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
1009 #ifdef UFS_ACL
1010                 MNT_ILOCK(mp);
1011
1012                 if (mp->mnt_flag & MNT_ACLS)
1013                         printf("WARNING: %s: NFSv4 ACLs flag on fs conflicts "
1014                             "with \"acls\" mount option; option ignored\n",
1015                             mp->mnt_stat.f_mntonname);
1016                 mp->mnt_flag &= ~MNT_ACLS;
1017                 mp->mnt_flag |= MNT_NFS4ACLS;
1018
1019                 MNT_IUNLOCK(mp);
1020 #else
1021                 printf("WARNING: %s: NFSv4 ACLs flag on fs but no "
1022                     "ACLs support\n", mp->mnt_stat.f_mntonname);
1023 #endif
1024         }
1025         if ((fs->fs_flags & FS_TRIM) != 0) {
1026                 len = sizeof(int);
1027                 if (g_io_getattr("GEOM::candelete", cp, &len,
1028                     &ump->um_candelete) == 0) {
1029                         if (!ump->um_candelete)
1030                                 printf("WARNING: %s: TRIM flag on fs but disk "
1031                                     "does not support TRIM\n",
1032                                     mp->mnt_stat.f_mntonname);
1033                 } else {
1034                         printf("WARNING: %s: TRIM flag on fs but disk does "
1035                             "not confirm that it supports TRIM\n",
1036                             mp->mnt_stat.f_mntonname);
1037                         ump->um_candelete = 0;
1038                 }
1039                 if (ump->um_candelete) {
1040                         ump->um_trim_tq = taskqueue_create("trim", M_WAITOK,
1041                             taskqueue_thread_enqueue, &ump->um_trim_tq);
1042                         taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS,
1043                             "%s trim", mp->mnt_stat.f_mntonname);
1044                 }
1045         }
1046
1047         ump->um_mountp = mp;
1048         ump->um_dev = dev;
1049         ump->um_devvp = devvp;
1050         ump->um_nindir = fs->fs_nindir;
1051         ump->um_bptrtodb = fs->fs_fsbtodb;
1052         ump->um_seqinc = fs->fs_frag;
1053         for (i = 0; i < MAXQUOTAS; i++)
1054                 ump->um_quotas[i] = NULLVP;
1055 #ifdef UFS_EXTATTR
1056         ufs_extattr_uepm_init(&ump->um_extattr);
1057 #endif
1058         /*
1059          * Set FS local "last mounted on" information (NULL pad)
1060          */
1061         bzero(fs->fs_fsmnt, MAXMNTLEN);
1062         strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
1063         mp->mnt_stat.f_iosize = fs->fs_bsize;
1064
1065         if (mp->mnt_flag & MNT_ROOTFS) {
1066                 /*
1067                  * Root mount; update timestamp in mount structure.
1068                  * this will be used by the common root mount code
1069                  * to update the system clock.
1070                  */
1071                 mp->mnt_time = fs->fs_time;
1072         }
1073
1074         if (ronly == 0) {
1075                 fs->fs_mtime = time_second;
1076                 if ((fs->fs_flags & FS_DOSOFTDEP) &&
1077                     (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
1078                         free(fs->fs_csp, M_UFSMNT);
1079                         ffs_flushfiles(mp, FORCECLOSE, td);
1080                         goto out;
1081                 }
1082                 if (fs->fs_snapinum[0] != 0)
1083                         ffs_snapshot_mount(mp);
1084                 fs->fs_fmod = 1;
1085                 fs->fs_clean = 0;
1086                 (void) ffs_sbupdate(ump, MNT_WAIT, 0);
1087         }
1088         /*
1089          * Initialize filesystem state information in mount struct.
1090          */
1091         MNT_ILOCK(mp);
1092         mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
1093             MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS | MNTK_USES_BCACHE;
1094         MNT_IUNLOCK(mp);
1095 #ifdef UFS_EXTATTR
1096 #ifdef UFS_EXTATTR_AUTOSTART
1097         /*
1098          *
1099          * Auto-starting does the following:
1100          *      - check for /.attribute in the fs, and extattr_start if so
1101          *      - for each file in .attribute, enable that file with
1102          *        an attribute of the same name.
1103          * Not clear how to report errors -- probably eat them.
1104          * This would all happen while the filesystem was busy/not
1105          * available, so would effectively be "atomic".
1106          */
1107         (void) ufs_extattr_autostart(mp, td);
1108 #endif /* !UFS_EXTATTR_AUTOSTART */
1109 #endif /* !UFS_EXTATTR */
1110         return (0);
1111 out:
1112         if (bp)
1113                 brelse(bp);
1114         if (cp != NULL) {
1115                 g_topology_lock();
1116                 g_vfs_close(cp);
1117                 g_topology_unlock();
1118         }
1119         if (ump) {
1120                 mtx_destroy(UFS_MTX(ump));
1121                 if (mp->mnt_gjprovider != NULL) {
1122                         free(mp->mnt_gjprovider, M_UFSMNT);
1123                         mp->mnt_gjprovider = NULL;
1124                 }
1125                 free(ump->um_fs, M_UFSMNT);
1126                 free(ump, M_UFSMNT);
1127                 mp->mnt_data = NULL;
1128         }
1129         atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
1130         dev_rel(dev);
1131         return (error);
1132 }
1133
1134 #include <sys/sysctl.h>
/*
 * debug.bigcgs knob.  When nonzero, ffs_oldfscompat_read() stashes
 * fs_cgsize in fs_save_cgsize and replaces it with fs_bsize, and
 * ffs_oldfscompat_write() restores the saved value.
 */
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
1137
1138 /*
1139  * Sanity checks for loading old filesystem superblocks.
1140  * See ffs_oldfscompat_write below for unwound actions.
1141  *
1142  * XXX - Parts get retired eventually.
1143  * Unfortunately new bits get added.
1144  */
1145 static void
1146 ffs_oldfscompat_read(fs, ump, sblockloc)
1147         struct fs *fs;
1148         struct ufsmount *ump;
1149         ufs2_daddr_t sblockloc;
1150 {
1151         off_t maxfilesize;
1152
1153         /*
1154          * If not yet done, update fs_flags location and value of fs_sblockloc.
1155          */
1156         if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
1157                 fs->fs_flags = fs->fs_old_flags;
1158                 fs->fs_old_flags |= FS_FLAGS_UPDATED;
1159                 fs->fs_sblockloc = sblockloc;
1160         }
1161         /*
1162          * If not yet done, update UFS1 superblock with new wider fields.
1163          */
1164         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
1165                 fs->fs_maxbsize = fs->fs_bsize;
1166                 fs->fs_time = fs->fs_old_time;
1167                 fs->fs_size = fs->fs_old_size;
1168                 fs->fs_dsize = fs->fs_old_dsize;
1169                 fs->fs_csaddr = fs->fs_old_csaddr;
1170                 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1171                 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1172                 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1173                 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1174         }
1175         if (fs->fs_magic == FS_UFS1_MAGIC &&
1176             fs->fs_old_inodefmt < FS_44INODEFMT) {
1177                 fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
1178                 fs->fs_qbmask = ~fs->fs_bmask;
1179                 fs->fs_qfmask = ~fs->fs_fmask;
1180         }
1181         if (fs->fs_magic == FS_UFS1_MAGIC) {
1182                 ump->um_savedmaxfilesize = fs->fs_maxfilesize;
1183                 maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
1184                 if (fs->fs_maxfilesize > maxfilesize)
1185                         fs->fs_maxfilesize = maxfilesize;
1186         }
1187         /* Compatibility for old filesystems */
1188         if (fs->fs_avgfilesize <= 0)
1189                 fs->fs_avgfilesize = AVFILESIZ;
1190         if (fs->fs_avgfpdir <= 0)
1191                 fs->fs_avgfpdir = AFPDIR;
1192         if (bigcgs) {
1193                 fs->fs_save_cgsize = fs->fs_cgsize;
1194                 fs->fs_cgsize = fs->fs_bsize;
1195         }
1196 }
1197
1198 /*
1199  * Unwinding superblock updates for old filesystems.
1200  * See ffs_oldfscompat_read above for details.
1201  *
1202  * XXX - Parts get retired eventually.
1203  * Unfortunately new bits get added.
1204  */
1205 void
1206 ffs_oldfscompat_write(fs, ump)
1207         struct fs *fs;
1208         struct ufsmount *ump;
1209 {
1210
1211         /*
1212          * Copy back UFS2 updated fields that UFS1 inspects.
1213          */
1214         if (fs->fs_magic == FS_UFS1_MAGIC) {
1215                 fs->fs_old_time = fs->fs_time;
1216                 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1217                 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1218                 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1219                 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1220                 fs->fs_maxfilesize = ump->um_savedmaxfilesize;
1221         }
1222         if (bigcgs) {
1223                 fs->fs_cgsize = fs->fs_save_cgsize;
1224                 fs->fs_save_cgsize = 0;
1225         }
1226 }
1227
1228 /*
1229  * unmount system call
1230  */
1231 static int
1232 ffs_unmount(mp, mntflags)
1233         struct mount *mp;
1234         int mntflags;
1235 {
1236         struct thread *td;
1237         struct ufsmount *ump = VFSTOUFS(mp);
1238         struct fs *fs;
1239         int error, flags, susp;
1240 #ifdef UFS_EXTATTR
1241         int e_restart;
1242 #endif
1243
1244         flags = 0;
1245         td = curthread;
1246         fs = ump->um_fs;
1247         susp = 0;
1248         if (mntflags & MNT_FORCE) {
1249                 flags |= FORCECLOSE;
1250                 susp = fs->fs_ronly == 0;
1251         }
1252 #ifdef UFS_EXTATTR
1253         if ((error = ufs_extattr_stop(mp, td))) {
1254                 if (error != EOPNOTSUPP)
1255                         printf("WARNING: unmount %s: ufs_extattr_stop "
1256                             "returned errno %d\n", mp->mnt_stat.f_mntonname,
1257                             error);
1258                 e_restart = 0;
1259         } else {
1260                 ufs_extattr_uepm_destroy(&ump->um_extattr);
1261                 e_restart = 1;
1262         }
1263 #endif
1264         if (susp) {
1265                 error = vfs_write_suspend_umnt(mp);
1266                 if (error != 0)
1267                         goto fail1;
1268         }
1269         if (MOUNTEDSOFTDEP(mp))
1270                 error = softdep_flushfiles(mp, flags, td);
1271         else
1272                 error = ffs_flushfiles(mp, flags, td);
1273         if (error != 0 && error != ENXIO)
1274                 goto fail;
1275
1276         UFS_LOCK(ump);
1277         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1278                 printf("WARNING: unmount %s: pending error: blocks %jd "
1279                     "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
1280                     fs->fs_pendinginodes);
1281                 fs->fs_pendingblocks = 0;
1282                 fs->fs_pendinginodes = 0;
1283         }
1284         UFS_UNLOCK(ump);
1285         if (MOUNTEDSOFTDEP(mp))
1286                 softdep_unmount(mp);
1287         if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) {
1288                 fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
1289                 error = ffs_sbupdate(ump, MNT_WAIT, 0);
1290                 if (error && error != ENXIO) {
1291                         fs->fs_clean = 0;
1292                         goto fail;
1293                 }
1294         }
1295         if (susp)
1296                 vfs_write_resume(mp, VR_START_WRITE);
1297         if (ump->um_trim_tq != NULL) {
1298                 while (ump->um_trim_inflight != 0)
1299                         pause("ufsutr", hz);
1300                 taskqueue_drain_all(ump->um_trim_tq);
1301                 taskqueue_free(ump->um_trim_tq);
1302         }
1303         g_topology_lock();
1304         if (ump->um_fsckpid > 0) {
1305                 /*
1306                  * Return to normal read-only mode.
1307                  */
1308                 error = g_access(ump->um_cp, 0, -1, 0);
1309                 ump->um_fsckpid = 0;
1310         }
1311         g_vfs_close(ump->um_cp);
1312         g_topology_unlock();
1313         atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0);
1314         vrele(ump->um_devvp);
1315         dev_rel(ump->um_dev);
1316         mtx_destroy(UFS_MTX(ump));
1317         if (mp->mnt_gjprovider != NULL) {
1318                 free(mp->mnt_gjprovider, M_UFSMNT);
1319                 mp->mnt_gjprovider = NULL;
1320         }
1321         free(fs->fs_csp, M_UFSMNT);
1322         free(fs, M_UFSMNT);
1323         free(ump, M_UFSMNT);
1324         mp->mnt_data = NULL;
1325         MNT_ILOCK(mp);
1326         mp->mnt_flag &= ~MNT_LOCAL;
1327         MNT_IUNLOCK(mp);
1328         if (td->td_su == mp) {
1329                 td->td_su = NULL;
1330                 vfs_rel(mp);
1331         }
1332         return (error);
1333
1334 fail:
1335         if (susp)
1336                 vfs_write_resume(mp, VR_START_WRITE);
1337 fail1:
1338 #ifdef UFS_EXTATTR
1339         if (e_restart) {
1340                 ufs_extattr_uepm_init(&ump->um_extattr);
1341 #ifdef UFS_EXTATTR_AUTOSTART
1342                 (void) ufs_extattr_autostart(mp, td);
1343 #endif
1344         }
1345 #endif
1346
1347         return (error);
1348 }
1349
1350 /*
1351  * Flush out all the files in a filesystem.
1352  */
1353 int
1354 ffs_flushfiles(mp, flags, td)
1355         struct mount *mp;
1356         int flags;
1357         struct thread *td;
1358 {
1359         struct ufsmount *ump;
1360         int qerror, error;
1361
1362         ump = VFSTOUFS(mp);
1363         qerror = 0;
1364 #ifdef QUOTA
1365         if (mp->mnt_flag & MNT_QUOTA) {
1366                 int i;
1367                 error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1368                 if (error)
1369                         return (error);
1370                 for (i = 0; i < MAXQUOTAS; i++) {
1371                         error = quotaoff(td, mp, i);
1372                         if (error != 0) {
1373                                 if ((flags & EARLYFLUSH) == 0)
1374                                         return (error);
1375                                 else
1376                                         qerror = error;
1377                         }
1378                 }
1379
1380                 /*
1381                  * Here we fall through to vflush again to ensure that
1382                  * we have gotten rid of all the system vnodes, unless
1383                  * quotas must not be closed.
1384                  */
1385         }
1386 #endif
1387         ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1388         if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1389                 if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1390                         return (error);
1391                 ffs_snapshot_unmount(mp);
1392                 flags |= FORCECLOSE;
1393                 /*
1394                  * Here we fall through to vflush again to ensure
1395                  * that we have gotten rid of all the system vnodes.
1396                  */
1397         }
1398
1399         /*
1400          * Do not close system files if quotas were not closed, to be
1401          * able to sync the remaining dquots.  The freeblks softupdate
1402          * workitems might hold a reference on a dquot, preventing
1403          * quotaoff() from completing.  Next round of
1404          * softdep_flushworklist() iteration should process the
1405          * blockers, allowing the next run of quotaoff() to finally
1406          * flush held dquots.
1407          *
1408          * Otherwise, flush all the files.
1409          */
1410         if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0)
1411                 return (error);
1412
1413         /*
1414          * Flush filesystem metadata.
1415          */
1416         vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1417         error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1418         VOP_UNLOCK(ump->um_devvp, 0);
1419         return (error);
1420 }
1421
1422 /*
1423  * Get filesystem statistics.
1424  */
1425 static int
1426 ffs_statfs(mp, sbp)
1427         struct mount *mp;
1428         struct statfs *sbp;
1429 {
1430         struct ufsmount *ump;
1431         struct fs *fs;
1432
1433         ump = VFSTOUFS(mp);
1434         fs = ump->um_fs;
1435         if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1436                 panic("ffs_statfs");
1437         sbp->f_version = STATFS_VERSION;
1438         sbp->f_bsize = fs->fs_fsize;
1439         sbp->f_iosize = fs->fs_bsize;
1440         sbp->f_blocks = fs->fs_dsize;
1441         UFS_LOCK(ump);
1442         sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1443             fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1444         sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1445             dbtofsb(fs, fs->fs_pendingblocks);
1446         sbp->f_files =  fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO;
1447         sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1448         UFS_UNLOCK(ump);
1449         sbp->f_namemax = UFS_MAXNAMLEN;
1450         return (0);
1451 }
1452
1453 static bool
1454 sync_doupdate(struct inode *ip)
1455 {
1456
1457         return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED |
1458             IN_UPDATE)) != 0);
1459 }
1460
1461 /*
1462  * For a lazy sync, we only care about access times, quotas and the
1463  * superblock.  Other filesystem changes are already converted to
1464  * cylinder group blocks or inode blocks updates and are written to
1465  * disk by syncer.
1466  */
1467 static int
1468 ffs_sync_lazy(mp)
1469      struct mount *mp;
1470 {
1471         struct vnode *mvp, *vp;
1472         struct inode *ip;
1473         struct thread *td;
1474         int allerror, error;
1475
1476         allerror = 0;
1477         td = curthread;
1478         if ((mp->mnt_flag & MNT_NOATIME) != 0)
1479                 goto qupdate;
1480         MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
1481                 if (vp->v_type == VNON) {
1482                         VI_UNLOCK(vp);
1483                         continue;
1484                 }
1485                 ip = VTOI(vp);
1486
1487                 /*
1488                  * The IN_ACCESS flag is converted to IN_MODIFIED by
1489                  * ufs_close() and ufs_getattr() by the calls to
1490                  * ufs_itimes_locked(), without subsequent UFS_UPDATE().
1491                  * Test also all the other timestamp flags too, to pick up
1492                  * any other cases that could be missed.
1493                  */
1494                 if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) {
1495                         VI_UNLOCK(vp);
1496                         continue;
1497                 }
1498                 if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
1499                     td)) != 0)
1500                         continue;
1501                 if (sync_doupdate(ip))
1502                         error = ffs_update(vp, 0);
1503                 if (error != 0)
1504                         allerror = error;
1505                 vput(vp);
1506         }
1507
1508 qupdate:
1509 #ifdef QUOTA
1510         qsync(mp);
1511 #endif
1512
1513         if (VFSTOUFS(mp)->um_fs->fs_fmod != 0 &&
1514             (error = ffs_sbupdate(VFSTOUFS(mp), MNT_LAZY, 0)) != 0)
1515                 allerror = error;
1516         return (allerror);
1517 }
1518
1519 /*
1520  * Go through the disk queues to initiate sandbagged IO;
1521  * go through the inodes to write those that have been modified;
1522  * initiate the writing of the super block if it has been modified.
1523  *
1524  * Note: we are always called with the filesystem marked busy using
1525  * vfs_busy().
1526  */
1527 static int
1528 ffs_sync(mp, waitfor)
1529         struct mount *mp;
1530         int waitfor;
1531 {
1532         struct vnode *mvp, *vp, *devvp;
1533         struct thread *td;
1534         struct inode *ip;
1535         struct ufsmount *ump = VFSTOUFS(mp);
1536         struct fs *fs;
1537         int error, count, lockreq, allerror = 0;
1538         int suspend;
1539         int suspended;
1540         int secondary_writes;
1541         int secondary_accwrites;
1542         int softdep_deps;
1543         int softdep_accdeps;
1544         struct bufobj *bo;
1545
1546         suspend = 0;
1547         suspended = 0;
1548         td = curthread;
1549         fs = ump->um_fs;
1550         if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0)
1551                 panic("%s: ffs_sync: modification on read-only filesystem",
1552                     fs->fs_fsmnt);
1553         if (waitfor == MNT_LAZY) {
1554                 if (!rebooting)
1555                         return (ffs_sync_lazy(mp));
1556                 waitfor = MNT_NOWAIT;
1557         }
1558
1559         /*
1560          * Write back each (modified) inode.
1561          */
1562         lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1563         if (waitfor == MNT_SUSPEND) {
1564                 suspend = 1;
1565                 waitfor = MNT_WAIT;
1566         }
1567         if (waitfor == MNT_WAIT)
1568                 lockreq = LK_EXCLUSIVE;
1569         lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
1570 loop:
1571         /* Grab snapshot of secondary write counts */
1572         MNT_ILOCK(mp);
1573         secondary_writes = mp->mnt_secondary_writes;
1574         secondary_accwrites = mp->mnt_secondary_accwrites;
1575         MNT_IUNLOCK(mp);
1576
1577         /* Grab snapshot of softdep dependency counts */
1578         softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
1579
1580         MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1581                 /*
1582                  * Depend on the vnode interlock to keep things stable enough
1583                  * for a quick test.  Since there might be hundreds of
1584                  * thousands of vnodes, we cannot afford even a subroutine
1585                  * call unless there's a good chance that we have work to do.
1586                  */
1587                 if (vp->v_type == VNON) {
1588                         VI_UNLOCK(vp);
1589                         continue;
1590                 }
1591                 ip = VTOI(vp);
1592                 if ((ip->i_flag &
1593                     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1594                     vp->v_bufobj.bo_dirty.bv_cnt == 0) {
1595                         VI_UNLOCK(vp);
1596                         continue;
1597                 }
1598                 if ((error = vget(vp, lockreq, td)) != 0) {
1599                         if (error == ENOENT || error == ENOLCK) {
1600                                 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1601                                 goto loop;
1602                         }
1603                         continue;
1604                 }
1605                 if ((error = ffs_syncvnode(vp, waitfor, 0)) != 0)
1606                         allerror = error;
1607                 vput(vp);
1608         }
1609         /*
1610          * Force stale filesystem control information to be flushed.
1611          */
1612         if (waitfor == MNT_WAIT || rebooting) {
1613                 if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1614                         allerror = error;
1615                 /* Flushed work items may create new vnodes to clean */
1616                 if (allerror == 0 && count)
1617                         goto loop;
1618         }
1619 #ifdef QUOTA
1620         qsync(mp);
1621 #endif
1622
1623         devvp = ump->um_devvp;
1624         bo = &devvp->v_bufobj;
1625         BO_LOCK(bo);
1626         if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) {
1627                 BO_UNLOCK(bo);
1628                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
1629                 error = VOP_FSYNC(devvp, waitfor, td);
1630                 VOP_UNLOCK(devvp, 0);
1631                 if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN))
1632                         error = ffs_sbupdate(ump, waitfor, 0);
1633                 if (error != 0)
1634                         allerror = error;
1635                 if (allerror == 0 && waitfor == MNT_WAIT)
1636                         goto loop;
1637         } else if (suspend != 0) {
1638                 if (softdep_check_suspend(mp,
1639                                           devvp,
1640                                           softdep_deps,
1641                                           softdep_accdeps,
1642                                           secondary_writes,
1643                                           secondary_accwrites) != 0) {
1644                         MNT_IUNLOCK(mp);
1645                         goto loop;      /* More work needed */
1646                 }
1647                 mtx_assert(MNT_MTX(mp), MA_OWNED);
1648                 mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
1649                 MNT_IUNLOCK(mp);
1650                 suspended = 1;
1651         } else
1652                 BO_UNLOCK(bo);
1653         /*
1654          * Write back modified superblock.
1655          */
1656         if (fs->fs_fmod != 0 &&
1657             (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
1658                 allerror = error;
1659         return (allerror);
1660 }
1661
1662 int
1663 ffs_vget(mp, ino, flags, vpp)
1664         struct mount *mp;
1665         ino_t ino;
1666         int flags;
1667         struct vnode **vpp;
1668 {
1669         return (ffs_vgetf(mp, ino, flags, vpp, 0));
1670 }
1671
1672 int
1673 ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
1674         struct mount *mp;
1675         ino_t ino;
1676         int flags;
1677         struct vnode **vpp;
1678         int ffs_flags;
1679 {
1680         struct fs *fs;
1681         struct inode *ip;
1682         struct ufsmount *ump;
1683         struct buf *bp;
1684         struct vnode *vp;
1685         int error;
1686
1687         error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
1688         if (error || *vpp != NULL)
1689                 return (error);
1690
1691         /*
1692          * We must promote to an exclusive lock for vnode creation.  This
1693          * can happen if lookup is passed LOCKSHARED.
1694          */
1695         if ((flags & LK_TYPE_MASK) == LK_SHARED) {
1696                 flags &= ~LK_TYPE_MASK;
1697                 flags |= LK_EXCLUSIVE;
1698         }
1699
1700         /*
1701          * We do not lock vnode creation as it is believed to be too
1702          * expensive for such rare case as simultaneous creation of vnode
1703          * for same ino by different processes. We just allow them to race
1704          * and check later to decide who wins. Let the race begin!
1705          */
1706
1707         ump = VFSTOUFS(mp);
1708         fs = ump->um_fs;
1709         ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
1710
1711         /* Allocate a new vnode/inode. */
1712         error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ?
1713             &ffs_vnodeops1 : &ffs_vnodeops2, &vp);
1714         if (error) {
1715                 *vpp = NULL;
1716                 uma_zfree(uma_inode, ip);
1717                 return (error);
1718         }
1719         /*
1720          * FFS supports recursive locking.
1721          */
1722         lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
1723         VN_LOCK_AREC(vp);
1724         vp->v_data = ip;
1725         vp->v_bufobj.bo_bsize = fs->fs_bsize;
1726         ip->i_vnode = vp;
1727         ip->i_ump = ump;
1728         ip->i_number = ino;
1729         ip->i_ea_refs = 0;
1730         ip->i_nextclustercg = -1;
1731         ip->i_flag = fs->fs_magic == FS_UFS1_MAGIC ? 0 : IN_UFS2;
1732 #ifdef QUOTA
1733         {
1734                 int i;
1735                 for (i = 0; i < MAXQUOTAS; i++)
1736                         ip->i_dquot[i] = NODQUOT;
1737         }
1738 #endif
1739
1740         if (ffs_flags & FFSV_FORCEINSMQ)
1741                 vp->v_vflag |= VV_FORCEINSMQ;
1742         error = insmntque(vp, mp);
1743         if (error != 0) {
1744                 uma_zfree(uma_inode, ip);
1745                 *vpp = NULL;
1746                 return (error);
1747         }
1748         vp->v_vflag &= ~VV_FORCEINSMQ;
1749         error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
1750         if (error || *vpp != NULL)
1751                 return (error);
1752
1753         /* Read in the disk contents for the inode, copy into the inode. */
1754         error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1755             (int)fs->fs_bsize, NOCRED, &bp);
1756         if (error) {
1757                 /*
1758                  * The inode does not contain anything useful, so it would
1759                  * be misleading to leave it on its hash chain. With mode
1760                  * still zero, it will be unlinked and returned to the free
1761                  * list by vput().
1762                  */
1763                 brelse(bp);
1764                 vput(vp);
1765                 *vpp = NULL;
1766                 return (error);
1767         }
1768         if (I_IS_UFS1(ip))
1769                 ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1770         else
1771                 ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1772         ffs_load_inode(bp, ip, fs, ino);
1773         if (DOINGSOFTDEP(vp))
1774                 softdep_load_inodeblock(ip);
1775         else
1776                 ip->i_effnlink = ip->i_nlink;
1777         bqrelse(bp);
1778
1779         /*
1780          * Initialize the vnode from the inode, check for aliases.
1781          * Note that the underlying vnode may have changed.
1782          */
1783         error = ufs_vinit(mp, I_IS_UFS1(ip) ? &ffs_fifoops1 : &ffs_fifoops2,
1784             &vp);
1785         if (error) {
1786                 vput(vp);
1787                 *vpp = NULL;
1788                 return (error);
1789         }
1790
1791         /*
1792          * Finish inode initialization.
1793          */
1794         if (vp->v_type != VFIFO) {
1795                 /* FFS supports shared locking for all files except fifos. */
1796                 VN_LOCK_ASHARE(vp);
1797         }
1798
1799         /*
1800          * Set up a generation number for this inode if it does not
1801          * already have one. This should only happen on old filesystems.
1802          */
1803         if (ip->i_gen == 0) {
1804                 while (ip->i_gen == 0)
1805                         ip->i_gen = arc4random();
1806                 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1807                         ip->i_flag |= IN_MODIFIED;
1808                         DIP_SET(ip, i_gen, ip->i_gen);
1809                 }
1810         }
1811 #ifdef MAC
1812         if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1813                 /*
1814                  * If this vnode is already allocated, and we're running
1815                  * multi-label, attempt to perform a label association
1816                  * from the extended attributes on the inode.
1817                  */
1818                 error = mac_vnode_associate_extattr(mp, vp);
1819                 if (error) {
1820                         /* ufs_inactive will release ip->i_devvp ref. */
1821                         vput(vp);
1822                         *vpp = NULL;
1823                         return (error);
1824                 }
1825         }
1826 #endif
1827
1828         *vpp = vp;
1829         return (0);
1830 }
1831
1832 /*
1833  * File handle to vnode
1834  *
1835  * Have to be really careful about stale file handles:
1836  * - check that the inode number is valid
1837  * - for UFS2 check that the inode number is initialized
1838  * - call ffs_vget() to get the locked inode
1839  * - check for an unallocated inode (i_mode == 0)
1840  * - check that the given client host has export rights and return
1841  *   those rights via. exflagsp and credanonp
1842  */
1843 static int
1844 ffs_fhtovp(mp, fhp, flags, vpp)
1845         struct mount *mp;
1846         struct fid *fhp;
1847         int flags;
1848         struct vnode **vpp;
1849 {
1850         struct ufid *ufhp;
1851         struct ufsmount *ump;
1852         struct fs *fs;
1853         struct cg *cgp;
1854         struct buf *bp;
1855         ino_t ino;
1856         u_int cg;
1857         int error;
1858
1859         ufhp = (struct ufid *)fhp;
1860         ino = ufhp->ufid_ino;
1861         ump = VFSTOUFS(mp);
1862         fs = ump->um_fs;
1863         if (ino < UFS_ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg)
1864                 return (ESTALE);
1865         /*
1866          * Need to check if inode is initialized because UFS2 does lazy
1867          * initialization and nfs_fhtovp can offer arbitrary inode numbers.
1868          */
1869         if (fs->fs_magic != FS_UFS2_MAGIC)
1870                 return (ufs_fhtovp(mp, ufhp, flags, vpp));
1871         cg = ino_to_cg(fs, ino);
1872         if ((error = ffs_getcg(fs, ump->um_devvp, cg, &bp, &cgp)) != 0)
1873                 return (error);
1874         if (ino >= cg * fs->fs_ipg + cgp->cg_initediblk) {
1875                 brelse(bp);
1876                 return (ESTALE);
1877         }
1878         brelse(bp);
1879         return (ufs_fhtovp(mp, ufhp, flags, vpp));
1880 }
1881
/*
 * Initialize the filesystem: set up the suspension and soft updates
 * subsystems, then let ufs_init() do the rest.
 *
 * Returns the value returned by ufs_init().
 */
static int
ffs_init(struct vfsconf *vfsp)
{

        ffs_susp_initialize();
        softdep_initialize();
        return (ufs_init(vfsp));
}
1894
/*
 * Undo the work of ffs_init(), tearing things down in the reverse of the
 * order in which ffs_init() set them up.
 *
 * Returns the value returned by ufs_uninit(); the soft updates and
 * suspension teardown run regardless of that value.
 */
static int
ffs_uninit(struct vfsconf *vfsp)
{
        int ret;

        ret = ufs_uninit(vfsp);
        softdep_uninitialize();
        ffs_susp_uninitialize();
        return (ret);
}
1909
1910 /*
1911  * Write a superblock and associated information back to disk.
1912  */
1913 int
1914 ffs_sbupdate(ump, waitfor, suspended)
1915         struct ufsmount *ump;
1916         int waitfor;
1917         int suspended;
1918 {
1919         struct fs *fs = ump->um_fs;
1920         struct buf *sbbp;
1921         struct buf *bp;
1922         int blks;
1923         void *space;
1924         int i, size, error, allerror = 0;
1925
1926         if (fs->fs_ronly == 1 &&
1927             (ump->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1928             (MNT_RDONLY | MNT_UPDATE) && ump->um_fsckpid == 0)
1929                 panic("ffs_sbupdate: write read-only filesystem");
1930         /*
1931          * We use the superblock's buf to serialize calls to ffs_sbupdate().
1932          */
1933         sbbp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
1934             (int)fs->fs_sbsize, 0, 0, 0);
1935         /*
1936          * First write back the summary information.
1937          */
1938         blks = howmany(fs->fs_cssize, fs->fs_fsize);
1939         space = fs->fs_csp;
1940         for (i = 0; i < blks; i += fs->fs_frag) {
1941                 size = fs->fs_bsize;
1942                 if (i + fs->fs_frag > blks)
1943                         size = (blks - i) * fs->fs_fsize;
1944                 bp = getblk(ump->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1945                     size, 0, 0, 0);
1946                 bcopy(space, bp->b_data, (u_int)size);
1947                 space = (char *)space + size;
1948                 if (suspended)
1949                         bp->b_flags |= B_VALIDSUSPWRT;
1950                 if (waitfor != MNT_WAIT)
1951                         bawrite(bp);
1952                 else if ((error = bwrite(bp)) != 0)
1953                         allerror = error;
1954         }
1955         /*
1956          * Now write back the superblock itself. If any errors occurred
1957          * up to this point, then fail so that the superblock avoids
1958          * being written out as clean.
1959          */
1960         if (allerror) {
1961                 brelse(sbbp);
1962                 return (allerror);
1963         }
1964         bp = sbbp;
1965         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1966             (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
1967                 printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n",
1968                     fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1969                 fs->fs_sblockloc = SBLOCK_UFS1;
1970         }
1971         if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1972             (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
1973                 printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n",
1974                     fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1975                 fs->fs_sblockloc = SBLOCK_UFS2;
1976         }
1977         fs->fs_fmod = 0;
1978         fs->fs_time = time_second;
1979         if (MOUNTEDSOFTDEP(ump->um_mountp))
1980                 softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp);
1981         bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1982         ffs_oldfscompat_write((struct fs *)bp->b_data, ump);
1983         if (suspended)
1984                 bp->b_flags |= B_VALIDSUSPWRT;
1985         if (waitfor != MNT_WAIT)
1986                 bawrite(bp);
1987         else if ((error = bwrite(bp)) != 0)
1988                 allerror = error;
1989         return (allerror);
1990 }
1991
/*
 * Extended attribute control entry point.  Dispatches to the UFS
 * implementation when the kernel is built with UFS_EXTATTR and to the
 * generic vfs_stdextattrctl() otherwise; the chosen routine's result is
 * returned unchanged.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
        int attrnamespace, const char *attrname)
{
        int error;

#ifdef UFS_EXTATTR
        error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
            attrname);
#else
        error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
            attrname);
#endif
        return (error);
}
2005
2006 static void
2007 ffs_ifree(struct ufsmount *ump, struct inode *ip)
2008 {
2009
2010         if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
2011                 uma_zfree(uma_ufs1, ip->i_din1);
2012         else if (ip->i_din2 != NULL)
2013                 uma_zfree(uma_ufs2, ip->i_din2);
2014         uma_zfree(uma_inode, ip);
2015 }
2016
/*
 * Run-time switch, exported read-write as the debug.dobkgrdwrite sysctl
 * and enabled by default.  ffs_bufwrite() only attempts a background
 * write when this is nonzero.
 */
static int dobkgrdwrite = 1;
SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
    "Do background writes (honoring the BV_BKGRDWRITE flag)?");
2020
2021 /*
2022  * Complete a background write started from bwrite.
2023  */
2024 static void
2025 ffs_backgroundwritedone(struct buf *bp)
2026 {
2027         struct bufobj *bufobj;
2028         struct buf *origbp;
2029
2030         /*
2031          * Find the original buffer that we are writing.
2032          */
2033         bufobj = bp->b_bufobj;
2034         BO_LOCK(bufobj);
2035         if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
2036                 panic("backgroundwritedone: lost buffer");
2037
2038         /*
2039          * We should mark the cylinder group buffer origbp as
2040          * dirty, to not loose the failed write.
2041          */
2042         if ((bp->b_ioflags & BIO_ERROR) != 0)
2043                 origbp->b_vflags |= BV_BKGRDERR;
2044         BO_UNLOCK(bufobj);
2045         /*
2046          * Process dependencies then return any unfinished ones.
2047          */
2048         pbrelvp(bp);
2049         if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0)
2050                 buf_complete(bp);
2051 #ifdef SOFTUPDATES
2052         if (!LIST_EMPTY(&bp->b_dep))
2053                 softdep_move_dependencies(bp, origbp);
2054 #endif
2055         /*
2056          * This buffer is marked B_NOCACHE so when it is released
2057          * by biodone it will be tossed.
2058          */
2059         bp->b_flags |= B_NOCACHE;
2060         bp->b_flags &= ~B_CACHE;
2061
2062         /*
2063          * Prevent brelse() from trying to keep and re-dirtying bp on
2064          * errors. It causes b_bufobj dereference in
2065          * bdirty()/reassignbuf(), and b_bufobj was cleared in
2066          * pbrelvp() above.
2067          */
2068         if ((bp->b_ioflags & BIO_ERROR) != 0)
2069                 bp->b_flags |= B_INVAL;
2070         bufdone(bp);
2071         BO_LOCK(bufobj);
2072         /*
2073          * Clear the BV_BKGRDINPROG flag in the original buffer
2074          * and awaken it if it is waiting for the write to complete.
2075          * If BV_BKGRDINPROG is not set in the original buffer it must
2076          * have been released and re-instantiated - which is not legal.
2077          */
2078         KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
2079             ("backgroundwritedone: lost buffer2"));
2080         origbp->b_vflags &= ~BV_BKGRDINPROG;
2081         if (origbp->b_vflags & BV_BKGRDWAIT) {
2082                 origbp->b_vflags &= ~BV_BKGRDWAIT;
2083                 wakeup(&origbp->b_xflags);
2084         }
2085         BO_UNLOCK(bufobj);
2086 }
2087
2088
2089 /*
2090  * Write, release buffer on completion.  (Done by iodone
2091  * if async).  Do not bother writing anything if the buffer
2092  * is invalid.
2093  *
2094  * Note that we set B_CACHE here, indicating that buffer is
2095  * fully valid and thus cacheable.  This is true even of NFS
2096  * now so we set it generally.  This could be set either here
2097  * or in biodone() since the I/O is synchronous.  We put it
2098  * here.
2099  */
2100 static int
2101 ffs_bufwrite(struct buf *bp)
2102 {
2103         struct buf *newbp;
2104
2105         CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
2106         if (bp->b_flags & B_INVAL) {
2107                 brelse(bp);
2108                 return (0);
2109         }
2110
2111         if (!BUF_ISLOCKED(bp))
2112                 panic("bufwrite: buffer is not busy???");
2113         /*
2114          * If a background write is already in progress, delay
2115          * writing this block if it is asynchronous. Otherwise
2116          * wait for the background write to complete.
2117          */
2118         BO_LOCK(bp->b_bufobj);
2119         if (bp->b_vflags & BV_BKGRDINPROG) {
2120                 if (bp->b_flags & B_ASYNC) {
2121                         BO_UNLOCK(bp->b_bufobj);
2122                         bdwrite(bp);
2123                         return (0);
2124                 }
2125                 bp->b_vflags |= BV_BKGRDWAIT;
2126                 msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO,
2127                     "bwrbg", 0);
2128                 if (bp->b_vflags & BV_BKGRDINPROG)
2129                         panic("bufwrite: still writing");
2130         }
2131         bp->b_vflags &= ~BV_BKGRDERR;
2132         BO_UNLOCK(bp->b_bufobj);
2133
2134         /*
2135          * If this buffer is marked for background writing and we
2136          * do not have to wait for it, make a copy and write the
2137          * copy so as to leave this buffer ready for further use.
2138          *
2139          * This optimization eats a lot of memory.  If we have a page
2140          * or buffer shortfall we can't do it.
2141          */
2142         if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
2143             (bp->b_flags & B_ASYNC) &&
2144             !vm_page_count_severe() &&
2145             !buf_dirty_count_severe()) {
2146                 KASSERT(bp->b_iodone == NULL,
2147                     ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
2148
2149                 /* get a new block */
2150                 newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
2151                 if (newbp == NULL)
2152                         goto normal_write;
2153
2154                 KASSERT(buf_mapped(bp), ("Unmapped cg"));
2155                 memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
2156                 BO_LOCK(bp->b_bufobj);
2157                 bp->b_vflags |= BV_BKGRDINPROG;
2158                 BO_UNLOCK(bp->b_bufobj);
2159                 newbp->b_xflags |=
2160                     (bp->b_xflags & BX_FSPRIV) | BX_BKGRDMARKER;
2161                 newbp->b_lblkno = bp->b_lblkno;
2162                 newbp->b_blkno = bp->b_blkno;
2163                 newbp->b_offset = bp->b_offset;
2164                 newbp->b_iodone = ffs_backgroundwritedone;
2165                 newbp->b_flags |= B_ASYNC;
2166                 newbp->b_flags &= ~B_INVAL;
2167                 pbgetvp(bp->b_vp, newbp);
2168
2169 #ifdef SOFTUPDATES
2170                 /*
2171                  * Move over the dependencies.  If there are rollbacks,
2172                  * leave the parent buffer dirtied as it will need to
2173                  * be written again.
2174                  */
2175                 if (LIST_EMPTY(&bp->b_dep) ||
2176                     softdep_move_dependencies(bp, newbp) == 0)
2177                         bundirty(bp);
2178 #else
2179                 bundirty(bp);
2180 #endif
2181
2182                 /*
2183                  * Initiate write on the copy, release the original.  The
2184                  * BKGRDINPROG flag prevents it from going away until
2185                  * the background write completes.
2186                  */
2187                 bqrelse(bp);
2188                 bp = newbp;
2189         } else
2190                 /* Mark the buffer clean */
2191                 bundirty(bp);
2192
2193
2194         /* Let the normal bufwrite do the rest for us */
2195 normal_write:
2196         return (bufwrite(bp));
2197 }
2198
2199
2200 static void
2201 ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
2202 {
2203         struct vnode *vp;
2204         struct buf *tbp;
2205         int error, nocopy;
2206
2207         vp = bo2vnode(bo);
2208         if (bp->b_iocmd == BIO_WRITE) {
2209                 if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
2210                     bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
2211                     (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
2212                         panic("ffs_geom_strategy: bad I/O");
2213                 nocopy = bp->b_flags & B_NOCOPY;
2214                 bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY);
2215                 if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 &&
2216                     vp->v_rdev->si_snapdata != NULL) {
2217                         if ((bp->b_flags & B_CLUSTER) != 0) {
2218                                 runningbufwakeup(bp);
2219                                 TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
2220                                               b_cluster.cluster_entry) {
2221                                         error = ffs_copyonwrite(vp, tbp);
2222                                         if (error != 0 &&
2223                                             error != EOPNOTSUPP) {
2224                                                 bp->b_error = error;
2225                                                 bp->b_ioflags |= BIO_ERROR;
2226                                                 bufdone(bp);
2227                                                 return;
2228                                         }
2229                                 }
2230                                 bp->b_runningbufspace = bp->b_bufsize;
2231                                 atomic_add_long(&runningbufspace,
2232                                                bp->b_runningbufspace);
2233                         } else {
2234                                 error = ffs_copyonwrite(vp, bp);
2235                                 if (error != 0 && error != EOPNOTSUPP) {
2236                                         bp->b_error = error;
2237                                         bp->b_ioflags |= BIO_ERROR;
2238                                         bufdone(bp);
2239                                         return;
2240                                 }
2241                         }
2242                 }
2243 #ifdef SOFTUPDATES
2244                 if ((bp->b_flags & B_CLUSTER) != 0) {
2245                         TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
2246                                       b_cluster.cluster_entry) {
2247                                 if (!LIST_EMPTY(&tbp->b_dep))
2248                                         buf_start(tbp);
2249                         }
2250                 } else {
2251                         if (!LIST_EMPTY(&bp->b_dep))
2252                                 buf_start(bp);
2253                 }
2254
2255 #endif
2256                 /*
2257                  * Check for metadata that needs check-hashes and update them.
2258                  */
2259                 switch (bp->b_xflags & BX_FSPRIV) {
2260                 case BX_CYLGRP:
2261                         ((struct cg *)bp->b_data)->cg_ckhash = 0;
2262                         ((struct cg *)bp->b_data)->cg_ckhash =
2263                             calculate_crc32c(~0L, bp->b_data, bp->b_bcount);
2264                         break;
2265
2266                 case BX_SUPERBLOCK:
2267                 case BX_INODE:
2268                 case BX_INDIR:
2269                 case BX_DIR:
2270                         printf("Check-hash write is unimplemented!!!\n");
2271                         break;
2272
2273                 case 0:
2274                         break;
2275
2276                 default:
2277                         printf("multiple buffer types 0x%b\n",
2278                             (u_int)(bp->b_xflags & BX_FSPRIV),
2279                             PRINT_UFS_BUF_XFLAGS);
2280                         break;
2281                 }
2282         }
2283         g_vfs_strategy(bo, bp);
2284 }
2285
2286 int
2287 ffs_own_mount(const struct mount *mp)
2288 {
2289
2290         if (mp->mnt_op == &ufs_vfsops)
2291                 return (1);
2292         return (0);
2293 }
2294
2295 #ifdef  DDB
2296 #ifdef SOFTUPDATES
2297
2298 /* defined in ffs_softdep.c */
2299 extern void db_print_ffs(struct ufsmount *ump);
2300
2301 DB_SHOW_COMMAND(ffs, db_show_ffs)
2302 {
2303         struct mount *mp;
2304         struct ufsmount *ump;
2305
2306         if (have_addr) {
2307                 ump = VFSTOUFS((struct mount *)addr);
2308                 db_print_ffs(ump);
2309                 return;
2310         }
2311
2312         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
2313                 if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
2314                         db_print_ffs(VFSTOUFS(mp));
2315         }
2316 }
2317
2318 #endif  /* SOFTUPDATES */
2319 #endif  /* DDB */