sys/kern/vfs_default.c

   1 /*-
   2  * Copyright (c) 1989, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * This code is derived from software contributed
   6  * to Berkeley by John Heidemann of the UCLA Ficus project.
   7  *
   8  * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 4. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34
  35 #include <sys/cdefs.h>
  36 __FBSDID("$FreeBSD$");
  37
  38 #include <sys/param.h>
  39 #include <sys/systm.h>
  40 #include <sys/bio.h>
  41 #include <sys/buf.h>
  42 #include <sys/conf.h>
  43 #include <sys/kernel.h>
  44 #include <sys/limits.h>
  45 #include <sys/lock.h>
  46 #include <sys/malloc.h>
  47 #include <sys/mount.h>
  48 #include <sys/mutex.h>
  49 #include <sys/unistd.h>
  50 #include <sys/vnode.h>
  51 #include <sys/poll.h>
  52
  53 #include <vm/vm.h>
  54 #include <vm/vm_object.h>
  55 #include <vm/vm_extern.h>
  56 #include <vm/pmap.h>
  57 #include <vm/vm_map.h>
  58 #include <vm/vm_page.h>
  59 #include <vm/vm_pager.h>
  60 #include <vm/vnode_pager.h>
  61
  62 static int      vop_nolookup(struct vop_lookup_args *);
  63 static int      vop_nostrategy(struct vop_strategy_args *);
  64
  65 /*
  66  * This vnode table stores what we want to do if the filesystem doesn't
  67  * implement a particular VOP.
  68  *
  69  * If there is no specific entry here, we will return EOPNOTSUPP.
  70  *
  71  */
  72
  73 struct vop_vector default_vnodeops = {
  74         .vop_default =          NULL,
  75         .vop_bypass =           VOP_EOPNOTSUPP,
  76
  77         .vop_advlock =          VOP_EINVAL,
  78         .vop_bmap =             vop_stdbmap,
  79         .vop_close =            VOP_NULL,
  80         .vop_fsync =            VOP_NULL,
  81         .vop_getpages =         vop_stdgetpages,
  82         .vop_getwritemount =    vop_stdgetwritemount,
  83         .vop_inactive =         VOP_NULL,
  84         .vop_ioctl =            VOP_ENOTTY,
  85         .vop_islocked =         vop_stdislocked,
  86         .vop_lease =            VOP_NULL,
  87         .vop_lock =             vop_stdlock,
  88         .vop_lookup =           vop_nolookup,
  89         .vop_open =             VOP_NULL,
  90         .vop_pathconf =         VOP_EINVAL,
  91         .vop_poll =             vop_nopoll,
  92         .vop_putpages =         vop_stdputpages,
  93         .vop_readlink =         VOP_EINVAL,
  94         .vop_revoke =           VOP_PANIC,
  95         .vop_strategy =         vop_nostrategy,
  96         .vop_unlock =           vop_stdunlock,
  97 };
  98
  99 /*
 100  * Series of placeholder functions for various error returns for
 101  * VOPs.
 102  */
 103
 104 int
 105 vop_eopnotsupp(struct vop_generic_args *ap)
 106 {
 107         /*
 108         printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
 109         */
 110
 111         return (EOPNOTSUPP);
 112 }
 113
 114 int
 115 vop_ebadf(struct vop_generic_args *ap)
 116 {
 117
 118         return (EBADF);
 119 }
 120
 121 int
 122 vop_enotty(struct vop_generic_args *ap)
 123 {
 124
 125         return (ENOTTY);
 126 }
 127
 128 int
 129 vop_einval(struct vop_generic_args *ap)
 130 {
 131
 132         return (EINVAL);
 133 }
 134
 135 int
 136 vop_null(struct vop_generic_args *ap)
 137 {
 138
 139         return (0);
 140 }
 141
 142 /*
 143  * Helper function to panic on some bad VOPs in some filesystems.
 144  */
 145 int
 146 vop_panic(struct vop_generic_args *ap)
 147 {
 148
 149         panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
 150 }
 151
 152 /*
 153  * vop_std<something> and vop_no<something> are default functions for use by
 154  * filesystems that need the "default reasonable" implementation for a
 155  * particular operation.
 156  *
 157  * The documentation for the operations they implement exists (if it exists)
 158  * in the VOP_<SOMETHING>(9) manpage (all uppercase).
 159  */
 160
 161 /*
 162  * Default vop for filesystems that do not support name lookup
 163  */
 164 static int
 165 vop_nolookup(ap)
 166         struct vop_lookup_args /* {
 167                 struct vnode *a_dvp;
 168                 struct vnode **a_vpp;
 169                 struct componentname *a_cnp;
 170         } */ *ap;
 171 {
 172
 173         *ap->a_vpp = NULL;
 174         return (ENOTDIR);
 175 }
 176
 177 /*
 178  *      vop_nostrategy:
 179  *
 180  *      Strategy routine for VFS devices that have none.
 181  *
 182  *      BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 183  *      routine.  Typically this is done for a BIO_READ strategy call.
 184  *      Typically B_INVAL is assumed to already be clear prior to a write
 185  *      and should not be cleared manually unless you just made the buffer
 186  *      invalid.  BIO_ERROR should be cleared either way.
 187  */
 188
 189 static int
 190 vop_nostrategy (struct vop_strategy_args *ap)
 191 {
 192         printf("No strategy for buffer at %p\n", ap->a_bp);
 193         vprint("vnode", ap->a_vp);
 194         ap->a_bp->b_ioflags |= BIO_ERROR;
 195         ap->a_bp->b_error = EOPNOTSUPP;
 196         bufdone(ap->a_bp);
 197         return (EOPNOTSUPP);
 198 }
 199
 200 /*
 201  * vop_stdpathconf:
 202  *
 203  * Standard implementation of POSIX pathconf, to get information about limits
 204  * for a filesystem.
 205  * Override per filesystem for the case where the filesystem has smaller
 206  * limits.
 207  */
 208 int
 209 vop_stdpathconf(ap)
 210         struct vop_pathconf_args /* {
 211         struct vnode *a_vp;
 212         int a_name;
 213         int *a_retval;
 214         } */ *ap;
 215 {
 216
 217         switch (ap->a_name) {
 218                 case _PC_LINK_MAX:
 219                         *ap->a_retval = LINK_MAX;
 220                         return (0);
 221                 case _PC_MAX_CANON:
 222                         *ap->a_retval = MAX_CANON;
 223                         return (0);
 224                 case _PC_MAX_INPUT:
 225                         *ap->a_retval = MAX_INPUT;
 226                         return (0);
 227                 case _PC_PIPE_BUF:
 228                         *ap->a_retval = PIPE_BUF;
 229                         return (0);
 230                 case _PC_CHOWN_RESTRICTED:
 231                         *ap->a_retval = 1;
 232                         return (0);
 233                 case _PC_VDISABLE:
 234                         *ap->a_retval = _POSIX_VDISABLE;
 235                         return (0);
 236                 default:
 237                         return (EINVAL);
 238         }
 239         /* NOTREACHED */
 240 }
 241
 242 /*
 243  * Standard lock, unlock and islocked functions.
 244  */
 245 int
 246 vop_stdlock(ap)
 247         struct vop_lock_args /* {
 248                 struct vnode *a_vp;
 249                 int a_flags;
 250                 struct thread *a_td;
 251         } */ *ap;
 252 {
 253         struct vnode *vp = ap->a_vp;
 254
 255 #ifndef DEBUG_LOCKS
 256         return (lockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp), ap->a_td));
 257 #else
 258         return (debuglockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
 259             ap->a_td, "vop_stdlock", vp->filename, vp->line));
 260 #endif
 261 }
 262
 263 /* See above. */
 264 int
 265 vop_stdunlock(ap)
 266         struct vop_unlock_args /* {
 267                 struct vnode *a_vp;
 268                 int a_flags;
 269                 struct thread *a_td;
 270         } */ *ap;
 271 {
 272         struct vnode *vp = ap->a_vp;
 273
 274         return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp),
 275             ap->a_td));
 276 }
 277
 278 /* See above. */
 279 int
 280 vop_stdislocked(ap)
 281         struct vop_islocked_args /* {
 282                 struct vnode *a_vp;
 283                 struct thread *a_td;
 284         } */ *ap;
 285 {
 286
 287         return (lockstatus(ap->a_vp->v_vnlock, ap->a_td));
 288 }
 289
 290 /*
 291  * Return true for select/poll.
 292  */
 293 int
 294 vop_nopoll(ap)
 295         struct vop_poll_args /* {
 296                 struct vnode *a_vp;
 297                 int  a_events;
 298                 struct ucred *a_cred;
 299                 struct thread *a_td;
 300         } */ *ap;
 301 {
 302         /*
 303          * Return true for read/write.  If the user asked for something
 304          * special, return POLLNVAL, so that clients have a way of
 305          * determining reliably whether or not the extended
 306          * functionality is present without hard-coding knowledge
 307          * of specific filesystem implementations.
 308          * Stay in sync with kern_conf.c::no_poll().
 309          */
 310         if (ap->a_events & ~POLLSTANDARD)
 311                 return (POLLNVAL);
 312
 313         return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
 314 }
 315
 316 /*
 317  * Implement poll for local filesystems that support it.
 318  */
 319 int
 320 vop_stdpoll(ap)
 321         struct vop_poll_args /* {
 322                 struct vnode *a_vp;
 323                 int  a_events;
 324                 struct ucred *a_cred;
 325                 struct thread *a_td;
 326         } */ *ap;
 327 {
 328         if (ap->a_events & ~POLLSTANDARD)
 329                 return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
 330         return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
 331 }
 332
 333 /*
 334  * Return our mount point, as we will take charge of the writes.
 335  */
 336 int
 337 vop_stdgetwritemount(ap)
 338         struct vop_getwritemount_args /* {
 339                 struct vnode *a_vp;
 340                 struct mount **a_mpp;
 341         } */ *ap;
 342 {
 343
 344         *(ap->a_mpp) = ap->a_vp->v_mount;
 345         return (0);
 346 }
 347
 348 /* XXX Needs good comment and VOP_BMAP(9) manpage */
 349 int
 350 vop_stdbmap(ap)
 351         struct vop_bmap_args /* {
 352                 struct vnode *a_vp;
 353                 daddr_t  a_bn;
 354                 struct bufobj **a_bop;
 355                 daddr_t *a_bnp;
 356                 int *a_runp;
 357                 int *a_runb;
 358         } */ *ap;
 359 {
 360
 361         if (ap->a_bop != NULL)
 362                 *ap->a_bop = &ap->a_vp->v_bufobj;
 363         if (ap->a_bnp != NULL)
 364                 *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
 365         if (ap->a_runp != NULL)
 366                 *ap->a_runp = 0;
 367         if (ap->a_runb != NULL)
 368                 *ap->a_runb = 0;
 369         return (0);
 370 }
 371
 372 int
 373 vop_stdfsync(ap)
 374         struct vop_fsync_args /* {
 375                 struct vnode *a_vp;
 376                 struct ucred *a_cred;
 377                 int a_waitfor;
 378                 struct thread *a_td;
 379         } */ *ap;
 380 {
 381         struct vnode *vp = ap->a_vp;
 382         struct buf *bp;
 383         struct bufobj *bo;
 384         struct buf *nbp;
 385         int error = 0;
 386         int maxretry = 1000;     /* large, arbitrarily chosen */
 387
 388         VI_LOCK(vp);
 389 loop1:
 390         /*
 391          * MARK/SCAN initialization to avoid infinite loops.
 392          */
 393         TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) {
 394                 bp->b_vflags &= ~BV_SCANNED;
 395                 bp->b_error = 0;
 396         }
 397
 398         /*
 399          * Flush all dirty buffers associated with a vnode.
 400          */
 401 loop2:
 402         TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
 403                 if ((bp->b_vflags & BV_SCANNED) != 0)
 404                         continue;
 405                 bp->b_vflags |= BV_SCANNED;
 406                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
 407                         continue;
 408                 VI_UNLOCK(vp);
 409                 if ((bp->b_flags & B_DELWRI) == 0)
 410                         panic("fsync: not dirty");
 411                 if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
 412                         vfs_bio_awrite(bp);
 413                 } else {
 414                         bremfree(bp);
 415                         bawrite(bp);
 416                 }
 417                 VI_LOCK(vp);
 418                 goto loop2;
 419         }
 420
 421         /*
 422          * If synchronous the caller expects us to completely resolve all
 423          * dirty buffers in the system.  Wait for in-progress I/O to
 424          * complete (which could include background bitmap writes), then
 425          * retry if dirty blocks still exist.
 426          */
 427         if (ap->a_waitfor == MNT_WAIT) {
 428                 bo = &vp->v_bufobj;
 429                 bufobj_wwait(bo, 0, 0);
 430                 if (bo->bo_dirty.bv_cnt > 0) {
 431                         /*
 432                          * If we are unable to write any of these buffers
 433                          * then we fail now rather than trying endlessly
 434                          * to write them out.
 435                          */
 436                         TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
 437                                 if ((error = bp->b_error) == 0)
 438                                         continue;
 439                         if (error == 0 && --maxretry >= 0)
 440                                 goto loop1;
 441                         error = EAGAIN;
 442                 }
 443         }
 444         VI_UNLOCK(vp);
 445         if (error == EAGAIN)
 446                 vprint("fsync: giving up on dirty", vp);
 447
 448         return (error);
 449 }
 450
 451 /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
 452 int
 453 vop_stdgetpages(ap)
 454         struct vop_getpages_args /* {
 455                 struct vnode *a_vp;
 456                 vm_page_t *a_m;
 457                 int a_count;
 458                 int a_reqpage;
 459                 vm_ooffset_t a_offset;
 460         } */ *ap;
 461 {
 462
 463         return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
 464             ap->a_count, ap->a_reqpage);
 465 }
 466
 467 /* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
 468 int
 469 vop_stdputpages(ap)
 470         struct vop_putpages_args /* {
 471                 struct vnode *a_vp;
 472                 vm_page_t *a_m;
 473                 int a_count;
 474                 int a_sync;
 475                 int *a_rtvals;
 476                 vm_ooffset_t a_offset;
 477         } */ *ap;
 478 {
 479
 480         return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
 481              ap->a_sync, ap->a_rtvals);
 482 }
 483
 484 /*
 485  * vfs default ops
 486  * used to fill the vfs function table to get reasonable default return values.
 487  */
 488 int
 489 vfs_stdroot (mp, flags, vpp, td)
 490         struct mount *mp;
 491         int flags;
 492         struct vnode **vpp;
 493         struct thread *td;
 494 {
 495
 496         return (EOPNOTSUPP);
 497 }
 498
 499 int
 500 vfs_stdstatfs (mp, sbp, td)
 501         struct mount *mp;
 502         struct statfs *sbp;
 503         struct thread *td;
 504 {
 505
 506         return (EOPNOTSUPP);
 507 }
 508
 509 int
 510 vfs_stdvptofh (vp, fhp)
 511         struct vnode *vp;
 512         struct fid *fhp;
 513 {
 514
 515         return (EOPNOTSUPP);
 516 }
 517
 518 int
 519 vfs_stdquotactl (mp, cmds, uid, arg, td)
 520         struct mount *mp;
 521         int cmds;
 522         uid_t uid;
 523         caddr_t arg;
 524         struct thread *td;
 525 {
 526
 527         return (EOPNOTSUPP);
 528 }
 529
 530 int
 531 vfs_stdsync(mp, waitfor, td)
 532         struct mount *mp;
 533         int waitfor;
 534         struct thread *td;
 535 {
 536         struct vnode *vp, *nvp;
 537         int error, lockreq, allerror = 0;
 538
 539         lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
 540         if (waitfor != MNT_WAIT)
 541                 lockreq |= LK_NOWAIT;
 542         /*
 543          * Force stale buffer cache information to be flushed.
 544          */
 545         MNT_ILOCK(mp);
 546 loop:
 547         MNT_VNODE_FOREACH(vp, mp, nvp) {
 548
 549                 VI_LOCK(vp);
 550                 if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
 551                         VI_UNLOCK(vp);
 552                         continue;
 553                 }
 554                 MNT_IUNLOCK(mp);
 555
 556                 if ((error = vget(vp, lockreq, td)) != 0) {
 557                         MNT_ILOCK(mp);
 558                         if (error == ENOENT)
 559                                 goto loop;
 560                         continue;
 561                 }
 562                 error = VOP_FSYNC(vp, waitfor, td);
 563                 if (error)
 564                         allerror = error;
 565
 566                 VOP_UNLOCK(vp, 0, td);
 567                 vrele(vp);
 568                 MNT_ILOCK(mp);
 569         }
 570         MNT_IUNLOCK(mp);
 571         return (allerror);
 572 }
 573
 574 int
 575 vfs_stdnosync (mp, waitfor, td)
 576         struct mount *mp;
 577         int waitfor;
 578         struct thread *td;
 579 {
 580
 581         return (0);
 582 }
 583
 584 int
 585 vfs_stdvget (mp, ino, flags, vpp)
 586         struct mount *mp;
 587         ino_t ino;
 588         int flags;
 589         struct vnode **vpp;
 590 {
 591
 592         return (EOPNOTSUPP);
 593 }
 594
 595 int
 596 vfs_stdfhtovp (mp, fhp, vpp)
 597         struct mount *mp;
 598         struct fid *fhp;
 599         struct vnode **vpp;
 600 {
 601
 602         return (EOPNOTSUPP);
 603 }
 604
 605 int
 606 vfs_stdinit (vfsp)
 607         struct vfsconf *vfsp;
 608 {
 609
 610         return (0);
 611 }
 612
 613 int
 614 vfs_stduninit (vfsp)
 615         struct vfsconf *vfsp;
 616 {
 617
 618         return(0);
 619 }
 620
 621 int
 622 vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname, td)
 623         struct mount *mp;
 624         int cmd;
 625         struct vnode *filename_vp;
 626         int attrnamespace;
 627         const char *attrname;
 628         struct thread *td;
 629 {
 630
 631         if (filename_vp != NULL)
 632                 VOP_UNLOCK(filename_vp, 0, td);
 633         return (EOPNOTSUPP);
 634 }
 635
 636 int
 637 vfs_stdsysctl(mp, op, req)
 638         struct mount *mp;
 639         fsctlop_t op;
 640         struct sysctl_req *req;
 641 {
 642
 643         return (EOPNOTSUPP);
 644 }
 645
 646 /* end of vfs default ops */