sys/kern/vfs_default.c

   1 /*-
   2  * Copyright (c) 1989, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * This code is derived from software contributed
   6  * to Berkeley by John Heidemann of the UCLA Ficus project.
   7  *
   8  * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 4. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34
  35 #include <sys/cdefs.h>
  36 __FBSDID("$FreeBSD$");
  37
  38 #include <sys/param.h>
  39 #include <sys/systm.h>
  40 #include <sys/bio.h>
  41 #include <sys/buf.h>
  42 #include <sys/conf.h>
  43 #include <sys/event.h>
  44 #include <sys/kernel.h>
  45 #include <sys/limits.h>
  46 #include <sys/lock.h>
  47 #include <sys/lockf.h>
  48 #include <sys/malloc.h>
  49 #include <sys/mount.h>
  50 #include <sys/mutex.h>
  51 #include <sys/namei.h>
  52 #include <sys/fcntl.h>
  53 #include <sys/unistd.h>
  54 #include <sys/vnode.h>
  55 #include <sys/dirent.h>
  56 #include <sys/poll.h>
  57
  58 #include <vm/vm.h>
  59 #include <vm/vm_object.h>
  60 #include <vm/vm_extern.h>
  61 #include <vm/pmap.h>
  62 #include <vm/vm_map.h>
  63 #include <vm/vm_page.h>
  64 #include <vm/vm_pager.h>
  65 #include <vm/vnode_pager.h>
  66
/* Forward declarations for the file-local helpers defined below. */
static int      vop_nolookup(struct vop_lookup_args *);
static int      vop_nostrategy(struct vop_strategy_args *);
static int      get_next_dirent(struct vnode *vp, struct dirent **dpp,
                                char *dirbuf, int dirbuflen, off_t *off,
                                char **cpos, int *len, int *eofflag,
                                struct thread *td);
static int      dirent_exists(struct vnode *vp, const char *dirname,
                              struct thread *td);

/*
 * Smallest d_reclen that get_next_dirent() accepts: sizeof(struct dirent)
 * minus (MAXNAMLEN + 1) bytes (presumably the size of the d_name array --
 * confirm against <sys/dirent.h>) plus four bytes.  Shorter records are
 * treated as a malformed directory.
 */
#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)
  77
/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP
 * (see the .vop_bypass entry below).
 *
 * Entries are either one of the generic placeholder handlers (VOP_NULL,
 * VOP_EINVAL, VOP_ENOTTY, VOP_PANIC, VOP_EOPNOTSUPP) or one of the
 * vop_std<something> / vop_no<something> functions defined in this file.
 */

struct vop_vector default_vnodeops = {
        .vop_default =          NULL,
        .vop_bypass =           VOP_EOPNOTSUPP,

        .vop_advlock =          vop_stdadvlock,
        .vop_advlockasync =     vop_stdadvlockasync,
        .vop_bmap =             vop_stdbmap,
        .vop_close =            VOP_NULL,
        .vop_fsync =            VOP_NULL,
        .vop_getpages =         vop_stdgetpages,
        .vop_getwritemount =    vop_stdgetwritemount,
        .vop_inactive =         VOP_NULL,
        .vop_ioctl =            VOP_ENOTTY,
        .vop_kqfilter =         vop_stdkqfilter,
        .vop_islocked =         vop_stdislocked,
        .vop_lease =            VOP_NULL,
        .vop_lock1 =            vop_stdlock,
        .vop_lookup =           vop_nolookup,
        .vop_open =             VOP_NULL,
        .vop_pathconf =         VOP_EINVAL,
        .vop_poll =             vop_nopoll,
        .vop_putpages =         vop_stdputpages,
        .vop_readlink =         VOP_EINVAL,
        .vop_revoke =           VOP_PANIC,
        .vop_strategy =         vop_nostrategy,
        .vop_unlock =           vop_stdunlock,
        .vop_vptocnp =          vop_stdvptocnp,
        .vop_vptofh =           vop_stdvptofh,
};
 115
 116 /*
 117  * Series of placeholder functions for various error returns for
 118  * VOPs.
 119  */
 120
 121 int
 122 vop_eopnotsupp(struct vop_generic_args *ap)
 123 {
 124         /*
 125         printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
 126         */
 127
 128         return (EOPNOTSUPP);
 129 }
 130
 131 int
 132 vop_ebadf(struct vop_generic_args *ap)
 133 {
 134
 135         return (EBADF);
 136 }
 137
 138 int
 139 vop_enotty(struct vop_generic_args *ap)
 140 {
 141
 142         return (ENOTTY);
 143 }
 144
 145 int
 146 vop_einval(struct vop_generic_args *ap)
 147 {
 148
 149         return (EINVAL);
 150 }
 151
 152 int
 153 vop_enoent(struct vop_generic_args *ap)
 154 {
 155
 156         return (ENOENT);
 157 }
 158
 159 int
 160 vop_null(struct vop_generic_args *ap)
 161 {
 162
 163         return (0);
 164 }
 165
 166 /*
 167  * Helper function to panic on some bad VOPs in some filesystems.
 168  */
 169 int
 170 vop_panic(struct vop_generic_args *ap)
 171 {
 172
 173         panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
 174 }
 175
/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * Where documentation for the operation they implement exists, it is in
 * the corresponding VOP_<SOMETHING>(9) manual page (name in uppercase).
 */
 184
 185 /*
 186  * Default vop for filesystems that do not support name lookup
 187  */
 188 static int
 189 vop_nolookup(ap)
 190         struct vop_lookup_args /* {
 191                 struct vnode *a_dvp;
 192                 struct vnode **a_vpp;
 193                 struct componentname *a_cnp;
 194         } */ *ap;
 195 {
 196
 197         *ap->a_vpp = NULL;
 198         return (ENOTDIR);
 199 }
 200
 201 /*
 202  *      vop_nostrategy:
 203  *
 204  *      Strategy routine for VFS devices that have none.
 205  *
 206  *      BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 207  *      routine.  Typically this is done for a BIO_READ strategy call.
 208  *      Typically B_INVAL is assumed to already be clear prior to a write
 209  *      and should not be cleared manually unless you just made the buffer
 210  *      invalid.  BIO_ERROR should be cleared either way.
 211  */
 212
 213 static int
 214 vop_nostrategy (struct vop_strategy_args *ap)
 215 {
 216         printf("No strategy for buffer at %p\n", ap->a_bp);
 217         vprint("vnode", ap->a_vp);
 218         ap->a_bp->b_ioflags |= BIO_ERROR;
 219         ap->a_bp->b_error = EOPNOTSUPP;
 220         bufdone(ap->a_bp);
 221         return (EOPNOTSUPP);
 222 }
 223
 224 static int
 225 get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
 226                 int dirbuflen, off_t *off, char **cpos, int *len,
 227                 int *eofflag, struct thread *td)
 228 {
 229         int error, reclen;
 230         struct uio uio;
 231         struct iovec iov;
 232         struct dirent *dp;
 233
 234         KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
 235         KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
 236
 237         if (*len == 0) {
 238                 iov.iov_base = dirbuf;
 239                 iov.iov_len = dirbuflen;
 240
 241                 uio.uio_iov = &iov;
 242                 uio.uio_iovcnt = 1;
 243                 uio.uio_offset = *off;
 244                 uio.uio_resid = dirbuflen;
 245                 uio.uio_segflg = UIO_SYSSPACE;
 246                 uio.uio_rw = UIO_READ;
 247                 uio.uio_td = td;
 248
 249                 *eofflag = 0;
 250
 251 #ifdef MAC
 252                 error = mac_vnode_check_readdir(td->td_ucred, vp);
 253                 if (error == 0)
 254 #endif
 255                         error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
 256                                 NULL, NULL);
 257                 if (error)
 258                         return (error);
 259
 260                 *off = uio.uio_offset;
 261
 262                 *cpos = dirbuf;
 263                 *len = (dirbuflen - uio.uio_resid);
 264         }
 265
 266         dp = (struct dirent *)(*cpos);
 267         reclen = dp->d_reclen;
 268         *dpp = dp;
 269
 270         /* check for malformed directory.. */
 271         if (reclen < DIRENT_MINSIZE)
 272                 return (EINVAL);
 273
 274         *cpos += reclen;
 275         *len -= reclen;
 276
 277         return (0);
 278 }
 279
 280 /*
 281  * Check if a named file exists in a given directory vnode.
 282  */
 283 static int
 284 dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
 285 {
 286         char *dirbuf, *cpos;
 287         int error, eofflag, dirbuflen, len, found;
 288         off_t off;
 289         struct dirent *dp;
 290         struct vattr va;
 291
 292         KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
 293         KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
 294
 295         found = 0;
 296
 297         error = VOP_GETATTR(vp, &va, td->td_ucred);
 298         if (error)
 299                 return (found);
 300
 301         dirbuflen = DEV_BSIZE;
 302         if (dirbuflen < va.va_blocksize)
 303                 dirbuflen = va.va_blocksize;
 304         dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);
 305
 306         off = 0;
 307         len = 0;
 308         do {
 309                 error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
 310                                         &cpos, &len, &eofflag, td);
 311                 if (error)
 312                         goto out;
 313
 314                 if ((dp->d_type != DT_WHT) &&
 315                     !strcmp(dp->d_name, dirname)) {
 316                         found = 1;
 317                         goto out;
 318                 }
 319         } while (len > 0 || !eofflag);
 320
 321 out:
 322         free(dirbuf, M_TEMP);
 323         return (found);
 324 }
 325
 326 /*
 327  * Advisory record locking support
 328  */
 329 int
 330 vop_stdadvlock(struct vop_advlock_args *ap)
 331 {
 332         struct vnode *vp;
 333         struct ucred *cred;
 334         struct vattr vattr;
 335         int error;
 336
 337         vp = ap->a_vp;
 338         cred = curthread->td_ucred;
 339         vn_lock(vp, LK_SHARED | LK_RETRY);
 340         error = VOP_GETATTR(vp, &vattr, cred);
 341         VOP_UNLOCK(vp, 0);
 342         if (error)
 343                 return (error);
 344
 345         return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
 346 }
 347
 348 int
 349 vop_stdadvlockasync(struct vop_advlockasync_args *ap)
 350 {
 351         struct vnode *vp;
 352         struct ucred *cred;
 353         struct vattr vattr;
 354         int error;
 355
 356         vp = ap->a_vp;
 357         cred = curthread->td_ucred;
 358         vn_lock(vp, LK_SHARED | LK_RETRY);
 359         error = VOP_GETATTR(vp, &vattr, cred);
 360         VOP_UNLOCK(vp, 0);
 361         if (error)
 362                 return (error);
 363
 364         return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
 365 }
 366
 367 /*
 368  * vop_stdpathconf:
 369  *
 370  * Standard implementation of POSIX pathconf, to get information about limits
 371  * for a filesystem.
 372  * Override per filesystem for the case where the filesystem has smaller
 373  * limits.
 374  */
 375 int
 376 vop_stdpathconf(ap)
 377         struct vop_pathconf_args /* {
 378         struct vnode *a_vp;
 379         int a_name;
 380         int *a_retval;
 381         } */ *ap;
 382 {
 383
 384         switch (ap->a_name) {
 385                 case _PC_NAME_MAX:
 386                         *ap->a_retval = NAME_MAX;
 387                         return (0);
 388                 case _PC_PATH_MAX:
 389                         *ap->a_retval = PATH_MAX;
 390                         return (0);
 391                 case _PC_LINK_MAX:
 392                         *ap->a_retval = LINK_MAX;
 393                         return (0);
 394                 case _PC_MAX_CANON:
 395                         *ap->a_retval = MAX_CANON;
 396                         return (0);
 397                 case _PC_MAX_INPUT:
 398                         *ap->a_retval = MAX_INPUT;
 399                         return (0);
 400                 case _PC_PIPE_BUF:
 401                         *ap->a_retval = PIPE_BUF;
 402                         return (0);
 403                 case _PC_CHOWN_RESTRICTED:
 404                         *ap->a_retval = 1;
 405                         return (0);
 406                 case _PC_VDISABLE:
 407                         *ap->a_retval = _POSIX_VDISABLE;
 408                         return (0);
 409                 default:
 410                         return (EINVAL);
 411         }
 412         /* NOTREACHED */
 413 }
 414
 415 /*
 416  * Standard lock, unlock and islocked functions.
 417  */
 418 int
 419 vop_stdlock(ap)
 420         struct vop_lock1_args /* {
 421                 struct vnode *a_vp;
 422                 int a_flags;
 423                 char *file;
 424                 int line;
 425         } */ *ap;
 426 {
 427         struct vnode *vp = ap->a_vp;
 428
 429         return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
 430             LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
 431             ap->a_line));
 432 }
 433
 434 /* See above. */
 435 int
 436 vop_stdunlock(ap)
 437         struct vop_unlock_args /* {
 438                 struct vnode *a_vp;
 439                 int a_flags;
 440         } */ *ap;
 441 {
 442         struct vnode *vp = ap->a_vp;
 443
 444         return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
 445 }
 446
 447 /* See above. */
 448 int
 449 vop_stdislocked(ap)
 450         struct vop_islocked_args /* {
 451                 struct vnode *a_vp;
 452         } */ *ap;
 453 {
 454
 455         return (lockstatus(ap->a_vp->v_vnlock));
 456 }
 457
 458 /*
 459  * Return true for select/poll.
 460  */
 461 int
 462 vop_nopoll(ap)
 463         struct vop_poll_args /* {
 464                 struct vnode *a_vp;
 465                 int  a_events;
 466                 struct ucred *a_cred;
 467                 struct thread *a_td;
 468         } */ *ap;
 469 {
 470
 471         return (poll_no_poll(ap->a_events));
 472 }
 473
 474 /*
 475  * Implement poll for local filesystems that support it.
 476  */
 477 int
 478 vop_stdpoll(ap)
 479         struct vop_poll_args /* {
 480                 struct vnode *a_vp;
 481                 int  a_events;
 482                 struct ucred *a_cred;
 483                 struct thread *a_td;
 484         } */ *ap;
 485 {
 486         if (ap->a_events & ~POLLSTANDARD)
 487                 return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
 488         return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
 489 }
 490
 491 /*
 492  * Return our mount point, as we will take charge of the writes.
 493  */
 494 int
 495 vop_stdgetwritemount(ap)
 496         struct vop_getwritemount_args /* {
 497                 struct vnode *a_vp;
 498                 struct mount **a_mpp;
 499         } */ *ap;
 500 {
 501         struct mount *mp;
 502
 503         /*
 504          * XXX Since this is called unlocked we may be recycled while
 505          * attempting to ref the mount.  If this is the case or mountpoint
 506          * will be set to NULL.  We only have to prevent this call from
 507          * returning with a ref to an incorrect mountpoint.  It is not
 508          * harmful to return with a ref to our previous mountpoint.
 509          */
 510         mp = ap->a_vp->v_mount;
 511         if (mp != NULL) {
 512                 vfs_ref(mp);
 513                 if (mp != ap->a_vp->v_mount) {
 514                         vfs_rel(mp);
 515                         mp = NULL;
 516                 }
 517         }
 518         *(ap->a_mpp) = mp;
 519         return (0);
 520 }
 521
 522 /* XXX Needs good comment and VOP_BMAP(9) manpage */
 523 int
 524 vop_stdbmap(ap)
 525         struct vop_bmap_args /* {
 526                 struct vnode *a_vp;
 527                 daddr_t  a_bn;
 528                 struct bufobj **a_bop;
 529                 daddr_t *a_bnp;
 530                 int *a_runp;
 531                 int *a_runb;
 532         } */ *ap;
 533 {
 534
 535         if (ap->a_bop != NULL)
 536                 *ap->a_bop = &ap->a_vp->v_bufobj;
 537         if (ap->a_bnp != NULL)
 538                 *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
 539         if (ap->a_runp != NULL)
 540                 *ap->a_runp = 0;
 541         if (ap->a_runb != NULL)
 542                 *ap->a_runb = 0;
 543         return (0);
 544 }
 545
 546 int
 547 vop_stdfsync(ap)
 548         struct vop_fsync_args /* {
 549                 struct vnode *a_vp;
 550                 struct ucred *a_cred;
 551                 int a_waitfor;
 552                 struct thread *a_td;
 553         } */ *ap;
 554 {
 555         struct vnode *vp = ap->a_vp;
 556         struct buf *bp;
 557         struct bufobj *bo;
 558         struct buf *nbp;
 559         int error = 0;
 560         int maxretry = 1000;     /* large, arbitrarily chosen */
 561
 562         bo = &vp->v_bufobj;
 563         BO_LOCK(bo);
 564 loop1:
 565         /*
 566          * MARK/SCAN initialization to avoid infinite loops.
 567          */
 568         TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
 569                 bp->b_vflags &= ~BV_SCANNED;
 570                 bp->b_error = 0;
 571         }
 572
 573         /*
 574          * Flush all dirty buffers associated with a vnode.
 575          */
 576 loop2:
 577         TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 578                 if ((bp->b_vflags & BV_SCANNED) != 0)
 579                         continue;
 580                 bp->b_vflags |= BV_SCANNED;
 581                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
 582                         continue;
 583                 BO_UNLOCK(bo);
 584                 KASSERT(bp->b_bufobj == bo,
 585                     ("bp %p wrong b_bufobj %p should be %p",
 586                     bp, bp->b_bufobj, bo));
 587                 if ((bp->b_flags & B_DELWRI) == 0)
 588                         panic("fsync: not dirty");
 589                 if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
 590                         vfs_bio_awrite(bp);
 591                 } else {
 592                         bremfree(bp);
 593                         bawrite(bp);
 594                 }
 595                 BO_LOCK(bo);
 596                 goto loop2;
 597         }
 598
 599         /*
 600          * If synchronous the caller expects us to completely resolve all
 601          * dirty buffers in the system.  Wait for in-progress I/O to
 602          * complete (which could include background bitmap writes), then
 603          * retry if dirty blocks still exist.
 604          */
 605         if (ap->a_waitfor == MNT_WAIT) {
 606                 bufobj_wwait(bo, 0, 0);
 607                 if (bo->bo_dirty.bv_cnt > 0) {
 608                         /*
 609                          * If we are unable to write any of these buffers
 610                          * then we fail now rather than trying endlessly
 611                          * to write them out.
 612                          */
 613                         TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
 614                                 if ((error = bp->b_error) == 0)
 615                                         continue;
 616                         if (error == 0 && --maxretry >= 0)
 617                                 goto loop1;
 618                         error = EAGAIN;
 619                 }
 620         }
 621         BO_UNLOCK(bo);
 622         if (error == EAGAIN)
 623                 vprint("fsync: giving up on dirty", vp);
 624
 625         return (error);
 626 }
 627
 628 /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
 629 int
 630 vop_stdgetpages(ap)
 631         struct vop_getpages_args /* {
 632                 struct vnode *a_vp;
 633                 vm_page_t *a_m;
 634                 int a_count;
 635                 int a_reqpage;
 636                 vm_ooffset_t a_offset;
 637         } */ *ap;
 638 {
 639
 640         return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
 641             ap->a_count, ap->a_reqpage);
 642 }
 643
 644 int
 645 vop_stdkqfilter(struct vop_kqfilter_args *ap)
 646 {
 647         return vfs_kqfilter(ap);
 648 }
 649
 650 /* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
 651 int
 652 vop_stdputpages(ap)
 653         struct vop_putpages_args /* {
 654                 struct vnode *a_vp;
 655                 vm_page_t *a_m;
 656                 int a_count;
 657                 int a_sync;
 658                 int *a_rtvals;
 659                 vm_ooffset_t a_offset;
 660         } */ *ap;
 661 {
 662
 663         return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
 664              ap->a_sync, ap->a_rtvals);
 665 }
 666
 667 int
 668 vop_stdvptofh(struct vop_vptofh_args *ap)
 669 {
 670         return (EOPNOTSUPP);
 671 }
 672
/*
 * Default vptocnp: find the name of directory vnode a_vp inside its
 * parent directory.
 *
 * The parent is obtained by opening ".." relative to a_vp; its entries
 * are then scanned for one whose d_fileno equals a_vp's va_fileid.
 *
 * On success (return 0):
 *   - the name (d_namlen bytes, no terminator) is copied into a_buf so
 *     that it ends at the previous *a_buflen offset, and *a_buflen is
 *     reduced by d_namlen, i.e. it becomes the offset of the name;
 *   - *a_vpp is the parent directory vnode, on which vhold() was called.
 *
 * Errors:
 *   ENOENT  a_vp is not a directory, the parent is not a directory, no
 *           matching entry was found, or (union mounts) the upper
 *           directory also contains an entry with the same name;
 *   ENOMEM  the name does not fit into the space left in a_buf;
 *   other   errors from VOP_GETATTR, vn_open or get_next_dirent().
 *
 * a_vp is unlocked while the parent is examined and is re-locked in its
 * previous lock mode (as reported by VOP_ISLOCKED) before returning.
 * Note that *a_vpp is written even when an error is returned.
 */
int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
        struct vnode *vp = ap->a_vp;
        struct vnode **dvp = ap->a_vpp;
        char *buf = ap->a_buf;
        int *buflen = ap->a_buflen;
        char *dirbuf, *cpos;
        int i, error, eofflag, dirbuflen, flags, locked, len, covered;
        off_t off;
        ino_t fileno;
        struct vattr va;
        struct nameidata nd;
        struct thread *td;
        struct dirent *dp;
        struct vnode *mvp;

        i = *buflen;
        error = 0;
        covered = 0;
        td = curthread;

        if (vp->v_type != VDIR)
                return (ENOENT);

        error = VOP_GETATTR(vp, &va, td->td_ucred);
        if (error)
                return (error);

        /*
         * NOTE(review): this reference has no matching vrele() in this
         * function; presumably the lookup started from vp via
         * NDINIT_ATVP consumes it -- confirm against namei().
         */
        VREF(vp);
        /* Remember the current lock mode so it can be restored on exit. */
        locked = VOP_ISLOCKED(vp);
        VOP_UNLOCK(vp, 0);
        /* Open the parent directory ("..") relative to vp, locked. */
        NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
            "..", vp, td);
        flags = FREAD;
        error = vn_open(&nd, &flags, 0, NULL);
        if (error) {
                vn_lock(vp, locked | LK_RETRY);
                return (error);
        }
        NDFREE(&nd, NDF_ONLY_PNBUF);

        /* mvp always names the vnode that vn_open() returned. */
        mvp = *dvp = nd.ni_vp;

        /*
         * ".." crossed into the root of a different, union-mounted
         * filesystem: use the vnode that mount covers as the parent to
         * scan instead.  mvp is closed here but kept referenced
         * (unlocked) for the dirent_exists() check below.
         */
        if (vp->v_mount != (*dvp)->v_mount &&
            ((*dvp)->v_vflag & VV_ROOT) &&
            ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
                *dvp = (*dvp)->v_mount->mnt_vnodecovered;
                VREF(mvp);
                VOP_UNLOCK(mvp, 0);
                vn_close(mvp, FREAD, td->td_ucred, td);
                VREF(*dvp);
                vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
                covered = 1;
        }

        /* File id of vp: the value searched for in the parent. */
        fileno = va.va_fileid;

        /* The read buffer is sized from vp's attributes, not the parent's. */
        dirbuflen = DEV_BSIZE;
        if (dirbuflen < va.va_blocksize)
                dirbuflen = va.va_blocksize;
        dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

        if ((*dvp)->v_type != VDIR) {
                error = ENOENT;
                goto out;
        }

        off = 0;
        len = 0;
        do {
                /* call VOP_READDIR of parent */
                error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
                                        &cpos, &len, &eofflag, td);
                if (error)
                        goto out;

                if ((dp->d_type != DT_WHT) &&
                    (dp->d_fileno == fileno)) {
                        if (covered) {
                                /*
                                 * Swap locks to look into the upper
                                 * directory (mvp).  If it holds an entry
                                 * with the same name the lookup fails --
                                 * presumably because that entry shadows
                                 * the one found in the covered directory.
                                 */
                                VOP_UNLOCK(*dvp, 0);
                                vn_lock(mvp, LK_EXCLUSIVE | LK_RETRY);
                                if (dirent_exists(mvp, dp->d_name, td)) {
                                        error = ENOENT;
                                        VOP_UNLOCK(mvp, 0);
                                        vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
                                        goto out;
                                }
                                VOP_UNLOCK(mvp, 0);
                                vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
                        }
                        /* Place the name so that it ends at offset *buflen. */
                        i -= dp->d_namlen;

                        if (i < 0) {
                                error = ENOMEM;
                                goto out;
                        }
                        bcopy(dp->d_name, buf + i, dp->d_namlen);
                        error = 0;
                        goto out;
                }
        } while (len > 0 || !eofflag);
        error = ENOENT;

out:
        free(dirbuf, M_TEMP);
        if (!error) {
                *buflen = i;
                /* Hold the parent before our own lock/reference is dropped. */
                vhold(*dvp);
        }
        if (covered) {
                /* Covered vnode: unlock and release; upper vnode: release. */
                vput(*dvp);
                vrele(mvp);
        } else {
                /* mvp == *dvp: unlock and close what vn_open() returned. */
                VOP_UNLOCK(mvp, 0);
                vn_close(mvp, FREAD, td->td_ucred, td);
        }
        vn_lock(vp, locked | LK_RETRY);
        return (error);
}
 793
 794 /*
 795  * vfs default ops
 796  * used to fill the vfs function table to get reasonable default return values.
 797  */
 798 int
 799 vfs_stdroot (mp, flags, vpp, td)
 800         struct mount *mp;
 801         int flags;
 802         struct vnode **vpp;
 803         struct thread *td;
 804 {
 805
 806         return (EOPNOTSUPP);
 807 }
 808
 809 int
 810 vfs_stdstatfs (mp, sbp, td)
 811         struct mount *mp;
 812         struct statfs *sbp;
 813         struct thread *td;
 814 {
 815
 816         return (EOPNOTSUPP);
 817 }
 818
 819 int
 820 vfs_stdquotactl (mp, cmds, uid, arg, td)
 821         struct mount *mp;
 822         int cmds;
 823         uid_t uid;
 824         void *arg;
 825         struct thread *td;
 826 {
 827
 828         return (EOPNOTSUPP);
 829 }
 830
/*
 * Default VFS sync: call VOP_FSYNC on every vnode of the mount that
 * appears to have dirty buffers.
 *
 * With waitfor == MNT_WAIT the vnode lock is acquired blocking;
 * otherwise LK_NOWAIT is used and vnodes whose vget() fails are skipped.
 *
 * Returns 0, or the error of the last VOP_FSYNC call that failed.
 */
int
vfs_stdsync(mp, waitfor, td)
        struct mount *mp;
        int waitfor;
        struct thread *td;
{
        struct vnode *vp, *mvp;
        int error, lockreq, allerror = 0;

        /* vget() is entered with the vnode interlock held (LK_INTERLOCK). */
        lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
        if (waitfor != MNT_WAIT)
                lockreq |= LK_NOWAIT;
        /*
         * Force stale buffer cache information to be flushed.
         */
        MNT_ILOCK(mp);
loop:
        MNT_VNODE_FOREACH(vp, mp, mvp) {
                /* bv_cnt is an acceptable race here. */
                if (vp->v_bufobj.bo_dirty.bv_cnt == 0)
                        continue;
                /* Take the interlock for vget(), then drop the mount lock. */
                VI_LOCK(vp);
                MNT_IUNLOCK(mp);
                if ((error = vget(vp, lockreq, td)) != 0) {
                        MNT_ILOCK(mp);
                        /*
                         * ENOENT: abort this iteration and rescan the
                         * mount's vnode list from the start.  Any other
                         * error just skips this vnode.
                         */
                        if (error == ENOENT) {
                                MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
                                goto loop;
                        }
                        continue;
                }
                error = VOP_FSYNC(vp, waitfor, td);
                /* Only the most recent failure is remembered. */
                if (error)
                        allerror = error;

                /* Do not turn this into vput.  td is not always curthread. */
                VOP_UNLOCK(vp, 0);
                vrele(vp);
                /* Re-take the mount interlock before continuing the walk. */
                MNT_ILOCK(mp);
        }
        MNT_IUNLOCK(mp);
        return (allerror);
}
 874
 875 int
 876 vfs_stdnosync (mp, waitfor, td)
 877         struct mount *mp;
 878         int waitfor;
 879         struct thread *td;
 880 {
 881
 882         return (0);
 883 }
 884
 885 int
 886 vfs_stdvget (mp, ino, flags, vpp)
 887         struct mount *mp;
 888         ino_t ino;
 889         int flags;
 890         struct vnode **vpp;
 891 {
 892
 893         return (EOPNOTSUPP);
 894 }
 895
 896 int
 897 vfs_stdfhtovp (mp, fhp, vpp)
 898         struct mount *mp;
 899         struct fid *fhp;
 900         struct vnode **vpp;
 901 {
 902
 903         return (EOPNOTSUPP);
 904 }
 905
 906 int
 907 vfs_stdinit (vfsp)
 908         struct vfsconf *vfsp;
 909 {
 910
 911         return (0);
 912 }
 913
 914 int
 915 vfs_stduninit (vfsp)
 916         struct vfsconf *vfsp;
 917 {
 918
 919         return(0);
 920 }
 921
 922 int
 923 vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname, td)
 924         struct mount *mp;
 925         int cmd;
 926         struct vnode *filename_vp;
 927         int attrnamespace;
 928         const char *attrname;
 929         struct thread *td;
 930 {
 931
 932         if (filename_vp != NULL)
 933                 VOP_UNLOCK(filename_vp, 0);
 934         return (EOPNOTSUPP);
 935 }
 936
 937 int
 938 vfs_stdsysctl(mp, op, req)
 939         struct mount *mp;
 940         fsctlop_t op;
 941         struct sysctl_req *req;
 942 {
 943
 944         return (EOPNOTSUPP);
 945 }
 946
 947 /* end of vfs default ops */