sys/nfsclient/nfs_vnops.c

   1 /*-
   2  * Copyright (c) 1989, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * This code is derived from software contributed to Berkeley by
   6  * Rick Macklem at The University of Guelph.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  * 4. Neither the name of the University nor the names of its contributors
  17  *    may be used to endorse or promote products derived from this software
  18  *    without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30  * SUCH DAMAGE.
  31  *
  32  *      @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
  33  */
  34
  35 #include <sys/cdefs.h>
  36 __FBSDID("$FreeBSD$");
  37
  38 /*
  39  * vnode op calls for Sun NFS version 2 and 3
  40  */
  41
  42 #include "opt_inet.h"
  43 #include "opt_kdtrace.h"
  44
  45 #include <sys/param.h>
  46 #include <sys/kernel.h>
  47 #include <sys/systm.h>
  48 #include <sys/resourcevar.h>
  49 #include <sys/proc.h>
  50 #include <sys/mount.h>
  51 #include <sys/bio.h>
  52 #include <sys/buf.h>
  53 #include <sys/malloc.h>
  54 #include <sys/mbuf.h>
  55 #include <sys/namei.h>
  56 #include <sys/socket.h>
  57 #include <sys/vnode.h>
  58 #include <sys/dirent.h>
  59 #include <sys/fcntl.h>
  60 #include <sys/lockf.h>
  61 #include <sys/stat.h>
  62 #include <sys/sysctl.h>
  63 #include <sys/signalvar.h>
  64
  65 #include <vm/vm.h>
  66 #include <vm/vm_object.h>
  67 #include <vm/vm_extern.h>
  68 #include <vm/vm_object.h>
  69
  70 #include <fs/fifofs/fifo.h>
  71
  72 #include <nfs/nfsproto.h>
  73 #include <nfsclient/nfs.h>
  74 #include <nfsclient/nfsnode.h>
  75 #include <nfsclient/nfsmount.h>
  76 #include <nfsclient/nfs_kdtrace.h>
  77 #include <nfsclient/nfs_lock.h>
  78 #include <nfs/xdr_subs.h>
  79 #include <nfsclient/nfsm_subs.h>
  80
  81 #include <net/if.h>
  82 #include <netinet/in.h>
  83 #include <netinet/in_var.h>
  84
  85 #include <machine/stdarg.h>
  86
  87 #ifdef KDTRACE_HOOKS
  88 #include <sys/dtrace_bsd.h>
  89
     /*
      * DTrace hooks for the NFS access cache.  Each event (flush done,
      * get hit, get miss, load done) has a hook variable of the matching
      * *_probe_func_t type plus a 32-bit probe id.  None of them is given
      * an initializer here.
      * NOTE(review): presumably filled in by the DTrace NFS client
      * provider when it loads -- confirm.
      */
  90 dtrace_nfsclient_accesscache_flush_probe_func_t
  91     dtrace_nfsclient_accesscache_flush_done_probe;
  92 uint32_t nfsclient_accesscache_flush_done_id;
  93
  94 dtrace_nfsclient_accesscache_get_probe_func_t
  95     dtrace_nfsclient_accesscache_get_hit_probe,
  96     dtrace_nfsclient_accesscache_get_miss_probe;
  97 uint32_t nfsclient_accesscache_get_hit_id;
  98 uint32_t nfsclient_accesscache_get_miss_id;
  99
 100 dtrace_nfsclient_accesscache_load_probe_func_t
 101     dtrace_nfsclient_accesscache_load_done_probe;
 102 uint32_t nfsclient_accesscache_load_done_id;
 103 #endif /* KDTRACE_HOOKS */
 104
 105 /* Boolean constants */
 106 #define TRUE    1
 107 #define FALSE   0
 108
 109 /*
 110  * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
 111  * calls are not in getblk() and brelse() so that they would not be necessary
 112  * here.
      *
      * When B_VMIO is not defined, vfs_busy_pages() is defined as an empty
      * macro, so any call to it expands to nothing.
 113  */
 114 #ifndef B_VMIO
 115 #define vfs_busy_pages(bp, f)
 116 #endif
 117
 118 static vop_read_t       nfsfifo_read;
 119 static vop_write_t      nfsfifo_write;
 120 static vop_close_t      nfsfifo_close;
     /* Internal helpers with explicit signatures (not vop_*_t entry points). */
 121 static int      nfs_flush(struct vnode *, int, int);
 122 static int      nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *);
     /*
      * Vnode operation entry points; most of them are installed in the
      * nfs_vnodeops / nfs_fifoops vectors defined below.
      */
 123 static vop_lookup_t     nfs_lookup;
 124 static vop_create_t     nfs_create;
 125 static vop_mknod_t      nfs_mknod;
 126 static vop_open_t       nfs_open;
 127 static vop_close_t      nfs_close;
 128 static vop_access_t     nfs_access;
 129 static vop_getattr_t    nfs_getattr;
 130 static vop_setattr_t    nfs_setattr;
 131 static vop_read_t       nfs_read;
 132 static vop_fsync_t      nfs_fsync;
 133 static vop_remove_t     nfs_remove;
 134 static vop_link_t       nfs_link;
 135 static vop_rename_t     nfs_rename;
 136 static vop_mkdir_t      nfs_mkdir;
 137 static vop_rmdir_t      nfs_rmdir;
 138 static vop_symlink_t    nfs_symlink;
 139 static vop_readdir_t    nfs_readdir;
 140 static vop_strategy_t   nfs_strategy;
     /* More internal helpers with explicit signatures. */
 141 static  int     nfs_lookitup(struct vnode *, const char *, int,
 142                     struct ucred *, struct thread *, struct nfsnode **);
 143 static  int     nfs_sillyrename(struct vnode *, struct vnode *,
 144                     struct componentname *);
     /* nfsspec_access is also called directly by nfs_access for NFSv2. */
 145 static vop_access_t     nfsspec_access;
 146 static vop_readlink_t   nfs_readlink;
 147 static vop_print_t      nfs_print;
 148 static vop_advlock_t    nfs_advlock;
 149 static vop_advlockasync_t nfs_advlockasync;
 150
 151 /*
 152  * Global vfs data structures for nfs
      *
      * nfs_vnodeops is the operations vector for NFS vnodes.  Its
      * .vop_default entry points at default_vnodeops (presumably the
      * fallback for operations not listed here -- confirm against the
      * vop_vector definition).
      *
      * nfs_getpages, nfs_putpages, nfs_inactive, nfs_reclaim and nfs_write
      * are not declared in this part of the file; presumably they come from
      * an included header.
 153  */
 154 struct vop_vector nfs_vnodeops = {
 155         .vop_default =          &default_vnodeops,
 156         .vop_access =           nfs_access,
 157         .vop_advlock =          nfs_advlock,
 158         .vop_advlockasync =     nfs_advlockasync,
 159         .vop_close =            nfs_close,
 160         .vop_create =           nfs_create,
 161         .vop_fsync =            nfs_fsync,
 162         .vop_getattr =          nfs_getattr,
 163         .vop_getpages =         nfs_getpages,
 164         .vop_putpages =         nfs_putpages,
 165         .vop_inactive =         nfs_inactive,
 166         .vop_link =             nfs_link,
 167         .vop_lookup =           nfs_lookup,
 168         .vop_mkdir =            nfs_mkdir,
 169         .vop_mknod =            nfs_mknod,
 170         .vop_open =             nfs_open,
 171         .vop_print =            nfs_print,
 172         .vop_read =             nfs_read,
 173         .vop_readdir =          nfs_readdir,
 174         .vop_readlink =         nfs_readlink,
 175         .vop_reclaim =          nfs_reclaim,
 176         .vop_remove =           nfs_remove,
 177         .vop_rename =           nfs_rename,
 178         .vop_rmdir =            nfs_rmdir,
 179         .vop_setattr =          nfs_setattr,
 180         .vop_strategy =         nfs_strategy,
 181         .vop_symlink =          nfs_symlink,
 182         .vop_write =            nfs_write,
 183 };
 184
 185 struct vop_vector nfs_fifoops = {
             /*
              * Operations vector whose .vop_default is fifo_specops.  It
              * supplies NFS-specific access, close, fsync, getattr, inactive,
              * print, read, reclaim, setattr and write entries; the close,
              * read and write entries are the nfsfifo_* variants.
              */
 186         .vop_default =          &fifo_specops,
 187         .vop_access =           nfsspec_access,
 188         .vop_close =            nfsfifo_close,
 189         .vop_fsync =            nfs_fsync,
 190         .vop_getattr =          nfs_getattr,
 191         .vop_inactive =         nfs_inactive,
 192         .vop_print =            nfs_print,
 193         .vop_read =             nfsfifo_read,
 194         .vop_reclaim =          nfs_reclaim,
 195         .vop_setattr =          nfs_setattr,
 196         .vop_write =            nfsfifo_write,
 197 };
 198
 199 static int      nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
 200                              struct componentname *cnp, struct vattr *vap);
 201 static int      nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
 202                               struct ucred *cred, struct thread *td);
 203 static int      nfs_renamerpc(struct vnode *fdvp, const char *fnameptr,
 204                               int fnamelen, struct vnode *tdvp,
 205                               const char *tnameptr, int tnamelen,
 206                               struct ucred *cred, struct thread *td);
 207 static int      nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
 208                              struct sillyrename *sp);
 209
 210 /*
 211  * Global variables
      *
      * nfs_iod_mtx is the global lock protecting shared nfsiod state (see
      * the SMP locking note in this file).  nfs_iodwant and nfs_iodmount
      * each have NFS_MAXASYNCDAEMON slots.  nfs_advlock_p starts out
      * pointing at nfs_dolock and nfs_reclaim_p starts out NULL.
 212  */
 213 struct mtx      nfs_iod_mtx;
 214 struct proc     *nfs_iodwant[NFS_MAXASYNCDAEMON];
 215 struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
 216 int              nfs_numasync = 0;
 217 vop_advlock_t   *nfs_advlock_p = nfs_dolock;
 218 vop_reclaim_t   *nfs_reclaim_p = NULL;
     /*
      * Size of struct dirent less MAXNAMLEN + 1 bytes (presumably the fixed
      * header that precedes the name field -- confirm against struct dirent).
      */
 219 #define DIRHDSIZ        (sizeof (struct dirent) - (MAXNAMLEN + 1))
 220
 221 SYSCTL_DECL(_vfs_nfs);
 222
     /*
      * Lifetime of an access-cache entry; nfs_access compares
      * stamp + timeout against time_second.  A value <= 0 makes nfs_access
      * request only the bits it needs instead of the full set.
      */
 223 static int      nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
 224 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
 225            &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
 226
     /*
      * When non-zero (and the access cache is enabled), nfs_getattr issues
      * an ACCESS RPC on an NFSv3 attribute-cache miss before falling back to
      * GETATTR.
      */
 227 static int      nfs_prime_access_cache = 0;
 228 SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
 229            &nfs_prime_access_cache, 0,
 230            "Prime NFS ACCESS cache when fetching attributes");
 231
     /* Selects the commit argument nfs_close passes to nfs_flush (1 or 0). */
 232 static int      nfsv3_commit_on_close = 0;
 233 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
 234            &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
 235
     /* When non-zero, nfs_close cleans the pages of the vnode's VM object. */
 236 static int      nfs_clean_pages_on_close = 1;
 237 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
 238            &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");
 239
     /* Gates the O_DIRECT handling in nfs_open and nfs_close. */
 240 int nfs_directio_enable = 0;
 241 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
 242            &nfs_directio_enable, 0, "Enable NFS directio");
 243
 244 /*
 245  * This sysctl allows other processes to mmap a file that has been opened
 246  * O_DIRECT by a process.  In general, having processes mmap the file while
 247  * Direct IO is in progress can lead to data inconsistencies.  But, we allow
 248  * this by default to prevent DoS attacks - to prevent a malicious user from
 249  * opening up files O_DIRECT preventing other users from mmap'ing these
 250  * files.  "Protected" environments where stricter consistency guarantees are
 251  * required can disable this knob.  The process that opened the file O_DIRECT
 252  * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
 253  * meaningful.
 254  */
 255 int nfs_directio_allow_mmap = 1;
 256 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
 257            &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");
 258
     /* Disabled: read-only sysctls exporting the access-cache hit/miss counters. */
 259 #if 0
 260 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
 261            &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
 262
 263 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
 264            &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
 265 #endif
 266
     /* Union of the six NFSv3 ACCESS bits; nfs_getattr uses it to prime the cache. */
 267 #define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY          \
 268                          | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE     \
 269                          | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
 270
 271 /*
 272  * SMP Locking Note:
 273  * Each lock is followed by its description; the indented entries under it
 274  * list, in order, the other locks that may be acquired while it is held.
 275  * np->n_mtx : Protects the fields in the nfsnode.
 276  *     VM Object Lock
 277  *     VI_MTX (acquired indirectly)
 278  * nmp->nm_mtx : Protects the fields in the nfsmount.
 279  *     rep->r_mtx
 280  * nfs_iod_mtx : Global lock, protects shared nfsiod state.
 281  * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
 282  *     nmp->nm_mtx
 283  *     rep->r_mtx
 284  * rep->r_mtx : Protects the fields in an nfsreq.
 285  */
 286
 287 static int
 288 nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
 289     struct ucred *cred, uint32_t *retmode)
 290 {
 291         const int v3 = 1;
 292         u_int32_t *tl;
 293         int error = 0, attrflag, i, lrupos;
 294
 295         struct mbuf *mreq, *mrep, *md, *mb;
 296         caddr_t bpos, dpos;
 297         u_int32_t rmode;
 298         struct nfsnode *np = VTONFS(vp);
 299
 300         nfsstats.rpccnt[NFSPROC_ACCESS]++;
 301         mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
 302         mb = mreq;
 303         bpos = mtod(mb, caddr_t);
 304         nfsm_fhtom(vp, v3);
 305         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
 306         *tl = txdr_unsigned(wmode);
 307         nfsm_request(vp, NFSPROC_ACCESS, td, cred);
 308         nfsm_postop_attr(vp, attrflag);
 309         if (!error) {
 310                 lrupos = 0;
 311                 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 312                 rmode = fxdr_unsigned(u_int32_t, *tl);
 313                 mtx_lock(&np->n_mtx);
 314                 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
 315                         if (np->n_accesscache[i].uid == cred->cr_uid) {
 316                                 np->n_accesscache[i].mode = rmode;
 317                                 np->n_accesscache[i].stamp = time_second;
 318                                 break;
 319                         }
 320                         if (i > 0 && np->n_accesscache[i].stamp <
 321                             np->n_accesscache[lrupos].stamp)
 322                                 lrupos = i;
 323                 }
 324                 if (i == NFS_ACCESSCACHESIZE) {
 325                         np->n_accesscache[lrupos].uid = cred->cr_uid;
 326                         np->n_accesscache[lrupos].mode = rmode;
 327                         np->n_accesscache[lrupos].stamp = time_second;
 328                 }
 329                 mtx_unlock(&np->n_mtx);
 330                 if (retmode != NULL)
 331                         *retmode = rmode;
 332                 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0);
 333         }
 334         m_freem(mrep);
 335 nfsmout:
 336 #ifdef KDTRACE_HOOKS
 337         if (error) {
 338                 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0,
 339                     error);
 340         }
 341 #endif
 342         return (error);
 343 }
 344
 345 /*
 346  * nfs access vnode op.
 347  * For nfs version 2, just return ok. File accesses may fail later.
 348  * For nfs version 3, use the access rpc to check accessibility. If file modes
 349  * are changed on the server, accesses might still fail later.
 350  */
 351 static int
 352 nfs_access(struct vop_access_args *ap)
 353 {
 354         struct vnode *vp = ap->a_vp;
 355         int error = 0, i, gotahit;
 356         u_int32_t mode, rmode, wmode;
 357         int v3 = NFS_ISV3(vp);
 358         struct nfsnode *np = VTONFS(vp);
 359
 360         /*
 361          * Disallow write attempts on filesystems mounted read-only;
 362          * unless the file is a socket, fifo, or a block or character
 363          * device resident on the filesystem.
 364          */
 365         if ((ap->a_accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 366                 switch (vp->v_type) {
 367                 case VREG:
 368                 case VDIR:
 369                 case VLNK:
 370                         return (EROFS);
 371                 default:
 372                         break;
 373                 }
 374         }
 375         /*
 376          * For nfs v3, check to see if we have done this recently, and if
 377          * so return our cached result instead of making an ACCESS call.
 378          * If not, do an access rpc, otherwise you are stuck emulating
 379          * ufs_access() locally using the vattr. This may not be correct,
 380          * since the server may apply other access criteria such as
 381          * client uid-->server uid mapping that we do not know about.
 382          */
 383         if (v3) {
 384                 if (ap->a_accmode & VREAD)
 385                         mode = NFSV3ACCESS_READ;
 386                 else
 387                         mode = 0;
 388                 if (vp->v_type != VDIR) {
 389                         if (ap->a_accmode & VWRITE)
 390                                 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
 391                         if (ap->a_accmode & VEXEC)
 392                                 mode |= NFSV3ACCESS_EXECUTE;
 393                 } else {
 394                         if (ap->a_accmode & VWRITE)
 395                                 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
 396                                          NFSV3ACCESS_DELETE);
 397                         if (ap->a_accmode & VEXEC)
 398                                 mode |= NFSV3ACCESS_LOOKUP;
 399                 }
 400                 /* XXX safety belt, only make blanket request if caching */
 401                 if (nfsaccess_cache_timeout > 0) {
 402                         wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
 403                                 NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
 404                                 NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
 405                 } else {
 406                         wmode = mode;
 407                 }
 408
 409                 /*
 410                  * Does our cached result allow us to give a definite yes to
 411                  * this request?
 412                  */
 413                 gotahit = 0;
 414                 mtx_lock(&np->n_mtx);
 415                 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
 416                         if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
 417                                 if (time_second < (np->n_accesscache[i].stamp +
 418                                     nfsaccess_cache_timeout) &&
 419                                     (np->n_accesscache[i].mode & mode) == mode) {
 420                                         nfsstats.accesscache_hits++;
 421                                         gotahit = 1;
 422                                 }
 423                                 break;
 424                         }
 425                 }
 426                 mtx_unlock(&np->n_mtx);
 427 #ifdef KDTRACE_HOOKS
 428                 if (gotahit)
 429                         KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp,
 430                             ap->a_cred->cr_uid, mode);
 431                 else
 432                         KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp,
 433                             ap->a_cred->cr_uid, mode);
 434 #endif
 435                 if (gotahit == 0) {
 436                         /*
 437                          * Either a no, or a don't know.  Go to the wire.
 438                          */
 439                         nfsstats.accesscache_misses++;
 440                         error = nfs3_access_otw(vp, wmode, ap->a_td, ap->a_cred,
 441                             &rmode);
 442                         if (!error) {
 443                                 if ((rmode & mode) != mode)
 444                                         error = EACCES;
 445                         }
 446                 }
 447                 return (error);
 448         } else {
 449                 if ((error = nfsspec_access(ap)) != 0) {
 450                         return (error);
 451                 }
 452                 /*
 453                  * Attempt to prevent a mapped root from accessing a file
 454                  * which it shouldn't.  We try to read a byte from the file
 455                  * if the user is root and the file is not zero length.
 456                  * After calling nfsspec_access, we should have the correct
 457                  * file size cached.
 458                  */
 459                 mtx_lock(&np->n_mtx);
 460                 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
 461                     && VTONFS(vp)->n_size > 0) {
 462                         struct iovec aiov;
 463                         struct uio auio;
 464                         char buf[1];
 465
 466                         mtx_unlock(&np->n_mtx);
 467                         aiov.iov_base = buf;
 468                         aiov.iov_len = 1;
 469                         auio.uio_iov = &aiov;
 470                         auio.uio_iovcnt = 1;
 471                         auio.uio_offset = 0;
 472                         auio.uio_resid = 1;
 473                         auio.uio_segflg = UIO_SYSSPACE;
 474                         auio.uio_rw = UIO_READ;
 475                         auio.uio_td = ap->a_td;
 476
 477                         if (vp->v_type == VREG)
 478                                 error = nfs_readrpc(vp, &auio, ap->a_cred);
 479                         else if (vp->v_type == VDIR) {
 480                                 char* bp;
 481                                 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
 482                                 aiov.iov_base = bp;
 483                                 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
 484                                 error = nfs_readdirrpc(vp, &auio, ap->a_cred);
 485                                 free(bp, M_TEMP);
 486                         } else if (vp->v_type == VLNK)
 487                                 error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
 488                         else
 489                                 error = EACCES;
 490                 } else
 491                         mtx_unlock(&np->n_mtx);
 492                 return (error);
 493         }
 494 }
 495
 496 int nfs_otw_getattr_avoid = 0;
 497
 498 /*
 499  * nfs open vnode op
 500  * Check to see if the type is ok
 501  * and that deletion is not in progress.
 502  * For paged in text files, you will need to flush the page cache
 503  * if consistency is lost.
 504  */
 505 /* ARGSUSED */
 506 static int
 507 nfs_open(struct vop_open_args *ap)
 508 {
 509         struct vnode *vp = ap->a_vp;
 510         struct nfsnode *np = VTONFS(vp);
 511         struct vattr vattr;
 512         int error;
 513         int fmode = ap->a_mode;
 514
 515         if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
 516                 return (EOPNOTSUPP);
 517
 518         /*
 519          * Get a valid lease. If cached data is stale, flush it.
 520          */
 521         mtx_lock(&np->n_mtx);
 522         if (np->n_flag & NMODIFIED) {
 523                 mtx_unlock(&np->n_mtx);
 524                 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 525                 if (error == EINTR || error == EIO)
 526                         return (error);
 527                 np->n_attrstamp = 0;
 528                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 529                 if (vp->v_type == VDIR)
 530                         np->n_direofoffset = 0;
 531                 error = VOP_GETATTR(vp, &vattr, ap->a_cred);
 532                 if (error)
 533                         return (error);
 534                 mtx_lock(&np->n_mtx);
 535                 np->n_mtime = vattr.va_mtime;
 536                 mtx_unlock(&np->n_mtx);
 537         } else {
 538                 struct thread *td = curthread;
 539
 540                 if (np->n_ac_ts_syscalls != td->td_syscalls ||
 541                     np->n_ac_ts_tid != td->td_tid ||
 542                     td->td_proc == NULL ||
 543                     np->n_ac_ts_pid != td->td_proc->p_pid) {
 544                         np->n_attrstamp = 0;
 545                         KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 546                 }
 547                 mtx_unlock(&np->n_mtx);
 548                 error = VOP_GETATTR(vp, &vattr, ap->a_cred);
 549                 if (error)
 550                         return (error);
 551                 mtx_lock(&np->n_mtx);
 552                 if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
 553                         if (vp->v_type == VDIR)
 554                                 np->n_direofoffset = 0;
 555                         mtx_unlock(&np->n_mtx);
 556                         error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 557                         if (error == EINTR || error == EIO) {
 558                                 return (error);
 559                         }
 560                         mtx_lock(&np->n_mtx);
 561                         np->n_mtime = vattr.va_mtime;
 562                 }
 563                 mtx_unlock(&np->n_mtx);
 564         }
 565         /*
 566          * If the object has >= 1 O_DIRECT active opens, we disable caching.
 567          */
 568         if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
 569                 if (np->n_directio_opens == 0) {
 570                         error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 571                         if (error)
 572                                 return (error);
 573                         mtx_lock(&np->n_mtx);
 574                         np->n_flag |= NNONCACHE;
 575                         mtx_unlock(&np->n_mtx);
 576                 }
 577                 np->n_directio_opens++;
 578         }
 579         vnode_create_vobject(vp, vattr.va_size, ap->a_td);
 580         return (0);
 581 }
 582
 583 /*
 584  * nfs close vnode op
 585  * What an NFS client should do upon close after writing is a debatable issue.
 586  * Most NFS clients push delayed writes to the server upon close, basically for
 587  * two reasons:
 588  * 1 - So that any write errors may be reported back to the client process
 589  *     doing the close system call. By far the two most likely errors are
 590  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
 591  * 2 - To put a worst case upper bound on cache inconsistency between
 592  *     multiple clients for the file.
 593  * There is also a consistency problem for Version 2 of the protocol w.r.t.
 594  * not being able to tell if other clients are writing a file concurrently,
 595  * since there is no way of knowing if the changed modify time in the reply
 596  * is only due to the write for this client.
 597  * (NFS Version 3 provides weak cache consistency data in the reply that
 598  *  should be sufficient to detect and handle this case.)
 599  *
 600  * The current code does the following:
 601  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
 602  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
 603  *                     or commit them (this satisfies 1 and 2 except for the
 604  *                     case where the server crashes after this close but
 605  *                     before the commit RPC, which is felt to be "good
 606  *                     enough". Changing the last argument to nfs_flush() to
 607  *                     a 1 would force a commit operation, if it is felt a
 608  *                     commit is necessary now.
 609  */
 610 /* ARGSUSED */
 611 static int
 612 nfs_close(struct vop_close_args *ap)
 613 {
 614         struct vnode *vp = ap->a_vp;
 615         struct nfsnode *np = VTONFS(vp);
 616         int error = 0;
 617         int fmode = ap->a_fflag;
 618
 619         if (vp->v_type == VREG) {
 620             /*
 621              * Examine and clean dirty pages, regardless of NMODIFIED.
 622              * This closes a major hole in close-to-open consistency.
 623              * We want to push out all dirty pages (and buffers) on
 624              * close, regardless of whether they were dirtied by
 625              * mmap'ed writes or via write().
 626              */
 627             if (nfs_clean_pages_on_close && vp->v_object) {
 628                 VM_OBJECT_LOCK(vp->v_object);
 629                 vm_object_page_clean(vp->v_object, 0, 0, 0);
 630                 VM_OBJECT_UNLOCK(vp->v_object);
 631             }
 632             mtx_lock(&np->n_mtx);
 633             if (np->n_flag & NMODIFIED) {
 634                 mtx_unlock(&np->n_mtx);
 635                 if (NFS_ISV3(vp)) {
 636                     /*
 637                      * Under NFSv3 we have dirty buffers to dispose of.  We
 638                      * must flush them to the NFS server.  We have the option
 639                      * of waiting all the way through the commit rpc or just
 640                      * waiting for the initial write.  The default is to only
 641                      * wait through the initial write so the data is in the
 642                      * server's cache, which is roughly similar to the state
 643                      * a standard disk subsystem leaves the file in on close().
 644                      *
 645                      * We cannot clear the NMODIFIED bit in np->n_flag due to
 646                      * potential races with other processes, and certainly
 647                      * cannot clear it if we don't commit.
 648                      */
 649                     int cm = nfsv3_commit_on_close ? 1 : 0;
 650                     error = nfs_flush(vp, MNT_WAIT, cm);
 651                     /* np->n_flag &= ~NMODIFIED; */
 652                 } else
 653                     error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 654                 mtx_lock(&np->n_mtx);
 655             }
 656             if (np->n_flag & NWRITEERR) {
 657                 np->n_flag &= ~NWRITEERR;
 658                 error = np->n_error;
 659             }
 660             mtx_unlock(&np->n_mtx);
 661         }
 662         if (nfs_directio_enable)
 663                 KASSERT((np->n_directio_asyncwr == 0),
 664                         ("nfs_close: dirty unflushed (%d) directio buffers\n",
 665                          np->n_directio_asyncwr));
 666         if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
 667                 mtx_lock(&np->n_mtx);
 668                 KASSERT((np->n_directio_opens > 0),
 669                         ("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
 670                 np->n_directio_opens--;
 671                 if (np->n_directio_opens == 0)
 672                         np->n_flag &= ~NNONCACHE;
 673                 mtx_unlock(&np->n_mtx);
 674         }
 675         return (error);
 676 }
 677
 678 /*
 679  * nfs getattr call from vfs.
 680  */
 681 static int
 682 nfs_getattr(struct vop_getattr_args *ap)
 683 {
 684         struct vnode *vp = ap->a_vp;
 685         struct nfsnode *np = VTONFS(vp);
 686         struct thread *td = curthread;
 687         struct vattr *vap = ap->a_vap;
 688         struct vattr vattr;
 689         caddr_t bpos, dpos;
 690         int error = 0;
 691         struct mbuf *mreq, *mrep, *md, *mb;
 692         int v3 = NFS_ISV3(vp);
 693
 694         /*
 695          * Update local times for special files.
 696          */
 697         mtx_lock(&np->n_mtx);
 698         if (np->n_flag & (NACC | NUPD))
 699                 np->n_flag |= NCHG;
 700         mtx_unlock(&np->n_mtx);
 701         /*
 702          * First look in the cache.
 703          */
 704         if (nfs_getattrcache(vp, &vattr) == 0)
 705                 goto nfsmout;
 706         if (v3 && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) {
 707                 nfsstats.accesscache_misses++;
 708                 nfs3_access_otw(vp, NFSV3ACCESS_ALL, td, ap->a_cred, NULL);
 709                 if (nfs_getattrcache(vp, &vattr) == 0)
 710                         goto nfsmout;
 711         }
 712         nfsstats.rpccnt[NFSPROC_GETATTR]++;
 713         mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
 714         mb = mreq;
 715         bpos = mtod(mb, caddr_t);
 716         nfsm_fhtom(vp, v3);
 717         nfsm_request(vp, NFSPROC_GETATTR, td, ap->a_cred);
 718         if (!error) {
 719                 nfsm_loadattr(vp, &vattr);
 720         }
 721         m_freem(mrep);
 722 nfsmout:
 723         vap->va_type = vattr.va_type;
 724         vap->va_mode = vattr.va_mode;
 725         vap->va_nlink = vattr.va_nlink;
 726         vap->va_uid = vattr.va_uid;
 727         vap->va_gid = vattr.va_gid;
 728         vap->va_fsid = vattr.va_fsid;
 729         vap->va_fileid = vattr.va_fileid;
 730         vap->va_size = vattr.va_size;
 731         vap->va_blocksize = vattr.va_blocksize;
 732         vap->va_atime = vattr.va_atime;
 733         vap->va_mtime = vattr.va_mtime;
 734         vap->va_ctime = vattr.va_ctime;
 735         vap->va_gen = vattr.va_gen;
 736         vap->va_flags = vattr.va_flags;
 737         vap->va_rdev = vattr.va_rdev;
 738         vap->va_bytes = vattr.va_bytes;
 739         vap->va_filerev = vattr.va_filerev;
 740
 741         return (error);
 742 }
 743
 744 /*
 745  * nfs setattr call.
 746  */
 747 static int
 748 nfs_setattr(struct vop_setattr_args *ap)
 749 {
 750         struct vnode *vp = ap->a_vp;
 751         struct nfsnode *np = VTONFS(vp);
 752         struct vattr *vap = ap->a_vap;
 753         struct thread *td = curthread;
 754         int error = 0;
 755         u_quad_t tsize;
 756
 757 #ifndef nolint
 758         tsize = (u_quad_t)0;
 759 #endif
 760
 761         /*
 762          * Setting of flags is not supported.
 763          */
 764         if (vap->va_flags != VNOVAL)
 765                 return (EOPNOTSUPP);
 766
 767         /*
 768          * Disallow write attempts if the filesystem is mounted read-only.
 769          */
 770         if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
 771             vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
 772             vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
 773             (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 774                 error = EROFS;
 775                 goto out;
 776         }
 777         if (vap->va_size != VNOVAL) {
 778                 switch (vp->v_type) {
 779                 case VDIR:
 780                         return (EISDIR);
 781                 case VCHR:
 782                 case VBLK:
 783                 case VSOCK:
 784                 case VFIFO:
 785                         if (vap->va_mtime.tv_sec == VNOVAL &&
 786                             vap->va_atime.tv_sec == VNOVAL &&
 787                             vap->va_mode == (mode_t)VNOVAL &&
 788                             vap->va_uid == (uid_t)VNOVAL &&
 789                             vap->va_gid == (gid_t)VNOVAL)
 790                                 return (0);
 791                         vap->va_size = VNOVAL;
 792                         break;
 793                 default:
 794                         /*
 795                          * Disallow write attempts if the filesystem is
 796                          * mounted read-only.
 797                          */
 798                         if (vp->v_mount->mnt_flag & MNT_RDONLY)
 799                                 return (EROFS);
 800                         /*
 801                          *  We run vnode_pager_setsize() early (why?),
 802                          * we must set np->n_size now to avoid vinvalbuf
 803                          * V_SAVE races that might setsize a lower
 804                          * value.
 805                          */
 806                         mtx_lock(&np->n_mtx);
 807                         tsize = np->n_size;
 808                         mtx_unlock(&np->n_mtx);
 809                         error = nfs_meta_setsize(vp, ap->a_cred, td,
 810                             vap->va_size);
 811                         mtx_lock(&np->n_mtx);
 812                         if (np->n_flag & NMODIFIED) {
 813                             tsize = np->n_size;
 814                             mtx_unlock(&np->n_mtx);
 815                             if (vap->va_size == 0)
 816                                 error = nfs_vinvalbuf(vp, 0, td, 1);
 817                             else
 818                                 error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
 819                             if (error) {
 820                                 vnode_pager_setsize(vp, tsize);
 821                                 goto out;
 822                             }
 823                         } else
 824                             mtx_unlock(&np->n_mtx);
 825                         /*
 826                          * np->n_size has already been set to vap->va_size
 827                          * in nfs_meta_setsize(). We must set it again since
 828                          * nfs_loadattrcache() could be called through
 829                          * nfs_meta_setsize() and could modify np->n_size.
 830                          */
 831                         mtx_lock(&np->n_mtx);
 832                         np->n_vattr.va_size = np->n_size = vap->va_size;
 833                         mtx_unlock(&np->n_mtx);
 834                 };
 835         } else {
 836                 mtx_lock(&np->n_mtx);
 837                 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) &&
 838                     (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
 839                         mtx_unlock(&np->n_mtx);
 840                         if ((error = nfs_vinvalbuf(vp, V_SAVE, td, 1)) != 0 &&
 841                             (error == EINTR || error == EIO))
 842                                 return error;
 843                 } else
 844                         mtx_unlock(&np->n_mtx);
 845         }
 846         error = nfs_setattrrpc(vp, vap, ap->a_cred);
 847         if (error && vap->va_size != VNOVAL) {
 848                 mtx_lock(&np->n_mtx);
 849                 np->n_size = np->n_vattr.va_size = tsize;
 850                 vnode_pager_setsize(vp, tsize);
 851                 mtx_unlock(&np->n_mtx);
 852         }
 853 out:
 854         return (error);
 855 }
 856
 857 /*
 858  * Do an nfs setattr rpc.
 859  */
 860 static int
 861 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred)
 862 {
 863         struct nfsv2_sattr *sp;
 864         struct nfsnode *np = VTONFS(vp);
 865         caddr_t bpos, dpos;
 866         u_int32_t *tl;
 867         int error = 0, i, wccflag = NFSV3_WCCRATTR;
 868         struct mbuf *mreq, *mrep, *md, *mb;
 869         int v3 = NFS_ISV3(vp);
 870
 871         nfsstats.rpccnt[NFSPROC_SETATTR]++;
 872         mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
 873         mb = mreq;
 874         bpos = mtod(mb, caddr_t);
 875         nfsm_fhtom(vp, v3);
 876         if (v3) {
 877                 nfsm_v3attrbuild(vap, TRUE);
 878                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
 879                 *tl = nfs_false;
 880         } else {
 881                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
 882                 if (vap->va_mode == (mode_t)VNOVAL)
 883                         sp->sa_mode = nfs_xdrneg1;
 884                 else
 885                         sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
 886                 if (vap->va_uid == (uid_t)VNOVAL)
 887                         sp->sa_uid = nfs_xdrneg1;
 888                 else
 889                         sp->sa_uid = txdr_unsigned(vap->va_uid);
 890                 if (vap->va_gid == (gid_t)VNOVAL)
 891                         sp->sa_gid = nfs_xdrneg1;
 892                 else
 893                         sp->sa_gid = txdr_unsigned(vap->va_gid);
 894                 sp->sa_size = txdr_unsigned(vap->va_size);
 895                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 896                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 897         }
 898         nfsm_request(vp, NFSPROC_SETATTR, curthread, cred);
 899         if (v3) {
 900                 mtx_lock(&np->n_mtx);
 901                 for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
 902                         np->n_accesscache[i].stamp = 0;
 903                 mtx_unlock(&np->n_mtx);
 904                 KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
 905                 nfsm_wcc_data(vp, wccflag);
 906         } else
 907                 nfsm_loadattr(vp, NULL);
 908         m_freem(mrep);
 909 nfsmout:
 910         return (error);
 911 }
 912
/*
 * nfs lookup call, one step at a time...
 * First look in cache
 * If not found, unlock the directory nfsnode and do the rpc
 *
 * Looks up the component described by ap->a_cnp in directory ap->a_dvp
 * and stores the resulting vnode in *ap->a_vpp (NULLVP on failure).
 *
 * Return values visible in this function:
 *   0            - found; *vpp holds the vnode.
 *   EROFS        - DELETE/RENAME (or a CREATE/RENAME miss) of the last
 *                  component on a read-only mount.
 *   ENOTDIR      - dvp is not a directory.
 *   EISDIR       - RENAME of the last component resolved to dvp itself.
 *   EJUSTRETURN  - last component not found during CREATE or RENAME.
 *   ENOENT       - not found (possibly answered from the negative cache).
 *   other        - errors from VOP_ACCESS(), cache_lookup(), nfs_nget(),
 *                  vfs_busy() or the RPC.
 */
static int
nfs_lookup(struct vop_lookup_args *ap)
{
        struct componentname *cnp = ap->a_cnp;
        struct vnode *dvp = ap->a_dvp;
        struct vnode **vpp = ap->a_vpp;
        struct mount *mp = dvp->v_mount;
        struct vattr vattr;
        int flags = cnp->cn_flags;
        struct vnode *newvp;
        struct nfsmount *nmp;
        caddr_t bpos, dpos;
        struct mbuf *mreq, *mrep, *md, *mb;
        long len;
        nfsfh_t *fhp;
        struct nfsnode *np;
        int error = 0, attrflag, fhsize, ltype;
        int v3 = NFS_ISV3(dvp);
        struct thread *td = cnp->cn_thread;

        *vpp = NULLVP;
        if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
            (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
                return (EROFS);
        if (dvp->v_type != VDIR)
                return (ENOTDIR);
        /* nmp is assigned here but not read again in this function. */
        nmp = VFSTONFS(mp);
        /* np starts out as the directory's node; nfs_nget() below may repoint it. */
        np = VTONFS(dvp);
        if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) {
                *vpp = NULLVP;
                return (error);
        }
        /*
         * Name cache first.  As handled below: -1 is a positive hit,
         * ENOENT a negative hit, any other positive value is returned
         * as an error, and everything else falls through to the RPC.
         */
        error = cache_lookup(dvp, vpp, cnp);
        if (error > 0 && error != ENOENT)
                return (error);
        if (error == -1) {
                /*
                 * We only accept a positive hit in the cache if the
                 * change time of the file matches our cached copy.
                 * Otherwise, we discard the cache entry and fallback
                 * to doing a lookup RPC.
                 */
                newvp = *vpp;
                if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred)
                    && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
                        nfsstats.lookupcache_hits++;
                        if (cnp->cn_nameiop != LOOKUP &&
                            (flags & ISLASTCN))
                                cnp->cn_flags |= SAVENAME;
                        return (0);
                }
                cache_purge(newvp);
                /* A hit on dvp itself is released with vrele(), any other vnode with vput(). */
                if (dvp != newvp)
                        vput(newvp);
                else
                        vrele(newvp);
                *vpp = NULLVP;
        } else if (error == ENOENT) {
                if (dvp->v_iflag & VI_DOOMED)
                        return (ENOENT);
                /*
                 * We only accept a negative hit in the cache if the
                 * modification time of the parent directory matches
                 * our cached copy.  Otherwise, we discard all of the
                 * negative cache entries for this directory.
                 */
                if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
                    vattr.va_mtime.tv_sec == np->n_dmtime) {
                        nfsstats.lookupcache_hits++;
                        return (ENOENT);
                }
                cache_purge_negative(dvp);
                mtx_lock(&np->n_mtx);
                np->n_dmtime = 0;
                mtx_unlock(&np->n_mtx);
        }
        /* Cache miss (or rejected hit): ask the server. */
        error = 0;
        newvp = NULLVP;
        nfsstats.lookupcache_misses++;
        nfsstats.rpccnt[NFSPROC_LOOKUP]++;
        len = cnp->cn_namelen;
        mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
                NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
        mb = mreq;
        bpos = mtod(mb, caddr_t);
        nfsm_fhtom(dvp, v3);
        nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
        nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred);
        if (error) {
                /* For v3 the reply is still parsed for the directory's post-op attributes. */
                if (v3) {
                        nfsm_postop_attr(dvp, attrflag);
                        m_freem(mrep);
                }
                goto nfsmout;
        }
        nfsm_getfh(fhp, fhsize, v3);

        /*
         * Handle RENAME case...
         */
        if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
                /* The name resolved to the directory itself. */
                if (NFS_CMPFH(np, fhp, fhsize)) {
                        m_freem(mrep);
                        return (EISDIR);
                }
                error = nfs_nget(mp, fhp, fhsize, &np, LK_EXCLUSIVE);
                if (error) {
                        m_freem(mrep);
                        return (error);
                }
                newvp = NFSTOV(np);
                if (v3) {
                        nfsm_postop_attr(newvp, attrflag);
                        nfsm_postop_attr(dvp, attrflag);
                } else
                        nfsm_loadattr(newvp, NULL);
                *vpp = newvp;
                m_freem(mrep);
                cnp->cn_flags |= SAVENAME;
                return (0);
        }

        if (flags & ISDOTDOT) {
                /*
                 * "..": dvp is unlocked while the parent vnode is
                 * obtained and relocked afterwards with its original
                 * lock type.  The mount is busied across that window;
                 * if the non-blocking busy fails, dvp is unlocked for
                 * the blocking attempt.  dvp is rechecked for VI_DOOMED
                 * each time it has been relocked.
                 */
                ltype = VOP_ISLOCKED(dvp);
                error = vfs_busy(mp, MBF_NOWAIT);
                if (error != 0) {
                        vfs_ref(mp);
                        VOP_UNLOCK(dvp, 0);
                        error = vfs_busy(mp, 0);
                        vn_lock(dvp, ltype | LK_RETRY);
                        vfs_rel(mp);
                        if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
                                vfs_unbusy(mp);
                                error = ENOENT;
                        }
                        if (error != 0) {
                                m_freem(mrep);
                                return (error);
                        }
                }
                VOP_UNLOCK(dvp, 0);
                error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
                if (error == 0)
                        newvp = NFSTOV(np);
                vfs_unbusy(mp);
                if (newvp != dvp)
                        vn_lock(dvp, ltype | LK_RETRY);
                if (dvp->v_iflag & VI_DOOMED) {
                        if (error == 0) {
                                if (newvp == dvp)
                                        vrele(newvp);
                                else
                                        vput(newvp);
                        }
                        error = ENOENT;
                }
                if (error) {
                        m_freem(mrep);
                        return (error);
                }
        } else if (NFS_CMPFH(np, fhp, fhsize)) {
                /* The name resolved to dvp itself: take another reference on it. */
                VREF(dvp);
                newvp = dvp;
        } else {
                error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
                if (error) {
                        m_freem(mrep);
                        return (error);
                }
                newvp = NFSTOV(np);
        }
        if (v3) {
                nfsm_postop_attr(newvp, attrflag);
                nfsm_postop_attr(dvp, attrflag);
        } else
                nfsm_loadattr(newvp, NULL);
        if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
                cnp->cn_flags |= SAVENAME;
        if ((cnp->cn_flags & MAKEENTRY) &&
            (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
                /* Record the ctime that the positive-hit check above compares against. */
                np->n_ctime = np->n_vattr.va_ctime.tv_sec;
                cache_enter(dvp, newvp, cnp);
        }
        *vpp = newvp;
        m_freem(mrep);
nfsmout:
        if (error) {
                /*
                 * NOTE(review): newvp may equal dvp here (VREF case above)
                 * if a later reply-parsing macro failed; vput() would then
                 * also unlock dvp -- confirm this is intended.
                 */
                if (newvp != NULLVP) {
                        vput(newvp);
                        *vpp = NULLVP;
                }

                if (error != ENOENT)
                        goto done;

                /* The requested file was not found. */
                if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
                    (flags & ISLASTCN)) {
                        /*
                         * XXX: UFS does a full VOP_ACCESS(dvp,
                         * VWRITE) here instead of just checking
                         * MNT_RDONLY.
                         */
                        if (mp->mnt_flag & MNT_RDONLY)
                                return (EROFS);
                        cnp->cn_flags |= SAVENAME;
                        return (EJUSTRETURN);
                }

                if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) {
                        /*
                         * Maintain n_dmtime as the modification time
                         * of the parent directory when the oldest -ve
                         * name cache entry for this directory was
                         * added.
                         */
                        mtx_lock(&np->n_mtx);
                        if (np->n_dmtime == 0)
                                np->n_dmtime = np->n_vattr.va_mtime.tv_sec;
                        mtx_unlock(&np->n_mtx);
                        cache_enter(dvp, NULL, cnp);
                }
                return (ENOENT);
        }
done:
        return (error);
}
1145
1146 /*
1147  * nfs read call.
1148  * Just call nfs_bioread() to do the work.
1149  */
1150 static int
1151 nfs_read(struct vop_read_args *ap)
1152 {
1153         struct vnode *vp = ap->a_vp;
1154
1155         switch (vp->v_type) {
1156         case VREG:
1157                 return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
1158         case VDIR:
1159                 return (EISDIR);
1160         default:
1161                 return (EOPNOTSUPP);
1162         }
1163 }
1164
1165 /*
1166  * nfs readlink call
1167  */
1168 static int
1169 nfs_readlink(struct vop_readlink_args *ap)
1170 {
1171         struct vnode *vp = ap->a_vp;
1172
1173         if (vp->v_type != VLNK)
1174                 return (EINVAL);
1175         return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
1176 }
1177
1178 /*
1179  * Do a readlink rpc.
1180  * Called by nfs_doio() from below the buffer cache.
1181  */
1182 int
1183 nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
1184 {
1185         caddr_t bpos, dpos;
1186         int error = 0, len, attrflag;
1187         struct mbuf *mreq, *mrep, *md, *mb;
1188         int v3 = NFS_ISV3(vp);
1189
1190         nfsstats.rpccnt[NFSPROC_READLINK]++;
1191         mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
1192         mb = mreq;
1193         bpos = mtod(mb, caddr_t);
1194         nfsm_fhtom(vp, v3);
1195         nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred);
1196         if (v3)
1197                 nfsm_postop_attr(vp, attrflag);
1198         if (!error) {
1199                 nfsm_strsiz(len, NFS_MAXPATHLEN);
1200                 if (len == NFS_MAXPATHLEN) {
1201                         struct nfsnode *np = VTONFS(vp);
1202                         mtx_lock(&np->n_mtx);
1203                         if (np->n_size && np->n_size < NFS_MAXPATHLEN)
1204                                 len = np->n_size;
1205                         mtx_unlock(&np->n_mtx);
1206                 }
1207                 nfsm_mtouio(uiop, len);
1208         }
1209         m_freem(mrep);
1210 nfsmout:
1211         return (error);
1212 }
1213
1214 /*
1215  * nfs read rpc call
1216  * Ditto above
1217  */
1218 int
1219 nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
1220 {
1221         u_int32_t *tl;
1222         caddr_t bpos, dpos;
1223         struct mbuf *mreq, *mrep, *md, *mb;
1224         struct nfsmount *nmp;
1225         int error = 0, len, retlen, tsiz, eof, attrflag;
1226         int v3 = NFS_ISV3(vp);
1227         int rsize;
1228
1229 #ifndef nolint
1230         eof = 0;
1231 #endif
1232         nmp = VFSTONFS(vp->v_mount);
1233         tsiz = uiop->uio_resid;
1234         mtx_lock(&nmp->nm_mtx);
1235         if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
1236                 mtx_unlock(&nmp->nm_mtx);
1237                 return (EFBIG);
1238         }
1239         rsize = nmp->nm_rsize;
1240         mtx_unlock(&nmp->nm_mtx);
1241         while (tsiz > 0) {
1242                 nfsstats.rpccnt[NFSPROC_READ]++;
1243                 len = (tsiz > rsize) ? rsize : tsiz;
1244                 mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
1245                 mb = mreq;
1246                 bpos = mtod(mb, caddr_t);
1247                 nfsm_fhtom(vp, v3);
1248                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3);
1249                 if (v3) {
1250                         txdr_hyper(uiop->uio_offset, tl);
1251                         *(tl + 2) = txdr_unsigned(len);
1252                 } else {
1253                         *tl++ = txdr_unsigned(uiop->uio_offset);
1254                         *tl++ = txdr_unsigned(len);
1255                         *tl = 0;
1256                 }
1257                 nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred);
1258                 if (v3) {
1259                         nfsm_postop_attr(vp, attrflag);
1260                         if (error) {
1261                                 m_freem(mrep);
1262                                 goto nfsmout;
1263                         }
1264                         tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
1265                         eof = fxdr_unsigned(int, *(tl + 1));
1266                 } else {
1267                         nfsm_loadattr(vp, NULL);
1268                 }
1269                 nfsm_strsiz(retlen, rsize);
1270                 nfsm_mtouio(uiop, retlen);
1271                 m_freem(mrep);
1272                 tsiz -= retlen;
1273                 if (v3) {
1274                         if (eof || retlen == 0) {
1275                                 tsiz = 0;
1276                         }
1277                 } else if (retlen < len) {
1278                         tsiz = 0;
1279                 }
1280         }
1281 nfsmout:
1282         return (error);
1283 }
1284
1285 /*
1286  * nfs write call
1287  */
1288 int
1289 nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
1290              int *iomode, int *must_commit)
1291 {
1292         u_int32_t *tl;
1293         int32_t backup;
1294         caddr_t bpos, dpos;
1295         struct mbuf *mreq, *mrep, *md, *mb;
1296         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1297         int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
1298         int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
1299         int wsize;
1300
1301 #ifndef DIAGNOSTIC
1302         if (uiop->uio_iovcnt != 1)
1303                 panic("nfs: writerpc iovcnt > 1");
1304 #endif
1305         *must_commit = 0;
1306         tsiz = uiop->uio_resid;
1307         mtx_lock(&nmp->nm_mtx);
1308         if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
1309                 mtx_unlock(&nmp->nm_mtx);
1310                 return (EFBIG);
1311         }
1312         wsize = nmp->nm_wsize;
1313         mtx_unlock(&nmp->nm_mtx);
1314         while (tsiz > 0) {
1315                 nfsstats.rpccnt[NFSPROC_WRITE]++;
1316                 len = (tsiz > wsize) ? wsize : tsiz;
1317                 mreq = nfsm_reqhead(vp, NFSPROC_WRITE,
1318                         NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
1319                 mb = mreq;
1320                 bpos = mtod(mb, caddr_t);
1321                 nfsm_fhtom(vp, v3);
1322                 if (v3) {
1323                         tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
1324                         txdr_hyper(uiop->uio_offset, tl);
1325                         tl += 2;
1326                         *tl++ = txdr_unsigned(len);
1327                         *tl++ = txdr_unsigned(*iomode);
1328                         *tl = txdr_unsigned(len);
1329                 } else {
1330                         u_int32_t x;
1331
1332                         tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
1333                         /* Set both "begin" and "current" to non-garbage. */
1334                         x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1335                         *tl++ = x;      /* "begin offset" */
1336                         *tl++ = x;      /* "current offset" */
1337                         x = txdr_unsigned(len);
1338                         *tl++ = x;      /* total to this offset */
1339                         *tl = x;        /* size of this write */
1340                 }
1341                 nfsm_uiotom(uiop, len);
1342                 nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred);
1343                 if (v3) {
1344                         wccflag = NFSV3_WCCCHK;
1345                         nfsm_wcc_data(vp, wccflag);
1346                         if (!error) {
1347                                 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED
1348                                         + NFSX_V3WRITEVERF);
1349                                 rlen = fxdr_unsigned(int, *tl++);
1350                                 if (rlen == 0) {
1351                                         error = NFSERR_IO;
1352                                         m_freem(mrep);
1353                                         break;
1354                                 } else if (rlen < len) {
1355                                         backup = len - rlen;
1356                                         uiop->uio_iov->iov_base =
1357                                             (char *)uiop->uio_iov->iov_base -
1358                                             backup;
1359                                         uiop->uio_iov->iov_len += backup;
1360                                         uiop->uio_offset -= backup;
1361                                         uiop->uio_resid += backup;
1362                                         len = rlen;
1363                                 }
1364                                 commit = fxdr_unsigned(int, *tl++);
1365
1366                                 /*
1367                                  * Return the lowest committment level
1368                                  * obtained by any of the RPCs.
1369                                  */
1370                                 if (committed == NFSV3WRITE_FILESYNC)
1371                                         committed = commit;
1372                                 else if (committed == NFSV3WRITE_DATASYNC &&
1373                                         commit == NFSV3WRITE_UNSTABLE)
1374                                         committed = commit;
1375                                 mtx_lock(&nmp->nm_mtx);
1376                                 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
1377                                     bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1378                                         NFSX_V3WRITEVERF);
1379                                     nmp->nm_state |= NFSSTA_HASWRITEVERF;
1380                                 } else if (bcmp((caddr_t)tl,
1381                                     (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
1382                                     *must_commit = 1;
1383                                     bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1384                                         NFSX_V3WRITEVERF);
1385                                 }
1386                                 mtx_unlock(&nmp->nm_mtx);
1387                         }
1388                 } else {
1389                         nfsm_loadattr(vp, NULL);
1390                 }
1391                 if (wccflag) {
1392                         mtx_lock(&(VTONFS(vp))->n_mtx);
1393                         VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
1394                         mtx_unlock(&(VTONFS(vp))->n_mtx);
1395                 }
1396                 m_freem(mrep);
1397                 if (error)
1398                         break;
1399                 tsiz -= len;
1400         }
1401 nfsmout:
1402         if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC)
1403                 committed = NFSV3WRITE_FILESYNC;
1404         *iomode = committed;
1405         if (error)
1406                 uiop->uio_resid = tsiz;
1407         return (error);
1408 }
1409
1410 /*
1411  * nfs mknod rpc
1412  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1413  * mode set to specify the file type and the size field for rdev.
1414  */
1415 static int
1416 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
1417     struct vattr *vap)
1418 {
1419         struct nfsv2_sattr *sp;
1420         u_int32_t *tl;
1421         struct vnode *newvp = NULL;
1422         struct nfsnode *np = NULL;
1423         struct vattr vattr;
1424         caddr_t bpos, dpos;
1425         int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
1426         struct mbuf *mreq, *mrep, *md, *mb;
1427         u_int32_t rdev;
1428         int v3 = NFS_ISV3(dvp);
1429
1430         if (vap->va_type == VCHR || vap->va_type == VBLK)
1431                 rdev = txdr_unsigned(vap->va_rdev);
1432         else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
1433                 rdev = nfs_xdrneg1;
1434         else {
1435                 return (EOPNOTSUPP);
1436         }
1437         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
1438                 return (error);
1439         nfsstats.rpccnt[NFSPROC_MKNOD]++;
1440         mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
1441                 + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1442         mb = mreq;
1443         bpos = mtod(mb, caddr_t);
1444         nfsm_fhtom(dvp, v3);
1445         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1446         if (v3) {
1447                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
1448                 *tl++ = vtonfsv3_type(vap->va_type);
1449                 nfsm_v3attrbuild(vap, FALSE);
1450                 if (vap->va_type == VCHR || vap->va_type == VBLK) {
1451                         tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
1452                         *tl++ = txdr_unsigned(major(vap->va_rdev));
1453                         *tl = txdr_unsigned(minor(vap->va_rdev));
1454                 }
1455         } else {
1456                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
1457                 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1458                 sp->sa_uid = nfs_xdrneg1;
1459                 sp->sa_gid = nfs_xdrneg1;
1460                 sp->sa_size = rdev;
1461                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1462                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1463         }
1464         nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred);
1465         if (!error) {
1466                 nfsm_mtofh(dvp, newvp, v3, gotvp);
1467                 if (!gotvp) {
1468                         if (newvp) {
1469                                 vput(newvp);
1470                                 newvp = NULL;
1471                         }
1472                         error = nfs_lookitup(dvp, cnp->cn_nameptr,
1473                             cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
1474                         if (!error)
1475                                 newvp = NFSTOV(np);
1476                 }
1477         }
1478         if (v3)
1479                 nfsm_wcc_data(dvp, wccflag);
1480         m_freem(mrep);
1481 nfsmout:
1482         if (error) {
1483                 if (newvp)
1484                         vput(newvp);
1485         } else {
1486                 if (cnp->cn_flags & MAKEENTRY)
1487                         cache_enter(dvp, newvp, cnp);
1488                 *vpp = newvp;
1489         }
1490         mtx_lock(&(VTONFS(dvp))->n_mtx);
1491         VTONFS(dvp)->n_flag |= NMODIFIED;
1492         if (!wccflag) {
1493                 VTONFS(dvp)->n_attrstamp = 0;
1494                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
1495         }
1496         mtx_unlock(&(VTONFS(dvp))->n_mtx);
1497         return (error);
1498 }
1499
1500 /*
1501  * nfs mknod vop
1502  * just call nfs_mknodrpc() to do the work.
1503  */
1504 /* ARGSUSED */
1505 static int
1506 nfs_mknod(struct vop_mknod_args *ap)
1507 {
1508         return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
1509 }
1510
1511 static u_long create_verf;
1512 /*
1513  * nfs file create call
      *
      * Creates the file named by ap->a_cnp in directory ap->a_dvp with the
      * attributes in ap->a_vap by sending a CREATE RPC.  A request for a
      * VSOCK is handed to nfs_mknodrpc() instead.
      *
      * For an NFSv3 exclusive create (VA_EXCLUSIVE set in va_vaflags) the
      * request carries a two-word verifier.  The first word is the address
      * of the first entry on V_in_ifaddrhead when INET is configured and
      * that list is not empty, otherwise the current value of create_verf;
      * the second word is create_verf after it has been incremented.  If the
      * reply is NFSERR_NOTSUPP the request is rebuilt and resent as a
      * non-exclusive create.  After a successful exclusive create a SETATTR
      * RPC is issued with both atime and mtime filled in.
      *
      * On success the new vnode is stored in *ap->a_vpp (and entered into
      * the name cache when MAKEENTRY is set) and 0 is returned; otherwise the
      * error is returned.  Once the RPC has been attempted the directory node
      * is always flagged NMODIFIED, and its n_attrstamp is zeroed when
      * wccflag is 0; both updates are made while holding n_mtx.
      *
      * NOTE(review): nothing in this function jumps to nfsmout explicitly,
      * and md/dpos are never referenced directly; the nfsm_* macros
      * presumably use the locals mb, bpos, md, dpos, mreq, mrep and error and
      * branch to nfsmout on failure -- confirm against the macro definitions
      * before renaming or reordering anything here.
1514  */
1515 static int
1516 nfs_create(struct vop_create_args *ap)
1517 {
1518         struct vnode *dvp = ap->a_dvp;
1519         struct vattr *vap = ap->a_vap;
1520         struct componentname *cnp = ap->a_cnp;
1521         struct nfsv2_sattr *sp;
1522         u_int32_t *tl;
1523         struct nfsnode *np = NULL;
1524         struct vnode *newvp = NULL;
1525         caddr_t bpos, dpos;
1526         int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
1527         struct mbuf *mreq, *mrep, *md, *mb;
1528         struct vattr vattr;
1529         int v3 = NFS_ISV3(dvp);
1530
1531         /*
1532          * Oops, not for me..
1533          */
1534         if (vap->va_type == VSOCK)
1535                 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
1536
             /* vattr is not examined below; only the call's error is used. */
1537         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
1538                 return (error);
1539         if (vap->va_vaflags & VA_EXCLUSIVE)
1540                 fmode |= O_EXCL;
     /* Retry point: re-entered from the error path below with O_EXCL cleared. */
1541 again:
1542         nfsstats.rpccnt[NFSPROC_CREATE]++;
1543         mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
1544                 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1545         mb = mreq;
1546         bpos = mtod(mb, caddr_t);
1547         nfsm_fhtom(dvp, v3);
1548         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1549         if (v3) {
1550                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
1551                 if (fmode & O_EXCL) {
1552                         *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
1553                         tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF);
1554 #ifdef INET
1555                         IN_IFADDR_RLOCK();
1556                         if (!TAILQ_EMPTY(&V_in_ifaddrhead))
1557                                 *tl++ = IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr.s_addr;
1558                         else
1559 #endif
1560                                 *tl++ = create_verf;
1561 #ifdef INET
1562                         IN_IFADDR_RUNLOCK();
1563 #endif
                             /* create_verf is bumped without any lock held here. */
1564                         *tl = ++create_verf;
1565                 } else {
1566                         *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
1567                         nfsm_v3attrbuild(vap, FALSE);
1568                 }
1569         } else {
1570                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
1571                 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1572                 sp->sa_uid = nfs_xdrneg1;
1573                 sp->sa_gid = nfs_xdrneg1;
1574                 sp->sa_size = 0;
1575                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1576                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1577         }
1578         nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred);
1579         if (!error) {
1580                 nfsm_mtofh(dvp, newvp, v3, gotvp);
                     /*
                      * gotvp is expected to be set by nfsm_mtofh() (macro, not
                      * visible here).  When it is still 0, drop whatever vnode
                      * was produced and look the new name up instead.
                      */
1581                 if (!gotvp) {
1582                         if (newvp) {
1583                                 vput(newvp);
1584                                 newvp = NULL;
1585                         }
1586                         error = nfs_lookitup(dvp, cnp->cn_nameptr,
1587                             cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
1588                         if (!error)
1589                                 newvp = NFSTOV(np);
1590                 }
1591         }
1592         if (v3)
1593                 nfsm_wcc_data(dvp, wccflag);
1594         m_freem(mrep);
1595 nfsmout:
1596         if (error) {
                     /* Exclusive create rejected: retry as a plain create. */
1597                 if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
1598                         fmode &= ~O_EXCL;
1599                         goto again;
1600                 }
1601                 if (newvp)
1602                         vput(newvp);
1603         } else if (v3 && (fmode & O_EXCL)) {
1604                 /*
1605                  * We are normally called with only a partially initialized
1606                  * VAP.  Since the NFSv3 spec says that server may use the
1607                  * file attributes to store the verifier, the spec requires
1608                  * us to do a SETATTR RPC. FreeBSD servers store the verifier
1609                  * in atime, but we can't really assume that all servers will
1610                  * so we ensure that our SETATTR sets both atime and mtime.
1611                  */
1612                 if (vap->va_mtime.tv_sec == VNOVAL)
1613                         vfs_timestamp(&vap->va_mtime);
1614                 if (vap->va_atime.tv_sec == VNOVAL)
1615                         vap->va_atime = vap->va_mtime;
1616                 error = nfs_setattrrpc(newvp, vap, cnp->cn_cred);
                     /* SETATTR failed: release the vnode; *ap->a_vpp is not set. */
1617                 if (error)
1618                         vput(newvp);
1619         }
1620         if (!error) {
1621                 if (cnp->cn_flags & MAKEENTRY)
1622                         cache_enter(dvp, newvp, cnp);
1623                 *ap->a_vpp = newvp;
1624         }
1625         mtx_lock(&(VTONFS(dvp))->n_mtx);
1626         VTONFS(dvp)->n_flag |= NMODIFIED;
1627         if (!wccflag) {
1628                 VTONFS(dvp)->n_attrstamp = 0;
1629                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
1630         }
1631         mtx_unlock(&(VTONFS(dvp))->n_mtx);
1632         return (error);
1633 }
1634
1635 /*
1636  * nfs file remove call
1637  * To try and make nfs semantics closer to ufs semantics, a file that has
1638  * other processes using the vnode is renamed instead of removed and then
1639  * removed later on the last close.
1640  * - If v_usecount > 1
1641  *        If a rename is not already in the works
1642  *           call nfs_sillyrename() to set it up
1643  *     else
1644  *        do the remove rpc
1645  */
1646 static int
1647 nfs_remove(struct vop_remove_args *ap)
1648 {
1649         struct vnode *vp = ap->a_vp;
1650         struct vnode *dvp = ap->a_dvp;
1651         struct componentname *cnp = ap->a_cnp;
1652         struct nfsnode *np = VTONFS(vp);
1653         int error = 0;
1654         struct vattr vattr;
1655
1656 #ifndef DIAGNOSTIC
1657         if ((cnp->cn_flags & HASBUF) == 0)
1658                 panic("nfs_remove: no name");
1659         if (vrefcnt(vp) < 1)
1660                 panic("nfs_remove: bad v_usecount");
1661 #endif
1662         if (vp->v_type == VDIR)
1663                 error = EPERM;
1664         else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
1665             !VOP_GETATTR(vp, &vattr, cnp->cn_cred) && vattr.va_nlink > 1)) {
1666                 /*
1667                  * Purge the name cache so that the chance of a lookup for
1668                  * the name succeeding while the remove is in progress is
1669                  * minimized. Without node locking it can still happen, such
1670                  * that an I/O op returns ESTALE, but since you get this if
1671                  * another host removes the file..
1672                  */
1673                 cache_purge(vp);
1674                 /*
1675                  * throw away biocache buffers, mainly to avoid
1676                  * unnecessary delayed writes later.
1677                  */
1678                 error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1);
1679                 /* Do the rpc */
1680                 if (error != EINTR && error != EIO)
1681                         error = nfs_removerpc(dvp, cnp->cn_nameptr,
1682                                 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
1683                 /*
1684                  * Kludge City: If the first reply to the remove rpc is lost..
1685                  *   the reply to the retransmitted request will be ENOENT
1686                  *   since the file was in fact removed
1687                  *   Therefore, we cheat and return success.
1688                  */
1689                 if (error == ENOENT)
1690                         error = 0;
1691         } else if (!np->n_sillyrename)
1692                 error = nfs_sillyrename(dvp, vp, cnp);
1693         np->n_attrstamp = 0;
1694         KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
1695         return (error);
1696 }
1697
1698 /*
1699  * nfs file remove rpc called from nfs_inactive
1700  */
1701 int
1702 nfs_removeit(struct sillyrename *sp)
1703 {
1704         /*
1705          * Make sure that the directory vnode is still valid.
1706          * XXX we should lock sp->s_dvp here.
1707          */
1708         if (sp->s_dvp->v_type == VBAD)
1709                 return (0);
1710         return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
1711                 NULL));
1712 }
1713
1714 /*
1715  * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
      *
      * Sends a REMOVE request for the entry "name" (namelen bytes) in
      * directory dvp on behalf of thread td with credentials cred.  For
      * NFSv3 the wcc data in the reply is processed for dvp.
      *
      * Whether or not the request succeeded, the directory node is flagged
      * NMODIFIED and, when wccflag is 0, its n_attrstamp is zeroed; both
      * updates are made while holding the node's n_mtx.
      *
      * Returns the value left in "error" (0 unless a macro changed it).
      *
      * NOTE(review): nothing here jumps to nfsmout or assigns error, md or
      * dpos explicitly; the nfsm_* macros presumably do so using these local
      * names -- confirm against the macro definitions before renaming them.
1716  */
1717 static int
1718 nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
1719     struct ucred *cred, struct thread *td)
1720 {
1721         caddr_t bpos, dpos;
1722         int error = 0, wccflag = NFSV3_WCCRATTR;
1723         struct mbuf *mreq, *mrep, *md, *mb;
1724         int v3 = NFS_ISV3(dvp);
1725
1726         nfsstats.rpccnt[NFSPROC_REMOVE]++;
             /* Request body: directory file handle, then the name string. */
1727         mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE,
1728                 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
1729         mb = mreq;
1730         bpos = mtod(mb, caddr_t);
1731         nfsm_fhtom(dvp, v3);
1732         nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
1733         nfsm_request(dvp, NFSPROC_REMOVE, td, cred);
1734         if (v3)
1735                 nfsm_wcc_data(dvp, wccflag);
1736         m_freem(mrep);
1737 nfsmout:
1738         mtx_lock(&(VTONFS(dvp))->n_mtx);
1739         VTONFS(dvp)->n_flag |= NMODIFIED;
1740         if (!wccflag) {
1741                 VTONFS(dvp)->n_attrstamp = 0;
1742                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
1743         }
1744         mtx_unlock(&(VTONFS(dvp))->n_mtx);
1745         return (error);
1746 }
1747
1748 /*
1749  * nfs file rename call
      *
      * Renames fcnp in directory a_fdvp (vnode a_fvp) to tcnp in directory
      * a_tdvp, replacing a_tvp when it is non-NULL.
      *
      * Steps, in order:
      *  - fail with EXDEV when fvp is not on the same mount as tdvp (or tvp);
      *  - treat fvp == tvp as a successful no-op (after logging);
      *  - lock fvp, fsync it, unlock it, then fsync tvp if present; any
      *    fsync error aborts the rename;
      *  - if tvp exists, has more than one reference, has no silly rename
      *    recorded and is not a directory, try to silly-rename it;
      *  - issue the rename rpc;
      *  - when fvp is a directory, purge the name cache of fdvp (and of tdvp
      *    too when tvp is still set and is a directory).
      *
      * Every exit path goes through "out", which releases all four vnodes.
      * An ENOENT result is mapped to 0.  Returns 0 or an errno value.
1750  */
1751 static int
1752 nfs_rename(struct vop_rename_args *ap)
1753 {
1754         struct vnode *fvp = ap->a_fvp;
1755         struct vnode *tvp = ap->a_tvp;
1756         struct vnode *fdvp = ap->a_fdvp;
1757         struct vnode *tdvp = ap->a_tdvp;
1758         struct componentname *tcnp = ap->a_tcnp;
1759         struct componentname *fcnp = ap->a_fcnp;
1760         int error;
1761
             /*
              * NOTE(review): this sanity check is compiled in only when
              * DIAGNOSTIC is *not* defined -- confirm that this is intended.
              */
1762 #ifndef DIAGNOSTIC
1763         if ((tcnp->cn_flags & HASBUF) == 0 ||
1764             (fcnp->cn_flags & HASBUF) == 0)
1765                 panic("nfs_rename: no name");
1766 #endif
1767         /* Check for cross-device rename */
1768         if ((fvp->v_mount != tdvp->v_mount) ||
1769             (tvp && (fvp->v_mount != tvp->v_mount))) {
1770                 error = EXDEV;
1771                 goto out;
1772         }
1773
1774         if (fvp == tvp) {
1775                 nfs_printf("nfs_rename: fvp == tvp (can't happen)\n");
1776                 error = 0;
1777                 goto out;
1778         }
1779         if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
1780                 goto out;
1781
1782         /*
1783          * We have to flush B_DELWRI data prior to renaming
1784          * the file.  If we don't, the delayed-write buffers
1785          * can be flushed out later after the file has gone stale
1786          * under NFSV3.  NFSV2 does not have this problem because
1787          * ( as far as I can tell ) it flushes dirty buffers more
1788          * often.
1789          *
1790          * Skip the rename operation if the fsync fails, this can happen
1791          * due to the server's volume being full, when we pushed out data
1792          * that was written back to our cache earlier. Not checking for
1793          * this condition can result in potential (silent) data loss.
1794          */
1795         error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
             /* fvp is locked only around its own fsync. */
1796         VOP_UNLOCK(fvp, 0);
1797         if (!error && tvp)
1798                 error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
1799         if (error)
1800                 goto out;
1801
1802         /*
1803          * If the tvp exists and is in use, sillyrename it before doing the
1804          * rename of the new file over it.
1805          * XXX Can't sillyrename a directory.
1806          */
1807         if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
1808                 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
                     /*
                      * Silly rename succeeded: release tvp now and clear it so
                      * that the "out" path does not release it a second time.
                      */
1809                 vput(tvp);
1810                 tvp = NULL;
1811         }
1812
1813         error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
1814                 tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
1815                 tcnp->cn_thread);
1816
             /* The name cache is purged whether or not the rpc succeeded. */
1817         if (fvp->v_type == VDIR) {
1818                 if (tvp != NULL && tvp->v_type == VDIR)
1819                         cache_purge(tdvp);
1820                 cache_purge(fdvp);
1821         }
1822
1823 out:
             /*
              * tdvp is vrele()'d rather than vput() when it is the same vnode
              * as tvp, because tvp is vput() immediately afterwards.
              */
1824         if (tdvp == tvp)
1825                 vrele(tdvp);
1826         else
1827                 vput(tdvp);
1828         if (tvp)
1829                 vput(tvp);
1830         vrele(fdvp);
1831         vrele(fvp);
1832         /*
1833          * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
1834          */
1835         if (error == ENOENT)
1836                 error = 0;
1837         return (error);
1838 }
1839
1840 /*
1841  * nfs file rename rpc called from nfs_remove() above
1842  */
1843 static int
1844 nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
1845     struct sillyrename *sp)
1846 {
1847
1848         return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp,
1849             sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread));
1850 }
1851
1852 /*
1853  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
1854  */
1855 static int
1856 nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
1857     struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred,
1858     struct thread *td)
1859 {
1860         caddr_t bpos, dpos;
1861         int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
1862         struct mbuf *mreq, *mrep, *md, *mb;
1863         int v3 = NFS_ISV3(fdvp);
1864
1865         nfsstats.rpccnt[NFSPROC_RENAME]++;
1866         mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME,
1867                 (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
1868                 nfsm_rndup(tnamelen));
1869         mb = mreq;
1870         bpos = mtod(mb, caddr_t);
1871         nfsm_fhtom(fdvp, v3);
1872         nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
1873         nfsm_fhtom(tdvp, v3);
1874         nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
1875         nfsm_request(fdvp, NFSPROC_RENAME, td, cred);
1876         if (v3) {
1877                 nfsm_wcc_data(fdvp, fwccflag);
1878                 nfsm_wcc_data(tdvp, twccflag);
1879         }
1880         m_freem(mrep);
1881 nfsmout:
1882         mtx_lock(&(VTONFS(fdvp))->n_mtx);
1883         VTONFS(fdvp)->n_flag |= NMODIFIED;
1884         mtx_unlock(&(VTONFS(fdvp))->n_mtx);
1885         mtx_lock(&(VTONFS(tdvp))->n_mtx);
1886         VTONFS(tdvp)->n_flag |= NMODIFIED;
1887         mtx_unlock(&(VTONFS(tdvp))->n_mtx);
1888         if (!fwccflag) {
1889                 VTONFS(fdvp)->n_attrstamp = 0;
1890                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp);
1891         }
1892         if (!twccflag) {
1893                 VTONFS(tdvp)->n_attrstamp = 0;
1894                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
1895         }
1896         return (error);
1897 }
1898
1899 /*
1900  * nfs hard link create call
1901  */
1902 static int
1903 nfs_link(struct vop_link_args *ap)
1904 {
1905         struct vnode *vp = ap->a_vp;
1906         struct vnode *tdvp = ap->a_tdvp;
1907         struct componentname *cnp = ap->a_cnp;
1908         caddr_t bpos, dpos;
1909         int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
1910         struct mbuf *mreq, *mrep, *md, *mb;
1911         int v3;
1912
1913         if (vp->v_mount != tdvp->v_mount) {
1914                 return (EXDEV);
1915         }
1916
1917         /*
1918          * Push all writes to the server, so that the attribute cache
1919          * doesn't get "out of sync" with the server.
1920          * XXX There should be a better way!
1921          */
1922         VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);
1923
1924         v3 = NFS_ISV3(vp);
1925         nfsstats.rpccnt[NFSPROC_LINK]++;
1926         mreq = nfsm_reqhead(vp, NFSPROC_LINK,
1927                 NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
1928         mb = mreq;
1929         bpos = mtod(mb, caddr_t);
1930         nfsm_fhtom(vp, v3);
1931         nfsm_fhtom(tdvp, v3);
1932         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1933         nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred);
1934         if (v3) {
1935                 nfsm_postop_attr(vp, attrflag);
1936                 nfsm_wcc_data(tdvp, wccflag);
1937         }
1938         m_freem(mrep);
1939 nfsmout:
1940         mtx_lock(&(VTONFS(tdvp))->n_mtx);
1941         VTONFS(tdvp)->n_flag |= NMODIFIED;
1942         mtx_unlock(&(VTONFS(tdvp))->n_mtx);
1943         if (!attrflag) {
1944                 VTONFS(vp)->n_attrstamp = 0;
1945                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
1946         }
1947         if (!wccflag) {
1948                 VTONFS(tdvp)->n_attrstamp = 0;
1949                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
1950         }
1951         return (error);
1952 }
1953
1954 /*
1955  * nfs symbolic link create call
1956  */
1957 static int
1958 nfs_symlink(struct vop_symlink_args *ap)
1959 {
1960         struct vnode *dvp = ap->a_dvp;
1961         struct vattr *vap = ap->a_vap;
1962         struct componentname *cnp = ap->a_cnp;
1963         struct nfsv2_sattr *sp;
1964         caddr_t bpos, dpos;
1965         int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
1966         struct mbuf *mreq, *mrep, *md, *mb;
1967         struct vnode *newvp = NULL;
1968         int v3 = NFS_ISV3(dvp);
1969
1970         nfsstats.rpccnt[NFSPROC_SYMLINK]++;
1971         slen = strlen(ap->a_target);
1972         mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
1973             nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
1974         mb = mreq;
1975         bpos = mtod(mb, caddr_t);
1976         nfsm_fhtom(dvp, v3);
1977         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1978         if (v3) {
1979                 nfsm_v3attrbuild(vap, FALSE);
1980         }
1981         nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
1982         if (!v3) {
1983                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
1984                 sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
1985                 sp->sa_uid = nfs_xdrneg1;
1986                 sp->sa_gid = nfs_xdrneg1;
1987                 sp->sa_size = nfs_xdrneg1;
1988                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1989                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1990         }
1991
1992         /*
1993          * Issue the NFS request and get the rpc response.
1994          *
1995          * Only NFSv3 responses returning an error of 0 actually return
1996          * a file handle that can be converted into newvp without having
1997          * to do an extra lookup rpc.
1998          */
1999         nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred);
2000         if (v3) {
2001                 if (error == 0)
2002                         nfsm_mtofh(dvp, newvp, v3, gotvp);
2003                 nfsm_wcc_data(dvp, wccflag);
2004         }
2005
2006         /*
2007          * out code jumps -> here, mrep is also freed.
2008          */
2009
2010         m_freem(mrep);
2011 nfsmout:
2012
2013         /*
2014          * If we do not have an error and we could not extract the newvp from
2015          * the response due to the request being NFSv2, we have to do a
2016          * lookup in order to obtain a newvp to return.
2017          */
2018         if (error == 0 && newvp == NULL) {
2019                 struct nfsnode *np = NULL;
2020
2021                 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
2022                     cnp->cn_cred, cnp->cn_thread, &np);
2023                 if (!error)
2024                         newvp = NFSTOV(np);
2025         }
2026         if (error) {
2027                 if (newvp)
2028                         vput(newvp);
2029         } else {
2030                 *ap->a_vpp = newvp;
2031         }
2032         mtx_lock(&(VTONFS(dvp))->n_mtx);
2033         VTONFS(dvp)->n_flag |= NMODIFIED;
2034         mtx_unlock(&(VTONFS(dvp))->n_mtx);
2035         if (!wccflag) {
2036                 VTONFS(dvp)->n_attrstamp = 0;
2037                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
2038         }
2039         return (error);
2040 }
2041
2042 /*
2043  * nfs make dir call
2044  */
2045 static int
2046 nfs_mkdir(struct vop_mkdir_args *ap)
2047 {
2048         struct vnode *dvp = ap->a_dvp;
2049         struct vattr *vap = ap->a_vap;
2050         struct componentname *cnp = ap->a_cnp;
2051         struct nfsv2_sattr *sp;
2052         int len;
2053         struct nfsnode *np = NULL;
2054         struct vnode *newvp = NULL;
2055         caddr_t bpos, dpos;
2056         int error = 0, wccflag = NFSV3_WCCRATTR;
2057         int gotvp = 0;
2058         struct mbuf *mreq, *mrep, *md, *mb;
2059         struct vattr vattr;
2060         int v3 = NFS_ISV3(dvp);
2061
2062         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
2063                 return (error);
2064         len = cnp->cn_namelen;
2065         nfsstats.rpccnt[NFSPROC_MKDIR]++;
2066         mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR,
2067           NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
2068         mb = mreq;
2069         bpos = mtod(mb, caddr_t);
2070         nfsm_fhtom(dvp, v3);
2071         nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
2072         if (v3) {
2073                 nfsm_v3attrbuild(vap, FALSE);
2074         } else {
2075                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
2076                 sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
2077                 sp->sa_uid = nfs_xdrneg1;
2078                 sp->sa_gid = nfs_xdrneg1;
2079                 sp->sa_size = nfs_xdrneg1;
2080                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
2081                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
2082         }
2083         nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred);
2084         if (!error)
2085                 nfsm_mtofh(dvp, newvp, v3, gotvp);
2086         if (v3)
2087                 nfsm_wcc_data(dvp, wccflag);
2088         m_freem(mrep);
2089 nfsmout:
2090         mtx_lock(&(VTONFS(dvp))->n_mtx);
2091         VTONFS(dvp)->n_flag |= NMODIFIED;
2092         mtx_unlock(&(VTONFS(dvp))->n_mtx);
2093         if (!wccflag) {
2094                 VTONFS(dvp)->n_attrstamp = 0;
2095                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
2096         }
2097         if (error == 0 && newvp == NULL) {
2098                 error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
2099                         cnp->cn_thread, &np);
2100                 if (!error) {
2101                         newvp = NFSTOV(np);
2102                         if (newvp->v_type != VDIR)
2103                                 error = EEXIST;
2104                 }
2105         }
2106         if (error) {
2107                 if (newvp)
2108                         vput(newvp);
2109         } else
2110                 *ap->a_vpp = newvp;
2111         return (error);
2112 }
2113
2114 /*
2115  * nfs remove directory call
2116  */
2117 static int
2118 nfs_rmdir(struct vop_rmdir_args *ap)
2119 {
2120         struct vnode *vp = ap->a_vp;
2121         struct vnode *dvp = ap->a_dvp;
2122         struct componentname *cnp = ap->a_cnp;
2123         caddr_t bpos, dpos;
2124         int error = 0, wccflag = NFSV3_WCCRATTR;
2125         struct mbuf *mreq, *mrep, *md, *mb;
2126         int v3 = NFS_ISV3(dvp);
2127
2128         if (dvp == vp)
2129                 return (EINVAL);
2130         nfsstats.rpccnt[NFSPROC_RMDIR]++;
2131         mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR,
2132                 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
2133         mb = mreq;
2134         bpos = mtod(mb, caddr_t);
2135         nfsm_fhtom(dvp, v3);
2136         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
2137         nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred);
2138         if (v3)
2139                 nfsm_wcc_data(dvp, wccflag);
2140         m_freem(mrep);
2141 nfsmout:
2142         mtx_lock(&(VTONFS(dvp))->n_mtx);
2143         VTONFS(dvp)->n_flag |= NMODIFIED;
2144         mtx_unlock(&(VTONFS(dvp))->n_mtx);
2145         if (!wccflag) {
2146                 VTONFS(dvp)->n_attrstamp = 0;
2147                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
2148         }
2149         cache_purge(dvp);
2150         cache_purge(vp);
2151         /*
2152          * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2153          */
2154         if (error == ENOENT)
2155                 error = 0;
2156         return (error);
2157 }
2158
2159 /*
2160  * nfs readdir call
2161  */
2162 static int
2163 nfs_readdir(struct vop_readdir_args *ap)
2164 {
2165         struct vnode *vp = ap->a_vp;
2166         struct nfsnode *np = VTONFS(vp);
2167         struct uio *uio = ap->a_uio;
2168         int tresid, error = 0;
2169         struct vattr vattr;
2170
2171         if (vp->v_type != VDIR)
2172                 return(EPERM);
2173
2174         /*
2175          * First, check for hit on the EOF offset cache
2176          */
2177         if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
2178             (np->n_flag & NMODIFIED) == 0) {
2179                 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
2180                         mtx_lock(&np->n_mtx);
2181                         if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
2182                                 mtx_unlock(&np->n_mtx);
2183                                 nfsstats.direofcache_hits++;
2184                                 goto out;
2185                         } else
2186                                 mtx_unlock(&np->n_mtx);
2187                 }
2188         }
2189
2190         /*
2191          * Call nfs_bioread() to do the real work.
2192          */
2193         tresid = uio->uio_resid;
2194         error = nfs_bioread(vp, uio, 0, ap->a_cred);
2195
2196         if (!error && uio->uio_resid == tresid) {
2197                 nfsstats.direofcache_misses++;
2198         }
2199 out:
2200         return (error);
2201 }
2202
2203 /*
2204  * Readdir rpc call.
2205  * Called from below the buffer cache by nfs_doio().
2206  */
2207 int
2208 nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
2209 {
2210         int len, left;
2211         struct dirent *dp = NULL;
2212         u_int32_t *tl;
2213         caddr_t cp;
2214         nfsuint64 *cookiep;
2215         caddr_t bpos, dpos;
2216         struct mbuf *mreq, *mrep, *md, *mb;
2217         nfsuint64 cookie;
2218         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2219         struct nfsnode *dnp = VTONFS(vp);
2220         u_quad_t fileno;
2221         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2222         int attrflag;
2223         int v3 = NFS_ISV3(vp);
2224
2225 #ifndef DIAGNOSTIC
2226         if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2227                 (uiop->uio_resid & (DIRBLKSIZ - 1)))
2228                 panic("nfs readdirrpc bad uio");
2229 #endif
2230
2231         /*
2232          * If there is no cookie, assume directory was stale.
2233          */
2234         nfs_dircookie_lock(dnp);
2235         cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2236         if (cookiep) {
2237                 cookie = *cookiep;
2238                 nfs_dircookie_unlock(dnp);
2239         } else {
2240                 nfs_dircookie_unlock(dnp);
2241                 return (NFSERR_BAD_COOKIE);
2242         }
2243
2244         /*
2245          * Loop around doing readdir rpc's of size nm_readdirsize
2246          * truncated to a multiple of DIRBLKSIZ.
2247          * The stopping criteria is EOF or buffer full.
2248          */
2249         while (more_dirs && bigenough) {
2250                 nfsstats.rpccnt[NFSPROC_READDIR]++;
2251                 mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
2252                         NFSX_READDIR(v3));
2253                 mb = mreq;
2254                 bpos = mtod(mb, caddr_t);
2255                 nfsm_fhtom(vp, v3);
2256                 if (v3) {
2257                         tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
2258                         *tl++ = cookie.nfsuquad[0];
2259                         *tl++ = cookie.nfsuquad[1];
2260                         mtx_lock(&dnp->n_mtx);
2261                         *tl++ = dnp->n_cookieverf.nfsuquad[0];
2262                         *tl++ = dnp->n_cookieverf.nfsuquad[1];
2263                         mtx_unlock(&dnp->n_mtx);
2264                 } else {
2265                         tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
2266                         *tl++ = cookie.nfsuquad[0];
2267                 }
2268                 *tl = txdr_unsigned(nmp->nm_readdirsize);
2269                 nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred);
2270                 if (v3) {
2271                         nfsm_postop_attr(vp, attrflag);
2272                         if (!error) {
2273                                 tl = nfsm_dissect(u_int32_t *,
2274                                     2 * NFSX_UNSIGNED);
2275                                 mtx_lock(&dnp->n_mtx);
2276                                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
2277                                 dnp->n_cookieverf.nfsuquad[1] = *tl;
2278                                 mtx_unlock(&dnp->n_mtx);
2279                         } else {
2280                                 m_freem(mrep);
2281                                 goto nfsmout;
2282                         }
2283                 }
2284                 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2285                 more_dirs = fxdr_unsigned(int, *tl);
2286
2287                 /* loop thru the dir entries, doctoring them to 4bsd form */
2288                 while (more_dirs && bigenough) {
2289                         if (v3) {
2290                                 tl = nfsm_dissect(u_int32_t *,
2291                                     3 * NFSX_UNSIGNED);
2292                                 fileno = fxdr_hyper(tl);
2293                                 len = fxdr_unsigned(int, *(tl + 2));
2294                         } else {
2295                                 tl = nfsm_dissect(u_int32_t *,
2296                                     2 * NFSX_UNSIGNED);
2297                                 fileno = fxdr_unsigned(u_quad_t, *tl++);
2298                                 len = fxdr_unsigned(int, *tl);
2299                         }
2300                         if (len <= 0 || len > NFS_MAXNAMLEN) {
2301                                 error = EBADRPC;
2302                                 m_freem(mrep);
2303                                 goto nfsmout;
2304                         }
2305                         tlen = nfsm_rndup(len);
2306                         if (tlen == len)
2307                                 tlen += 4;      /* To ensure null termination */
2308                         left = DIRBLKSIZ - blksiz;
2309                         if ((tlen + DIRHDSIZ) > left) {
2310                                 dp->d_reclen += left;
2311                                 uiop->uio_iov->iov_base =
2312                                     (char *)uiop->uio_iov->iov_base + left;
2313                                 uiop->uio_iov->iov_len -= left;
2314                                 uiop->uio_offset += left;
2315                                 uiop->uio_resid -= left;
2316                                 blksiz = 0;
2317                         }
2318                         if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2319                                 bigenough = 0;
2320                         if (bigenough) {
2321                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
2322                                 dp->d_fileno = (int)fileno;
2323                                 dp->d_namlen = len;
2324                                 dp->d_reclen = tlen + DIRHDSIZ;
2325                                 dp->d_type = DT_UNKNOWN;
2326                                 blksiz += dp->d_reclen;
2327                                 if (blksiz == DIRBLKSIZ)
2328                                         blksiz = 0;
2329                                 uiop->uio_offset += DIRHDSIZ;
2330                                 uiop->uio_resid -= DIRHDSIZ;
2331                                 uiop->uio_iov->iov_base =
2332                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
2333                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
2334                                 nfsm_mtouio(uiop, len);
2335                                 cp = uiop->uio_iov->iov_base;
2336                                 tlen -= len;
2337                                 *cp = '\0';     /* null terminate */
2338                                 uiop->uio_iov->iov_base =
2339                                     (char *)uiop->uio_iov->iov_base + tlen;
2340                                 uiop->uio_iov->iov_len -= tlen;
2341                                 uiop->uio_offset += tlen;
2342                                 uiop->uio_resid -= tlen;
2343                         } else
2344                                 nfsm_adv(nfsm_rndup(len));
2345                         if (v3) {
2346                                 tl = nfsm_dissect(u_int32_t *,
2347                                     3 * NFSX_UNSIGNED);
2348                         } else {
2349                                 tl = nfsm_dissect(u_int32_t *,
2350                                     2 * NFSX_UNSIGNED);
2351                         }
2352                         if (bigenough) {
2353                                 cookie.nfsuquad[0] = *tl++;
2354                                 if (v3)
2355                                         cookie.nfsuquad[1] = *tl++;
2356                         } else if (v3)
2357                                 tl += 2;
2358                         else
2359                                 tl++;
2360                         more_dirs = fxdr_unsigned(int, *tl);
2361                 }
2362                 /*
2363                  * If at end of rpc data, get the eof boolean
2364                  */
2365                 if (!more_dirs) {
2366                         tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2367                         more_dirs = (fxdr_unsigned(int, *tl) == 0);
2368                 }
2369                 m_freem(mrep);
2370         }
2371         /*
2372          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2373          * by increasing d_reclen for the last record.
2374          */
2375         if (blksiz > 0) {
2376                 left = DIRBLKSIZ - blksiz;
2377                 dp->d_reclen += left;
2378                 uiop->uio_iov->iov_base =
2379                     (char *)uiop->uio_iov->iov_base + left;
2380                 uiop->uio_iov->iov_len -= left;
2381                 uiop->uio_offset += left;
2382                 uiop->uio_resid -= left;
2383         }
2384
2385         /*
2386          * We are now either at the end of the directory or have filled the
2387          * block.
2388          */
2389         if (bigenough)
2390                 dnp->n_direofoffset = uiop->uio_offset;
2391         else {
2392                 if (uiop->uio_resid > 0)
2393                         nfs_printf("EEK! readdirrpc resid > 0\n");
2394                 nfs_dircookie_lock(dnp);
2395                 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2396                 *cookiep = cookie;
2397                 nfs_dircookie_unlock(dnp);
2398         }
2399 nfsmout:
2400         return (error);
2401 }
2402
2403 /*
2404  * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
      *
      * Issues NFSPROC_READDIRPLUS requests for directory vp, starting from the
      * cookie recorded for uiop->uio_offset, and rewrites every returned entry
      * as a struct dirent in the caller's single iovec.  Records are padded so
      * that none of them straddles a DIRBLKSIZ boundary.  For an entry that
      * carries attributes and a file handle (and is not ".."), the matching
      * nfsnode is obtained, its attributes are loaded and the name is entered
      * into the name cache with cache_enter().
      *
      * Returns 0 on success, NFSERR_BAD_COOKIE when no cookie is recorded for
      * the starting offset, EBADRPC for an out-of-range name length, or
      * whatever value `error' holds when control reaches nfsmout.
      *
      * NOTE(review): the nfsm_* helpers are macros defined outside this
      * chunk; they presumably work on the local mbuf cursors (mb/bpos,
      * md/dpos, mrep), set `error' and jump to nfsmout on failure -- confirm
      * against their definitions before restructuring this function.
2405  */
2406 int
2407 nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
2408 {
2409         int len, left;
2410         struct dirent *dp;
2411         u_int32_t *tl;
2412         caddr_t cp;
2413         struct vnode *newvp;
2414         nfsuint64 *cookiep;
2415         caddr_t bpos, dpos, dpossav1, dpossav2;
2416         struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2;
2417         struct nameidata nami, *ndp = &nami;
2418         struct componentname *cnp = &ndp->ni_cnd;
2419         nfsuint64 cookie;
2420         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2421         struct nfsnode *dnp = VTONFS(vp), *np;
2422         nfsfh_t *fhp;
2423         u_quad_t fileno;
2424         int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
2425         int attrflag, fhsize;
2426
             /*
              * dp is otherwise assigned only once an entry has been stored in
              * the caller's buffer; give it a defined value up front.
              */
2427 #ifndef nolint
2428         dp = NULL;
2429 #endif
             /*
              * NOTE(review): this uio sanity check is compiled when DIAGNOSTIC
              * is NOT defined -- confirm that #ifndef (rather than #ifdef) is
              * what is intended.
              */
2430 #ifndef DIAGNOSTIC
2431         if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2432                 (uiop->uio_resid & (DIRBLKSIZ - 1)))
2433                 panic("nfs readdirplusrpc bad uio");
2434 #endif
             /*
              * nami is a local nameidata; in this function it only carries the
              * dvp/vp/componentname arguments handed to cache_enter() below.
              */
2435         ndp->ni_dvp = vp;
2436         newvp = NULLVP;
2437
2438         /*
2439          * If there is no cookie, assume directory was stale.
2440          */
2441         nfs_dircookie_lock(dnp);
2442         cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2443         if (cookiep) {
2444                 cookie = *cookiep;
2445                 nfs_dircookie_unlock(dnp);
2446         } else {
2447                 nfs_dircookie_unlock(dnp);
2448                 return (NFSERR_BAD_COOKIE);
2449         }
2450         /*
2451          * Loop around doing readdir rpc's of size nm_readdirsize
2452          * truncated to a multiple of DIRBLKSIZ.
2453          * The stopping criteria is EOF or buffer full.
2454          */
2455         while (more_dirs && bigenough) {
2456                 nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
2457                 mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
2458                         NFSX_FH(1) + 6 * NFSX_UNSIGNED);
2459                 mb = mreq;
2460                 bpos = mtod(mb, caddr_t);
2461                 nfsm_fhtom(vp, 1);
                     /*
                      * Six words follow the file handle: the 64-bit cookie,
                      * the cookie verifier (copied under n_mtx), then
                      * nm_readdirsize and nm_rsize.
                      */
2462                 tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED);
2463                 *tl++ = cookie.nfsuquad[0];
2464                 *tl++ = cookie.nfsuquad[1];
2465                 mtx_lock(&dnp->n_mtx);
2466                 *tl++ = dnp->n_cookieverf.nfsuquad[0];
2467                 *tl++ = dnp->n_cookieverf.nfsuquad[1];
2468                 mtx_unlock(&dnp->n_mtx);
2469                 *tl++ = txdr_unsigned(nmp->nm_readdirsize);
2470                 *tl = txdr_unsigned(nmp->nm_rsize);
2471                 nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred);
2472                 nfsm_postop_attr(vp, attrflag);
2473                 if (error) {
2474                         m_freem(mrep);
2475                         goto nfsmout;
2476                 }
                     /*
                      * Reply header: two words of cookie verifier (stored back
                      * into the nfsnode under n_mtx) and the flag telling
                      * whether a first entry follows.
                      */
2477                 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
2478                 mtx_lock(&dnp->n_mtx);
2479                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
2480                 dnp->n_cookieverf.nfsuquad[1] = *tl++;
2481                 mtx_unlock(&dnp->n_mtx);
2482                 more_dirs = fxdr_unsigned(int, *tl);
2483
2484                 /* loop thru the dir entries, doctoring them to 4bsd form */
2485                 while (more_dirs && bigenough) {
                             /* 64-bit file number followed by the name length. */
2486                         tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
2487                         fileno = fxdr_hyper(tl);
2488                         len = fxdr_unsigned(int, *(tl + 2));
2489                         if (len <= 0 || len > NFS_MAXNAMLEN) {
2490                                 error = EBADRPC;
2491                                 m_freem(mrep);
2492                                 goto nfsmout;
2493                         }
2494                         tlen = nfsm_rndup(len);
2495                         if (tlen == len)
2496                                 tlen += 4;      /* To ensure null termination*/
                             /*
                              * If this record does not fit in what is left of
                              * the current DIRBLKSIZ block, stretch the previous
                              * record to the block end and start a new block.
                              */
2497                         left = DIRBLKSIZ - blksiz;
2498                         if ((tlen + DIRHDSIZ) > left) {
2499                                 dp->d_reclen += left;
2500                                 uiop->uio_iov->iov_base =
2501                                     (char *)uiop->uio_iov->iov_base + left;
2502                                 uiop->uio_iov->iov_len -= left;
2503                                 uiop->uio_offset += left;
2504                                 uiop->uio_resid -= left;
2505                                 blksiz = 0;
2506                         }
                             /* Stop storing once the caller's buffer cannot hold it. */
2507                         if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2508                                 bigenough = 0;
2509                         if (bigenough) {
                                     /*
                                      * Build the dirent header in place.
                                      * NOTE(review): the 64-bit fileno is narrowed
                                      * through an int cast here.
                                      */
2510                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
2511                                 dp->d_fileno = (int)fileno;
2512                                 dp->d_namlen = len;
2513                                 dp->d_reclen = tlen + DIRHDSIZ;
2514                                 dp->d_type = DT_UNKNOWN;
2515                                 blksiz += dp->d_reclen;
2516                                 if (blksiz == DIRBLKSIZ)
2517                                         blksiz = 0;
2518                                 uiop->uio_offset += DIRHDSIZ;
2519                                 uiop->uio_resid -= DIRHDSIZ;
2520                                 uiop->uio_iov->iov_base =
2521                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
2522                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
                                     /*
                                      * Remember where the name lands so it can be
                                      * given to cache_enter() later; nfsm_mtouio()
                                      * presumably copies len bytes of the reply
                                      * there and advances the uio -- confirm.
                                      */
2523                                 cnp->cn_nameptr = uiop->uio_iov->iov_base;
2524                                 cnp->cn_namelen = len;
2525                                 nfsm_mtouio(uiop, len);
                                     /* Terminate the name, then skip the padding. */
2526                                 cp = uiop->uio_iov->iov_base;
2527                                 tlen -= len;
2528                                 *cp = '\0';
2529                                 uiop->uio_iov->iov_base =
2530                                     (char *)uiop->uio_iov->iov_base + tlen;
2531                                 uiop->uio_iov->iov_len -= tlen;
2532                                 uiop->uio_offset += tlen;
2533                                 uiop->uio_resid -= tlen;
2534                         } else
2535                                 nfsm_adv(nfsm_rndup(len));
                             /*
                              * Two cookie words plus the attributes-follow flag.
                              * The cookie is only remembered for entries that
                              * were actually stored.
                              */
2536                         tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
2537                         if (bigenough) {
2538                                 cookie.nfsuquad[0] = *tl++;
2539                                 cookie.nfsuquad[1] = *tl++;
2540                         } else
2541                                 tl += 2;
2542
2543                         /*
2544                          * Since the attributes are before the file handle
2545                          * (sigh), we must skip over the attributes and then
2546                          * come back and get them.
2547                          */
2548                         attrflag = fxdr_unsigned(int, *tl);
2549                         if (attrflag) {
                                 /* Save the parse position of the attributes. */
2550                             dpossav1 = dpos;
2551                             mdsav1 = md;
2552                             nfsm_adv(NFSX_V3FATTR);
                                 /* doit: the flag word that follows the attributes. */
2553                             tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2554                             doit = fxdr_unsigned(int, *tl);
2555                             /*
2556                              * Skip loading the attrs for "..". There's a
2557                              * race between loading the attrs here and
2558                              * lookups that look for the directory currently
2559                              * being read (in the parent). We try to acquire
2560                              * the exclusive lock on ".." here, owning the
2561                              * lock on the directory being read. Lookup will
2562                              * hold the lock on ".." and try to acquire the
2563                              * lock on the directory being read.
2564                              *
2565                              * There are other ways of fixing this, one would
2566                              * be to do a trylock on the ".." vnode and skip
2567                              * loading the attrs on ".." if it happens to be
2568                              * locked by another process. But skipping the
2569                              * attrload on ".." seems the easiest option.
2570                              */
                                 /*
                                  * NOTE(review): dp is the most recently stored
                                  * record; when bigenough is 0 for the current
                                  * entry this tests the previous record's name.
                                  * The branch below also skips a file handle
                                  * regardless of the flag just read into doit.
                                  * Confirm both are intended.
                                  */
2571                             if (strcmp(dp->d_name, "..") == 0) {
2572                                     doit = 0;
2573                                     /*
2574                                      * We've already skipped over the attrs,
2575                                      * skip over the filehandle. And store d_type
2576                                      * as VDIR.
2577                                      */
2578                                     tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2579                                     i = fxdr_unsigned(int, *tl);
2580                                     nfsm_adv(nfsm_rndup(i));
2581                                     dp->d_type = IFTODT(VTTOIF(VDIR));
2582                             }
2583                             if (doit) {
                                     /*
                                      * A handle equal to the directory's own is
                                      * answered with an extra reference on vp;
                                      * anything else goes through nfs_nget(),
                                      * whose failure just disables the attribute
                                      * load for this entry.
                                      */
2584                                 nfsm_getfh(fhp, fhsize, 1);
2585                                 if (NFS_CMPFH(dnp, fhp, fhsize)) {
2586                                     VREF(vp);
2587                                     newvp = vp;
2588                                     np = dnp;
2589                                 } else {
2590                                     error = nfs_nget(vp->v_mount, fhp,
2591                                         fhsize, &np, LK_EXCLUSIVE);
2592                                     if (error)
2593                                         doit = 0;
2594                                     else
2595                                         newvp = NFSTOV(np);
2596                                 }
2597                             }
2598                             if (doit && bigenough) {
                                     /*
                                      * Rewind to the saved attribute position,
                                      * load the attributes into newvp, then
                                      * restore the position after the handle.
                                      */
2599                                 dpossav2 = dpos;
2600                                 dpos = dpossav1;
2601                                 mdsav2 = md;
2602                                 md = mdsav1;
2603                                 nfsm_loadattr(newvp, NULL);
2604                                 dpos = dpossav2;
2605                                 md = mdsav2;
2606                                 dp->d_type =
2607                                     IFTODT(VTTOIF(np->n_vattr.va_type));
2608                                 ndp->ni_vp = newvp;
2609                                 /* Update n_ctime, so subsequent lookup doesn't purge entry */
2610                                 np->n_ctime = np->n_vattr.va_ctime.tv_sec;
2611                                 cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
2612                             }
2613                         } else {
2614                             /* Just skip over the file handle */
2615                             tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2616                             i = fxdr_unsigned(int, *tl);
2617                             if (i) {
2618                                     tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2619                                     fhsize = fxdr_unsigned(int, *tl);
2620                                     nfsm_adv(nfsm_rndup(fhsize));
2621                             }
2622                         }
                             /*
                              * Drop whatever vnode was picked up for this entry:
                              * vrele() for the directory itself, vput() for a
                              * vnode returned by nfs_nget().
                              */
2623                         if (newvp != NULLVP) {
2624                             if (newvp == vp)
2625                                 vrele(newvp);
2626                             else
2627                                 vput(newvp);
2628                             newvp = NULLVP;
2629                         }
2630                         tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2631                         more_dirs = fxdr_unsigned(int, *tl);
2632                 }
2633                 /*
2634                  * If at end of rpc data, get the eof boolean
2635                  */
2636                 if (!more_dirs) {
2637                         tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2638                         more_dirs = (fxdr_unsigned(int, *tl) == 0);
2639                 }
2640                 m_freem(mrep);
2641         }
2642         /*
2643          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2644          * by increasing d_reclen for the last record.
2645          */
2646         if (blksiz > 0) {
2647                 left = DIRBLKSIZ - blksiz;
2648                 dp->d_reclen += left;
2649                 uiop->uio_iov->iov_base =
2650                     (char *)uiop->uio_iov->iov_base + left;
2651                 uiop->uio_iov->iov_len -= left;
2652                 uiop->uio_offset += left;
2653                 uiop->uio_resid -= left;
2654         }
2655
2656         /*
2657          * We are now either at the end of the directory or have filled the
2658          * block.
2659          */
             /*
              * bigenough still set: the loop ended on EOF, so record the EOF
              * offset.  Otherwise remember the cookie of the last stored entry
              * for the offset the next read will start from.
              */
2660         if (bigenough)
2661                 dnp->n_direofoffset = uiop->uio_offset;
2662         else {
2663                 if (uiop->uio_resid > 0)
2664                         nfs_printf("EEK! readdirplusrpc resid > 0\n");
2665                 nfs_dircookie_lock(dnp);
2666                 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2667                 *cookiep = cookie;
2668                 nfs_dircookie_unlock(dnp);
2669         }
2670 nfsmout:
             /* An error exit may arrive here still holding the per-entry vnode. */
2671         if (newvp != NULLVP) {
2672                 if (newvp == vp)
2673                         vrele(newvp);
2674                 else
2675                         vput(newvp);
2676                 newvp = NULLVP;
2677         }
2678         return (error);
2679 }
2680
2681 /*
2682  * Silly rename. To make the NFS filesystem that is stateless look a little
2683  * more like the "ufs" a remove of an active vnode is translated to a rename
2684  * to a funny looking filename that is removed by nfs_inactive on the
2685  * nfsnode. There is the potential for another process on a different client
2686  * to create the same funny name between the nfs_lookitup() fails and the
2687  * nfs_rename() completes, but...
2688  */
2689 static int
2690 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
2691 {
2692         struct sillyrename *sp;
2693         struct nfsnode *np;
2694         int error;
2695         short pid;
2696         unsigned int lticks;
2697
2698         cache_purge(dvp);
2699         np = VTONFS(vp);
2700 #ifndef DIAGNOSTIC
2701         if (vp->v_type == VDIR)
2702                 panic("nfs: sillyrename dir");
2703 #endif
2704         sp = malloc(sizeof (struct sillyrename),
2705                 M_NFSREQ, M_WAITOK);
2706         sp->s_cred = crhold(cnp->cn_cred);
2707         sp->s_dvp = dvp;
2708         sp->s_removeit = nfs_removeit;
2709         VREF(dvp);
2710
2711         /*
2712          * Fudge together a funny name.
2713          * Changing the format of the funny name to accomodate more
2714          * sillynames per directory.
2715          * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is
2716          * CPU ticks since boot.
2717          */
2718         pid = cnp->cn_thread->td_proc->p_pid;
2719         lticks = (unsigned int)ticks;
2720         for ( ; ; ) {
2721                 sp->s_namlen = sprintf(sp->s_name,
2722                                        ".nfs.%08x.%04x4.4", lticks,
2723                                        pid);
2724                 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2725                                  cnp->cn_thread, NULL))
2726                         break;
2727                 lticks++;
2728         }
2729         error = nfs_renameit(dvp, cnp, sp);
2730         if (error)
2731                 goto bad;
2732         error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2733                 cnp->cn_thread, &np);
2734         np->n_sillyrename = sp;
2735         return (0);
2736 bad:
2737         vrele(sp->s_dvp);
2738         crfree(sp->s_cred);
2739         free((caddr_t)sp, M_NFSREQ);
2740         return (error);
2741 }
2742
2743 /*
2744  * Look up a file name and optionally either update the file handle or
2745  * allocate an nfsnode, depending on the value of npp.
2746  * npp == NULL  --> just do the lookup
2747  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
2748  *                      handled too
2749  * *npp != NULL --> update the file handle in the vnode
      *
      * Sends NFSPROC_LOOKUP for `name' (len bytes) in directory dvp using
      * cred and td.  Returns 0 or an error.  In the *npp == NULL case a
      * successful return stores the resulting nfsnode in *npp; a V3 reply
      * without post-op attributes is turned into ENOENT after the vnode is
      * released.  On error in that case any vnode obtained here is released
      * (vrele() for dvp itself, vput() otherwise).
      *
      * NOTE(review): the nfsm_* helpers are macros defined outside this
      * chunk; they presumably set `error' and jump to nfsmout on failure.
2750  */
2751 static int
2752 nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
2753     struct thread *td, struct nfsnode **npp)
2754 {
2755         struct vnode *newvp = NULL;
2756         struct nfsnode *np, *dnp = VTONFS(dvp);
2757         caddr_t bpos, dpos;
2758         int error = 0, fhlen, attrflag;
2759         struct mbuf *mreq, *mrep, *md, *mb;
2760         nfsfh_t *nfhp;
2761         int v3 = NFS_ISV3(dvp);
2762
2763         nfsstats.rpccnt[NFSPROC_LOOKUP]++;
2764         mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
2765                 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
2766         mb = mreq;
2767         bpos = mtod(mb, caddr_t);
2768         nfsm_fhtom(dvp, v3);
2769         nfsm_strtom(name, len, NFS_MAXNAMLEN);
2770         nfsm_request(dvp, NFSPROC_LOOKUP, td, cred);
2771         if (npp && !error) {
2772                 nfsm_getfh(nfhp, fhlen, v3);
2773                 if (*npp) {
                         /*
                          * Caller supplied the nfsnode: replace its file
                          * handle, switching between the embedded small
                          * handle and a separately allocated big one when
                          * the size class changes.
                          */
2774                     np = *npp;
2775                     if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
2776                         free((caddr_t)np->n_fhp, M_NFSBIGFH);
2777                         np->n_fhp = &np->n_fh;
2778                     } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
2779                         np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK);
2780                     bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
2781                     np->n_fhsize = fhlen;
2782                     newvp = NFSTOV(np);
2783                 } else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
                         /*
                          * The name resolved to the directory itself.  Hand
                          * back dnp so the `*npp = np' store at nfsmout never
                          * reads an uninitialized np (nfs_readdirplusrpc()
                          * does the same for this case).
                          */
2784                     VREF(dvp);
2785                     newvp = dvp;
                         np = dnp;
2786                 } else {
2787                     error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
2788                     if (error) {
2789                         m_freem(mrep);
2790                         return (error);
2791                     }
2792                     newvp = NFSTOV(np);
2793                 }
2794                 if (v3) {
2795                         nfsm_postop_attr(newvp, attrflag);
                             /* A freshly obtained node without attributes is unusable. */
2796                         if (!attrflag && *npp == NULL) {
2797                                 m_freem(mrep);
2798                                 if (newvp == dvp)
2799                                         vrele(newvp);
2800                                 else
2801                                         vput(newvp);
2802                                 return (ENOENT);
2803                         }
2804                 } else
2805                         nfsm_loadattr(newvp, NULL);
2806         }
2807         m_freem(mrep);
2808 nfsmout:
             /*
              * Only the "allocate" mode (*npp == NULL) owns newvp: release it
              * on error, publish the node on success.
              */
2809         if (npp && *npp == NULL) {
2810                 if (error) {
2811                         if (newvp) {
2812                                 if (newvp == dvp)
2813                                         vrele(newvp);
2814                                 else
2815                                         vput(newvp);
2816                         }
2817                 } else
2818                         *npp = np;
2819         }
2820         return (error);
2821 }
2822
2823 /*
2824  * Nfs Version 3 commit rpc
      *
      * Sends NFSPROC_COMMIT for vp; `offset' is marshalled as the 64-bit
      * offset and `cnt' as the 32-bit count of the request.  Returns 0
      * without sending anything when NFSSTA_HASWRITEVERF is not set in the
      * mount's nm_state.  When the write verifier in the reply differs from
      * nmp->nm_verf, the stored verifier is replaced by the new one and
      * NFSERR_STALEWRITEVERF is returned; otherwise the value of `error' is
      * returned.
      *
      * NOTE(review): the nfsm_* helpers are macros defined outside this
      * chunk; they presumably set `error' and jump to nfsmout on failure.
2825  */
2826 int
2827 nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
2828            struct thread *td)
2829 {
2830         u_int32_t *tl;
2831         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2832         caddr_t bpos, dpos;
2833         int error = 0, wccflag = NFSV3_WCCRATTR;
2834         struct mbuf *mreq, *mrep, *md, *mb;
2835
             /* nm_state is read under the mount mutex. */
2836         mtx_lock(&nmp->nm_mtx);
2837         if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
2838                 mtx_unlock(&nmp->nm_mtx);
2839                 return (0);
2840         }
2841         mtx_unlock(&nmp->nm_mtx);
2842         nfsstats.rpccnt[NFSPROC_COMMIT]++;
2843         mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
2844         mb = mreq;
2845         bpos = mtod(mb, caddr_t);
2846         nfsm_fhtom(vp, 1);
             /* Three words after the handle: offset (two words) and count. */
2847         tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED);
2848         txdr_hyper(offset, tl);
2849         tl += 2;
2850         *tl = txdr_unsigned(cnt);
2851         nfsm_request(vp, NFSPROC_COMMIT, td, cred);
2852         nfsm_wcc_data(vp, wccflag);
2853         if (!error) {
                     /*
                      * Compare the returned verifier with the cached one.
                      * NOTE(review): nm_verf is compared and updated here
                      * without nm_mtx held -- confirm that is acceptable.
                      */
2854                 tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF);
2855                 if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
2856                         NFSX_V3WRITEVERF)) {
2857                         bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
2858                                 NFSX_V3WRITEVERF);
2859                         error = NFSERR_STALEWRITEVERF;
2860                 }
2861         }
2862         m_freem(mrep);
2863 nfsmout:
2864         return (error);
2865 }
2866
2867 /*
2868  * Strategy routine.
2869  * Synchronous buffers are handed straight to nfs_doio().  Asynchronous
2870  * buffers are first offered to nfs_asyncio(); when that returns non-zero
2871  * the request falls back to nfs_doio() in the calling thread.
2872  */
2873 static int
2874 nfs_strategy(struct vop_strategy_args *ap)
2875 {
2876         struct vnode *vp = ap->a_vp;
2877         struct buf *bp = ap->a_bp;
2878         struct ucred *cred;
2879
2880         KASSERT(!(bp->b_flags & B_DONE),
2881             ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
2882         BUF_ASSERT_HELD(bp);
2883
2884         /* BIO_READ uses the read credential; anything else the write one. */
2885         cred = (bp->b_iocmd == BIO_READ) ? bp->b_rcred : bp->b_wcred;
2886
2887         /*
2888          * An async buffer for which nfs_asyncio() returned 0 needs
2889          * nothing more from this thread.
2890          */
2891         if ((bp->b_flags & B_ASYNC) != 0 &&
2892             nfs_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread) == 0)
2893                 return (0);
2894
2895         (void)nfs_doio(vp, bp, cred, curthread);
2896         return (0);
2897 }
2898
2899 /*
2900  * fsync vnode op: flush via nfs_flush(), always with commit == 1.
2901  */
2902 static int
2903 nfs_fsync(struct vop_fsync_args *ap)
2904 {
2905         struct vnode *vp = ap->a_vp;
2906
2907         return (nfs_flush(vp, ap->a_waitfor, 1));
2908 }
2909
2910 /*
2911  * Flush all the blocks associated with a vnode.
2912  *      Walk through the buffer pool and push any dirty pages
2913  *      associated with the vnode.
2914  */
2915 static int
2916 nfs_flush(struct vnode *vp, int waitfor, int commit)
2917 {
2918         struct nfsnode *np = VTONFS(vp);
2919         struct buf *bp;
2920         int i;
2921         struct buf *nbp;
2922         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2923         int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
2924         int passone = 1;
2925         u_quad_t off, endoff, toff;
2926         struct ucred* wcred = NULL;
2927         struct buf **bvec = NULL;
2928         struct bufobj *bo;
2929         struct thread *td = curthread;
2930 #ifndef NFS_COMMITBVECSIZ
2931 #define NFS_COMMITBVECSIZ       20
2932 #endif
2933         struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
2934         int bvecsize = 0, bveccount;
2935
2936         if (nmp->nm_flag & NFSMNT_INT)
2937                 slpflag = NFS_PCATCH;
2938         if (!commit)
2939                 passone = 0;
2940         bo = &vp->v_bufobj;
2941         /*
2942          * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
2943          * server, but has not been committed to stable storage on the server
2944          * yet. On the first pass, the byte range is worked out and the commit
2945          * rpc is done. On the second pass, nfs_writebp() is called to do the
2946          * job.
2947          */
2948 again:
2949         off = (u_quad_t)-1;
2950         endoff = 0;
2951         bvecpos = 0;
2952         if (NFS_ISV3(vp) && commit) {
2953                 if (bvec != NULL && bvec != bvec_on_stack)
2954                         free(bvec, M_TEMP);
2955                 /*
2956                  * Count up how many buffers waiting for a commit.
2957                  */
2958                 bveccount = 0;
2959                 BO_LOCK(bo);
2960                 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
2961                         if (!BUF_ISLOCKED(bp) &&
2962                             (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
2963                                 == (B_DELWRI | B_NEEDCOMMIT))
2964                                 bveccount++;
2965                 }
2966                 /*
2967                  * Allocate space to remember the list of bufs to commit.  It is
2968                  * important to use M_NOWAIT here to avoid a race with nfs_write.
2969                  * If we can't get memory (for whatever reason), we will end up
2970                  * committing the buffers one-by-one in the loop below.
2971                  */
2972                 if (bveccount > NFS_COMMITBVECSIZ) {
2973                         /*
2974                          * Release the vnode interlock to avoid a lock
2975                          * order reversal.
2976                          */
2977                         BO_UNLOCK(bo);
2978                         bvec = (struct buf **)
2979                                 malloc(bveccount * sizeof(struct buf *),
2980                                        M_TEMP, M_NOWAIT);
2981                         BO_LOCK(bo);
2982                         if (bvec == NULL) {
2983                                 bvec = bvec_on_stack;
2984                                 bvecsize = NFS_COMMITBVECSIZ;
2985                         } else
2986                                 bvecsize = bveccount;
2987                 } else {
2988                         bvec = bvec_on_stack;
2989                         bvecsize = NFS_COMMITBVECSIZ;
2990                 }
2991                 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
2992                         if (bvecpos >= bvecsize)
2993                                 break;
2994                         if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
2995                                 nbp = TAILQ_NEXT(bp, b_bobufs);
2996                                 continue;
2997                         }
2998                         if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
2999                             (B_DELWRI | B_NEEDCOMMIT)) {
3000                                 BUF_UNLOCK(bp);
3001                                 nbp = TAILQ_NEXT(bp, b_bobufs);
3002                                 continue;
3003                         }
3004                         BO_UNLOCK(bo);
3005                         bremfree(bp);
3006                         /*
3007                          * Work out if all buffers are using the same cred
3008                          * so we can deal with them all with one commit.
3009                          *
3010                          * NOTE: we are not clearing B_DONE here, so we have
3011                          * to do it later on in this routine if we intend to
3012                          * initiate I/O on the bp.
3013                          *
3014                          * Note: to avoid loopback deadlocks, we do not
3015                          * assign b_runningbufspace.
3016                          */
3017                         if (wcred == NULL)
3018                                 wcred = bp->b_wcred;
3019                         else if (wcred != bp->b_wcred)
3020                                 wcred = NOCRED;
3021                         vfs_busy_pages(bp, 1);
3022
3023                         BO_LOCK(bo);
3024                         /*
3025                          * bp is protected by being locked, but nbp is not
3026                          * and vfs_busy_pages() may sleep.  We have to
3027                          * recalculate nbp.
3028                          */
3029                         nbp = TAILQ_NEXT(bp, b_bobufs);
3030
3031                         /*
3032                          * A list of these buffers is kept so that the
3033                          * second loop knows which buffers have actually
3034                          * been committed. This is necessary, since there
3035                          * may be a race between the commit rpc and new
3036                          * uncommitted writes on the file.
3037                          */
3038                         bvec[bvecpos++] = bp;
3039                         toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
3040                                 bp->b_dirtyoff;
3041                         if (toff < off)
3042                                 off = toff;
3043                         toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
3044                         if (toff > endoff)
3045                                 endoff = toff;
3046                 }
3047                 BO_UNLOCK(bo);
3048         }
3049         if (bvecpos > 0) {
3050                 /*
3051                  * Commit data on the server, as required.
3052                  * If all bufs are using the same wcred, then use that with
3053                  * one call for all of them, otherwise commit each one
3054                  * separately.
3055                  */
3056                 if (wcred != NOCRED)
3057                         retv = nfs_commit(vp, off, (int)(endoff - off),
3058                                           wcred, td);
3059                 else {
3060                         retv = 0;
3061                         for (i = 0; i < bvecpos; i++) {
3062                                 off_t off, size;
3063                                 bp = bvec[i];
3064                                 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
3065                                         bp->b_dirtyoff;
3066                                 size = (u_quad_t)(bp->b_dirtyend
3067                                                   - bp->b_dirtyoff);
3068                                 retv = nfs_commit(vp, off, (int)size,
3069                                                   bp->b_wcred, td);
3070                                 if (retv) break;
3071                         }
3072                 }
3073
3074                 if (retv == NFSERR_STALEWRITEVERF)
3075                         nfs_clearcommit(vp->v_mount);
3076
3077                 /*
3078                  * Now, either mark the blocks I/O done or mark the
3079                  * blocks dirty, depending on whether the commit
3080                  * succeeded.
3081                  */
3082                 for (i = 0; i < bvecpos; i++) {
3083                         bp = bvec[i];
3084                         bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
3085                         if (retv) {
3086                                 /*
3087                                  * Error, leave B_DELWRI intact
3088                                  */
3089                                 vfs_unbusy_pages(bp);
3090                                 brelse(bp);
3091                         } else {
3092                                 /*
3093                                  * Success, remove B_DELWRI ( bundirty() ).
3094                                  *
3095                                  * b_dirtyoff/b_dirtyend seem to be NFS
3096                                  * specific.  We should probably move that
3097                                  * into bundirty(). XXX
3098                                  */
3099                                 bufobj_wref(bo);
3100                                 bp->b_flags |= B_ASYNC;
3101                                 bundirty(bp);
3102                                 bp->b_flags &= ~B_DONE;
3103                                 bp->b_ioflags &= ~BIO_ERROR;
3104                                 bp->b_dirtyoff = bp->b_dirtyend = 0;
3105                                 bufdone(bp);
3106                         }
3107                 }
3108         }
3109
3110         /*
3111          * Start/do any write(s) that are required.
3112          */
3113 loop:
3114         BO_LOCK(bo);
3115         TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
3116                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
3117                         if (waitfor != MNT_WAIT || passone)
3118                                 continue;
3119
3120                         error = BUF_TIMELOCK(bp,
3121                             LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
3122                             BO_MTX(bo), "nfsfsync", slpflag, slptimeo);
3123                         if (error == 0) {
3124                                 BUF_UNLOCK(bp);
3125                                 goto loop;
3126                         }
3127                         if (error == ENOLCK) {
3128                                 error = 0;
3129                                 goto loop;
3130                         }
3131                         if (nfs_sigintr(nmp, td)) {
3132                                 error = EINTR;
3133                                 goto done;
3134                         }
3135                         if (slpflag & PCATCH) {
3136                                 slpflag = 0;
3137                                 slptimeo = 2 * hz;
3138                         }
3139                         goto loop;
3140                 }
3141                 if ((bp->b_flags & B_DELWRI) == 0)
3142                         panic("nfs_fsync: not dirty");
3143                 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
3144                         BUF_UNLOCK(bp);
3145                         continue;
3146                 }
3147                 BO_UNLOCK(bo);
3148                 bremfree(bp);
3149                 if (passone || !commit)
3150                     bp->b_flags |= B_ASYNC;
3151                 else
3152                     bp->b_flags |= B_ASYNC;
3153                 bwrite(bp);
3154                 if (nfs_sigintr(nmp, td)) {
3155                         error = EINTR;
3156                         goto done;
3157                 }
3158                 goto loop;
3159         }
3160         if (passone) {
3161                 passone = 0;
3162                 BO_UNLOCK(bo);
3163                 goto again;
3164         }
3165         if (waitfor == MNT_WAIT) {
3166                 while (bo->bo_numoutput) {
3167                         error = bufobj_wwait(bo, slpflag, slptimeo);
3168                         if (error) {
3169                             BO_UNLOCK(bo);
3170                             error = nfs_sigintr(nmp, td);
3171                             if (error)
3172                                 goto done;
3173                             if (slpflag & PCATCH) {
3174                                 slpflag = 0;
3175                                 slptimeo = 2 * hz;
3176                             }
3177                             BO_LOCK(bo);
3178                         }
3179                 }
3180                 if (bo->bo_dirty.bv_cnt != 0 && commit) {
3181                         BO_UNLOCK(bo);
3182                         goto loop;
3183                 }
3184                 /*
3185                  * Wait for all the async IO requests to drain
3186                  */
3187                 BO_UNLOCK(bo);
3188                 mtx_lock(&np->n_mtx);
3189                 while (np->n_directio_asyncwr > 0) {
3190                         np->n_flag |= NFSYNCWAIT;
3191                         error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr,
3192                                            &np->n_mtx, slpflag | (PRIBIO + 1),
3193                                            "nfsfsync", 0);
3194                         if (error) {
3195                                 if (nfs_sigintr(nmp, td)) {
3196                                         mtx_unlock(&np->n_mtx);
3197                                         error = EINTR;
3198                                         goto done;
3199                                 }
3200                         }
3201                 }
3202                 mtx_unlock(&np->n_mtx);
3203         } else
3204                 BO_UNLOCK(bo);
3205         mtx_lock(&np->n_mtx);
3206         if (np->n_flag & NWRITEERR) {
3207                 error = np->n_error;
3208                 np->n_flag &= ~NWRITEERR;
3209         }
3210         if (commit && bo->bo_dirty.bv_cnt == 0 &&
3211             bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
3212                 np->n_flag &= ~NMODIFIED;
3213         mtx_unlock(&np->n_mtx);
3214 done:
3215         if (bvec != NULL && bvec != bvec_on_stack)
3216                 free(bvec, M_TEMP);
3217         return (error);
3218 }
3219
3220 /*
3221  * NFS advisory byte-level locks.
3222  */
3223 static int
3224 nfs_advlock(struct vop_advlock_args *ap)
3225 {
3226         struct vnode *vp = ap->a_vp;
3227         u_quad_t size;
3228         int error;
3229
3230         error = vn_lock(vp, LK_SHARED);
3231         if (error)
3232                 return (error);
3233         if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
3234                 size = VTONFS(vp)->n_size;
3235                 VOP_UNLOCK(vp, 0);
3236                 error = lf_advlock(ap, &(vp->v_lockf), size);
3237         } else {
3238                 if (nfs_advlock_p)
3239                         error = nfs_advlock_p(ap);
3240                 else
3241                         error = ENOLCK;
3242         }
3243
3244         return (error);
3245 }
3246
3247 /*
3248  * NFS advisory byte-level locks.
3249  */
3250 static int
3251 nfs_advlockasync(struct vop_advlockasync_args *ap)
3252 {
3253         struct vnode *vp = ap->a_vp;
3254         u_quad_t size;
3255         int error;
3256
3257         error = vn_lock(vp, LK_SHARED);
3258         if (error)
3259                 return (error);
3260         if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
3261                 size = VTONFS(vp)->n_size;
3262                 VOP_UNLOCK(vp, 0);
3263                 error = lf_advlockasync(ap, &(vp->v_lockf), size);
3264         } else {
3265                 VOP_UNLOCK(vp, 0);
3266                 error = EOPNOTSUPP;
3267         }
3268         return (error);
3269 }
3270
3271 /*
3272  * Print out the contents of an nfsnode.
3273  */
3274 static int
3275 nfs_print(struct vop_print_args *ap)
3276 {
3277         struct vnode *vp = ap->a_vp;
3278         struct nfsnode *np = VTONFS(vp);
3279
3280         nfs_printf("\tfileid %ld fsid 0x%x",
3281            np->n_vattr.va_fileid, np->n_vattr.va_fsid);
3282         if (vp->v_type == VFIFO)
3283                 fifo_printinfo(vp);
3284         printf("\n");
3285         return (0);
3286 }
3287
3288 /*
3289  * This is the "real" nfs::bwrite(struct buf*).
3290  * We set B_CACHE if this is a VMIO buffer.
3291  */
3292 int
3293 nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
3294 {
3295         int s;
3296         int oldflags = bp->b_flags;
3297 #if 0
3298         int retv = 1;
3299         off_t off;
3300 #endif
3301
3302         BUF_ASSERT_HELD(bp);
3303
3304         if (bp->b_flags & B_INVAL) {
3305                 brelse(bp);
3306                 return(0);
3307         }
3308
3309         bp->b_flags |= B_CACHE;
3310
3311         /*
3312          * Undirty the bp.  We will redirty it later if the I/O fails.
3313          */
3314
3315         s = splbio();
3316         bundirty(bp);
3317         bp->b_flags &= ~B_DONE;
3318         bp->b_ioflags &= ~BIO_ERROR;
3319         bp->b_iocmd = BIO_WRITE;
3320
3321         bufobj_wref(bp->b_bufobj);
3322         curthread->td_ru.ru_oublock++;
3323         splx(s);
3324
3325         /*
3326          * Note: to avoid loopback deadlocks, we do not
3327          * assign b_runningbufspace.
3328          */
3329         vfs_busy_pages(bp, 1);
3330
3331         BUF_KERNPROC(bp);
3332         bp->b_iooffset = dbtob(bp->b_blkno);
3333         bstrategy(bp);
3334
3335         if( (oldflags & B_ASYNC) == 0) {
3336                 int rtval = bufwait(bp);
3337
3338                 if (oldflags & B_DELWRI) {
3339                         s = splbio();
3340                         reassignbuf(bp);
3341                         splx(s);
3342                 }
3343                 brelse(bp);
3344                 return (rtval);
3345         }
3346
3347         return (0);
3348 }
3349
3350 /*
3351  * nfs special file access vnode op.
3352  * Essentially just get vattr and then imitate iaccess() since the device is
3353  * local to the client.
3354  */
3355 static int
3356 nfsspec_access(struct vop_access_args *ap)
3357 {
3358         struct vattr *vap;
3359         struct ucred *cred = ap->a_cred;
3360         struct vnode *vp = ap->a_vp;
3361         accmode_t accmode = ap->a_accmode;
3362         struct vattr vattr;
3363         int error;
3364
3365         /*
3366          * Disallow write attempts on filesystems mounted read-only;
3367          * unless the file is a socket, fifo, or a block or character
3368          * device resident on the filesystem.
3369          */
3370         if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3371                 switch (vp->v_type) {
3372                 case VREG:
3373                 case VDIR:
3374                 case VLNK:
3375                         return (EROFS);
3376                 default:
3377                         break;
3378                 }
3379         }
3380         vap = &vattr;
3381         error = VOP_GETATTR(vp, vap, cred);
3382         if (error)
3383                 goto out;
3384         error  = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
3385                          accmode, cred, NULL);
3386 out:
3387         return error;
3388 }
3389
3390 /*
3391  * Read wrapper for fifos.
3392  */
3393 static int
3394 nfsfifo_read(struct vop_read_args *ap)
3395 {
3396         struct nfsnode *np = VTONFS(ap->a_vp);
3397         int error;
3398
3399         /*
3400          * Set access flag.
3401          */
3402         mtx_lock(&np->n_mtx);
3403         np->n_flag |= NACC;
3404         getnanotime(&np->n_atim);
3405         mtx_unlock(&np->n_mtx);
3406         error = fifo_specops.vop_read(ap);
3407         return error;
3408 }
3409
3410 /*
3411  * Write wrapper for fifos.
3412  */
3413 static int
3414 nfsfifo_write(struct vop_write_args *ap)
3415 {
3416         struct nfsnode *np = VTONFS(ap->a_vp);
3417
3418         /*
3419          * Set update flag.
3420          */
3421         mtx_lock(&np->n_mtx);
3422         np->n_flag |= NUPD;
3423         getnanotime(&np->n_mtim);
3424         mtx_unlock(&np->n_mtx);
3425         return(fifo_specops.vop_write(ap));
3426 }
3427
3428 /*
3429  * Close wrapper for fifos.
3430  *
3431  * Update the times on the nfsnode then do fifo close.
3432  */
3433 static int
3434 nfsfifo_close(struct vop_close_args *ap)
3435 {
3436         struct vnode *vp = ap->a_vp;
3437         struct nfsnode *np = VTONFS(vp);
3438         struct vattr vattr;
3439         struct timespec ts;
3440
3441         mtx_lock(&np->n_mtx);
3442         if (np->n_flag & (NACC | NUPD)) {
3443                 getnanotime(&ts);
3444                 if (np->n_flag & NACC)
3445                         np->n_atim = ts;
3446                 if (np->n_flag & NUPD)
3447                         np->n_mtim = ts;
3448                 np->n_flag |= NCHG;
3449                 if (vrefcnt(vp) == 1 &&
3450                     (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3451                         VATTR_NULL(&vattr);
3452                         if (np->n_flag & NACC)
3453                                 vattr.va_atime = np->n_atim;
3454                         if (np->n_flag & NUPD)
3455                                 vattr.va_mtime = np->n_mtim;
3456                         mtx_unlock(&np->n_mtx);
3457                         (void)VOP_SETATTR(vp, &vattr, ap->a_cred);
3458                         goto out;
3459                 }
3460         }
3461         mtx_unlock(&np->n_mtx);
3462 out:
3463         return (fifo_specops.vop_close(ap));
3464 }
3465
3466 /*
3467  * Just call nfs_writebp() with the force argument set to 1.
3468  *
3469  * NOTE: B_DONE may or may not be set in a_bp on call.
3470  */
3471 static int
3472 nfs_bwrite(struct buf *bp)
3473 {
3474
3475         return (nfs_writebp(bp, 1, curthread));
3476 }
3477
3478 struct buf_ops buf_ops_nfs = {
3479         .bop_name       =       "buf_ops_nfs",
3480         .bop_write      =       nfs_bwrite,
3481         .bop_strategy   =       bufstrategy,
3482         .bop_sync       =       bufsync,
3483         .bop_bdflush    =       bufbdflush,
3484 };