2 * Copyright (c) 1997-2006 Erez Zadok
3 * Copyright (c) 1990 Jan-Simon Pendry
4 * Copyright (c) 1990 Imperial College of Science, Technology & Medicine
5 * Copyright (c) 1990 The Regents of the University of California.
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry at Imperial College, London.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgment:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * File: am-utils/amd/nfs_subr.c
46 #endif /* HAVE_CONFIG_H */
51 * Convert from UN*X to NFS error code.
52 * Some systems, like Linux, define their own (see
53 * conf/mount/mount_linux.h).
56 # define nfs_error(e) ((nfsstat)(e))
57 #endif /* nfs_error */
60 * File Handle structure
62 * This is interpreted by indexing the exported array
63 * by fhh_id (for old-style filehandles), or by retrieving
64 * the node name from fhh_path (for new-style filehandles).
66 * The whole structure is mapped onto a standard fhandle_t
70 u_int fhh_gen; /* generation number */
73 int fhh_type; /* old or new am_fh */
74 pid_t fhh_pid; /* process id */
75 int fhh_id; /* map id */
77 char fhh_path[NFS_FHSIZE-sizeof(u_int)]; /* path to am_node */
82 /* forward declarations */
83 /* converting am-filehandles to mount-points */
84 static am_node *fh_to_mp3(am_nfs_fh *fhp, int *rp, int vop);
85 static am_node *fh_to_mp(am_nfs_fh *fhp);
86 static void count_map_entries(const am_node *mp, u_int *out_blocks, u_int *out_bfree, u_int *out_bavail);
90 do_readlink(am_node *mp, int *error_return)
95 * If there is a readlink method then use it,
96 * otherwise if a link exists use that,
97 * otherwise use the mount point.
99 if (mp->am_mnt->mf_ops->readlink) {
101 mp = (*mp->am_mnt->mf_ops->readlink) (mp, &retry);
103 *error_return = retry;
106 /* reschedule_timeout_mp(); */
112 ln = mp->am_mnt->mf_mount;
120 nfsproc_null_2_svc(voidp argp, struct svc_req *rqstp)
129 nfsproc_getattr_2_svc(am_nfs_fh *argp, struct svc_req *rqstp)
131 static nfsattrstat res;
134 time_t now = clocktime(NULL);
136 if (amuDebug(D_TRACE))
137 plog(XLOG_DEBUG, "getattr:");
139 mp = fh_to_mp3(argp, &retry, VLOOK_CREATE);
141 if (amuDebug(D_TRACE))
142 plog(XLOG_DEBUG, "\tretry=%d", retry);
148 res.ns_status = nfs_error(retry);
153 if (amuDebug(D_TRACE))
154 plog(XLOG_DEBUG, "\tstat(%s), size = %d, mtime=%ld.%ld",
156 (int) res.ns_u.ns_attr_u.na_size,
157 (long) res.ns_u.ns_attr_u.na_mtime.nt_seconds,
158 (long) res.ns_u.ns_attr_u.na_mtime.nt_useconds);
160 /* Delay unmount of what was looked up */
161 if (mp->am_timeo_w < 4 * gopt.am_timeo_w)
162 mp->am_timeo_w += gopt.am_timeo_w;
163 mp->am_ttl = now + mp->am_timeo_w;
165 mp->am_stats.s_getattr++;
171 nfsproc_setattr_2_svc(nfssattrargs *argp, struct svc_req *rqstp)
173 static nfsattrstat res;
175 if (!fh_to_mp(&argp->sag_fhandle))
176 res.ns_status = nfs_error(ESTALE);
178 res.ns_status = nfs_error(EROFS);
185 nfsproc_root_2_svc(voidp argp, struct svc_req *rqstp)
194 nfsproc_lookup_2_svc(nfsdiropargs *argp, struct svc_req *rqstp)
196 static nfsdiropres res;
202 if (amuDebug(D_TRACE))
203 plog(XLOG_DEBUG, "lookup:");
205 /* finally, find the effective uid/gid from RPC request */
206 if (getcreds(rqstp, &uid, &gid, nfsxprt) < 0)
207 plog(XLOG_ERROR, "cannot get uid/gid from RPC credentials");
208 xsnprintf(opt_uid, sizeof(uid_str), "%d", (int) uid);
209 xsnprintf(opt_gid, sizeof(gid_str), "%d", (int) gid);
211 mp = fh_to_mp3(&argp->da_fhandle, &retry, VLOOK_CREATE);
217 res.dr_status = nfs_error(retry);
221 if (amuDebug(D_TRACE))
222 plog(XLOG_DEBUG, "\tlookup(%s, %s)", mp->am_path, argp->da_name);
223 ap = mp->am_mnt->mf_ops->lookup_child(mp, argp->da_name, &error, VLOOK_CREATE);
225 ap = mp->am_mnt->mf_ops->mount_child(ap, &error);
231 res.dr_status = nfs_error(error);
234 * XXX: EXPERIMENTAL! Delay unmount of what was looked up. This
235 * should reduce the chance of a race condition between unmounting an
236 * entry synchronously, and re-mounting it asynchronously.
238 if (ap->am_ttl < mp->am_ttl)
239 ap->am_ttl = mp->am_ttl;
240 mp_to_fh(ap, &res.dr_u.dr_drok_u.drok_fhandle);
241 res.dr_u.dr_drok_u.drok_attributes = ap->am_fattr;
242 res.dr_status = NFS_OK;
244 mp->am_stats.s_lookup++;
245 /* reschedule_timeout_mp(); */
253 nfs_quick_reply(am_node *mp, int error)
255 SVCXPRT *transp = mp->am_transp;
257 xdrproc_t xdr_result = (xdrproc_t) xdr_diropres;
260 * If there's a transp structure then we can reply to the client's
261 * nfs lookup request.
266 * Construct a valid reply to a lookup request. Same
267 * code as in nfsproc_lookup_2_svc() above.
269 mp_to_fh(mp, &res.dr_u.dr_drok_u.drok_fhandle);
270 res.dr_u.dr_drok_u.drok_attributes = mp->am_fattr;
271 res.dr_status = NFS_OK;
274 * Return the error that was passed to us.
276 res.dr_status = nfs_error(error);
281 if (!svc_sendreply(transp, (XDRPROC_T_TYPE) xdr_result, (SVC_IN_ARG_TYPE) & res))
282 svcerr_systemerr(transp);
285 * Free up transp. It's only used for one reply.
287 XFREE(mp->am_transp);
288 dlog("Quick reply sent for %s", mp->am_mnt->mf_mount);
294 nfsproc_readlink_2_svc(am_nfs_fh *argp, struct svc_req *rqstp)
296 static nfsreadlinkres res;
300 if (amuDebug(D_TRACE))
301 plog(XLOG_DEBUG, "readlink:");
303 mp = fh_to_mp3(argp, &retry, VLOOK_CREATE);
310 res.rlr_status = nfs_error(retry);
312 char *ln = do_readlink(mp, &retry);
315 res.rlr_status = NFS_OK;
316 if (amuDebug(D_TRACE) && ln)
317 plog(XLOG_DEBUG, "\treadlink(%s) = %s", mp->am_path, ln);
318 res.rlr_u.rlr_data_u = ln;
319 mp->am_stats.s_readlink++;
327 nfsproc_read_2_svc(nfsreadargs *argp, struct svc_req *rqstp)
329 static nfsreadres res;
331 memset((char *) &res, 0, sizeof(res));
332 res.rr_status = nfs_error(EACCES);
339 nfsproc_writecache_2_svc(voidp argp, struct svc_req *rqstp)
348 nfsproc_write_2_svc(nfswriteargs *argp, struct svc_req *rqstp)
350 static nfsattrstat res;
352 if (!fh_to_mp(&argp->wra_fhandle))
353 res.ns_status = nfs_error(ESTALE);
355 res.ns_status = nfs_error(EROFS);
362 nfsproc_create_2_svc(nfscreateargs *argp, struct svc_req *rqstp)
364 static nfsdiropres res;
366 if (!fh_to_mp(&argp->ca_where.da_fhandle))
367 res.dr_status = nfs_error(ESTALE);
369 res.dr_status = nfs_error(EROFS);
376 unlink_or_rmdir(nfsdiropargs *argp, struct svc_req *rqstp, int unlinkp)
381 am_node *mp = fh_to_mp3(&argp->da_fhandle, &retry, VLOOK_DELETE);
387 res = nfs_error(retry);
391 if (mp->am_fattr.na_type != NFDIR) {
392 res = nfs_error(ENOTDIR);
396 if (amuDebug(D_TRACE))
397 plog(XLOG_DEBUG, "\tremove(%s, %s)", mp->am_path, argp->da_name);
399 mp = mp->am_mnt->mf_ops->lookup_child(mp, argp->da_name, &retry, VLOOK_DELETE);
407 * Usual NFS workaround...
409 else if (retry == ENOENT)
411 res = nfs_error(retry);
413 forcibly_timeout_mp(mp);
423 nfsproc_remove_2_svc(nfsdiropargs *argp, struct svc_req *rqstp)
425 return unlink_or_rmdir(argp, rqstp, TRUE);
430 nfsproc_rename_2_svc(nfsrenameargs *argp, struct svc_req *rqstp)
434 if (!fh_to_mp(&argp->rna_from.da_fhandle) || !fh_to_mp(&argp->rna_to.da_fhandle))
435 res = nfs_error(ESTALE);
437 * If the kernel is doing clever things with referenced files
438 * then let it pretend...
440 else if (NSTREQ(argp->rna_to.da_name, ".nfs", 4))
443 * otherwise a failure
446 res = nfs_error(EROFS);
453 nfsproc_link_2_svc(nfslinkargs *argp, struct svc_req *rqstp)
457 if (!fh_to_mp(&argp->la_fhandle) || !fh_to_mp(&argp->la_to.da_fhandle))
458 res = nfs_error(ESTALE);
460 res = nfs_error(EROFS);
467 nfsproc_symlink_2_svc(nfssymlinkargs *argp, struct svc_req *rqstp)
471 if (!fh_to_mp(&argp->sla_from.da_fhandle))
472 res = nfs_error(ESTALE);
474 res = nfs_error(EROFS);
481 nfsproc_mkdir_2_svc(nfscreateargs *argp, struct svc_req *rqstp)
483 static nfsdiropres res;
485 if (!fh_to_mp(&argp->ca_where.da_fhandle))
486 res.dr_status = nfs_error(ESTALE);
488 res.dr_status = nfs_error(EROFS);
495 nfsproc_rmdir_2_svc(nfsdiropargs *argp, struct svc_req *rqstp)
497 return unlink_or_rmdir(argp, rqstp, FALSE);
502 nfsproc_readdir_2_svc(nfsreaddirargs *argp, struct svc_req *rqstp)
504 static nfsreaddirres res;
505 static nfsentry e_res[MAX_READDIR_ENTRIES];
509 if (amuDebug(D_TRACE))
510 plog(XLOG_DEBUG, "readdir:");
512 mp = fh_to_mp3(&argp->rda_fhandle, &retry, VLOOK_CREATE);
518 res.rdr_status = nfs_error(retry);
520 if (amuDebug(D_TRACE))
521 plog(XLOG_DEBUG, "\treaddir(%s)", mp->am_path);
522 res.rdr_status = nfs_error((*mp->am_mnt->mf_ops->readdir)
523 (mp, argp->rda_cookie,
524 &res.rdr_u.rdr_reply_u, e_res, argp->rda_count));
525 mp->am_stats.s_readdir++;
533 nfsproc_statfs_2_svc(am_nfs_fh *argp, struct svc_req *rqstp)
535 static nfsstatfsres res;
540 if (amuDebug(D_TRACE))
541 plog(XLOG_DEBUG, "statfs:");
543 mp = fh_to_mp3(argp, &retry, VLOOK_CREATE);
549 res.sfr_status = nfs_error(retry);
552 if (amuDebug(D_TRACE))
553 plog(XLOG_DEBUG, "\tstat_fs(%s)", mp->am_path);
556 * just return faked up file system information
558 fp = &res.sfr_u.sfr_reply_u;
560 fp->sfrok_tsize = 1024;
561 fp->sfrok_bsize = 1024;
563 /* check if map is browsable and show_statfs_entries=yes */
564 if ((gopt.flags & CFM_SHOW_STATFS_ENTRIES) &&
565 mp->am_mnt && mp->am_mnt->mf_mopts) {
566 mnt.mnt_opts = mp->am_mnt->mf_mopts;
567 if (amu_hasmntopt(&mnt, "browsable")) {
568 count_map_entries(mp,
574 fp->sfrok_blocks = 0; /* set to 1 if you don't want empty automounts */
576 fp->sfrok_bavail = 0;
579 res.sfr_status = NFS_OK;
580 mp->am_stats.s_statfs++;
588 * count how many total entries there are in a map, and how many
589 * of them are in use.
592 count_map_entries(const am_node *mp, u_int *out_blocks, u_int *out_bfree, u_int *out_bavail)
594 u_int blocks, bfree, bavail, i;
599 blocks = bfree = bavail = 0;
605 mmp = (mnt_map *) mf->mf_private;
609 /* iterate over keys */
610 for (i = 0; i < NKVHASH; i++) {
611 for (k = mmp->kvhash[i]; k ; k = k->next) {
616 * XXX: Need to count how many are actively in use and recompute
617 * bfree and bavail based on it.
623 *out_blocks = blocks;
625 *out_bavail = bavail;
630 * Convert from file handle to automount node.
633 fh_to_mp3(am_nfs_fh *fhp, int *rp, int vop)
635 struct am_fh *fp = (struct am_fh *) fhp;
638 if (fp->u.s.fhh_type != 0) {
639 /* New filehandle type */
640 int len = sizeof(*fhp) - sizeof(fp->fhh_gen);
641 char *path = xmalloc(len+1);
643 * Because fhp is treated as a filehandle we use memcpy
644 * instead of xstrlcpy.
646 memcpy(path, (char *) fp->u.fhh_path, len);
648 /* dlog("fh_to_mp3: new filehandle: %s", path); */
650 ap = path_to_exported_ap(path);
653 /* dlog("fh_to_mp3: old filehandle: %d", fp->u.s.fhh_id); */
655 * Check process id matches
656 * If it doesn't then it is probably
657 * from an old kernel-cached filehandle
658 * which is now out of date.
660 if (fp->u.s.fhh_pid != get_server_pid()) {
661 dlog("fh_to_mp3: wrong pid %ld != my pid %ld",
662 (long) fp->u.s.fhh_pid, get_server_pid());
667 * Get hold of the supposed mount node
669 ap = get_exported_ap(fp->u.s.fhh_id);
673 * Check the generation number in the node
674 * matches the one from the kernel. If not
675 * then the old node has been timed out and
676 * a new one allocated.
678 if (ap != NULL && ap->am_gen != fp->fhh_gen)
682 * If it doesn't exist then drop the request
689 * If the node is hung then locate a new node
690 * for it. This implements the replicated filesystem
693 if (ap->am_mnt && FSRV_ISDOWN(ap->am_mnt->mf_server) && ap->am_parent) {
695 am_node *orig_ap = ap;
697 dlog("fh_to_mp3: %s (%s) is hung: lookup alternative file server",
698 orig_ap->am_path, orig_ap->am_mnt->mf_info);
701 * Update modify time of parent node.
702 * With any luck the kernel will re-stat
703 * the child node and get new information.
705 clocktime(&orig_ap->am_fattr.na_mtime);
708 * Call the parent's lookup routine for an object
709 * with the same name. This may return -1 in error
710 * if a mount is in progress. In any case, if no
711 * mount node is returned the error code is propagated
714 if (vop == VLOOK_CREATE) {
715 ap = orig_ap->am_parent->am_mnt->mf_ops->lookup_child(orig_ap->am_parent, orig_ap->am_name, &error, vop);
717 ap = orig_ap->am_parent->am_mnt->mf_ops->mount_child(ap, &error);
723 if (error < 0 && amd_state == Finishing)
730 * Update last access to original node. This
731 * avoids timing it out and so sending ESTALE
732 * back to the kernel.
733 * XXX - Not sure we need this anymore (jsp, 90/10/6).
741 * Disallow references to objects being unmounted, unless
742 * they are automount points.
744 if (ap->am_mnt && (ap->am_mnt->mf_flags & MFF_UNMOUNTING) &&
745 !(ap->am_flags & AMF_ROOT)) {
746 if (amd_state == Finishing)
755 if (!ap || !ap->am_mnt) {
757 * If we are shutting down then it is likely
758 * that this node has disappeared because of
759 * a fast timeout. To avoid things thrashing
760 * just pretend it doesn't exist at all. If
761 * ESTALE is returned, some NFS clients just
762 * keep retrying (stupid or what - if it's
763 * stale now, what's it going to be in 5 minutes?)
765 if (amd_state == Finishing)
777 fh_to_mp(am_nfs_fh *fhp)
781 return fh_to_mp3(fhp, &dummy, VLOOK_CREATE);
786 * Convert from automount node to file handle.
789 mp_to_fh(am_node *mp, am_nfs_fh *fhp)
792 struct am_fh *fp = (struct am_fh *) fhp;
794 memset((char *) fhp, 0, sizeof(am_nfs_fh));
796 /* Store the generation number */
797 fp->fhh_gen = mp->am_gen;
799 pathlen = strlen(mp->am_path);
800 if (pathlen <= sizeof(*fhp) - sizeof(fp->fhh_gen)) {
801 /* dlog("mp_to_fh: new filehandle: %s", mp->am_path); */
804 * Because fhp is treated as a filehandle we use memcpy instead of
807 memcpy(fp->u.fhh_path, mp->am_path, pathlen); /* making a filehandle */
810 * Take the process id
812 fp->u.s.fhh_pid = get_server_pid();
817 fp->u.s.fhh_id = mp->am_mapno;
820 * ... and the generation number (previously stored)
821 * to make a "unique" triple that will never
822 * be reallocated except across reboots (which doesn't matter)
823 * or if we are unlucky enough to be given the same
824 * pid as a previous amd (very unlikely).
826 /* dlog("mp_to_fh: old filehandle: %d", fp->u.s.fhh_id); */