2 * Copyright (c) 1997-2004 Erez Zadok
3 * Copyright (c) 1990 Jan-Simon Pendry
4 * Copyright (c) 1990 Imperial College of Science, Technology & Medicine
5 * Copyright (c) 1990 The Regents of the University of California.
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry at Imperial College, London.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgment:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * $Id: srvr_nfs.c,v 1.7.2.11 2004/01/06 03:15:16 ezk Exp $
52 #endif /* HAVE_CONFIG_H */
57 * Number of pings allowed to fail before host is declared down
58 * - three-fifths of the allowed mount time...
60 #define MAX_ALLOWED_PINGS (3 + /* for luck ... */ 1)
63 * How often to ping when starting a new server
65 #define FAST_NFS_PING 3
67 #if (FAST_NFS_PING * MAX_ALLOWED_PINGS) >= ALLOWED_MOUNT_TIME
68 # error: sanity check failed in srvr_nfs.c
70 * you cannot do things this way...
71 * sufficient fast pings must be given the chance to fail
72 * within the allowed mount time
74 #endif /* (FAST_NFS_PING * MAX_ALLOWED_PINGS) >= ALLOWED_MOUNT_TIME */
/*
 * Hand out the next ping transaction id by pre-incrementing the
 * file-scope np_xid counter.  The macro argument does not appear in
 * the expansion, so whatever token is passed is simply discarded.
 */
76 #define NPXID_ALLOC(struct ) (++np_xid)
78 /* structures and typedefs */
79 typedef struct nfs_private {
80 u_short np_mountd; /* Mount daemon port number */
81 char np_mountd_inval; /* Port *may* be invalid */
82 int np_ping; /* Number of failed ping attempts */
83 time_t np_ttl; /* Time when server is thought dead */
84 int np_xid; /* RPC transaction id for pings */
85 int np_error; /* Error during portmap request */
/*
 * Head of the queue of NFS file servers.  Both initializer fields
 * point back at the head itself; entries are added with ins_que()
 * and walked with ITER() elsewhere in this file.
 */
89 qelem nfs_srvr_list = {&nfs_srvr_list, &nfs_srvr_list};
92 static int np_xid; /* For NFS pings */
94 static char ping_buf[sizeof(struct rpc_msg) + 32];
96 #if defined(MNTTAB_OPT_PROTO) || defined(HAVE_FS_NFS3)
98 * Protocols we know about, in order of preference.
100 * Note that Solaris 8 and newer NetBSD systems are switching to UDP first,
101 * so this order may have to be adjusted for Amd in the future once more
102 * vendors make that change. -Erez 11/24/2000
104 static char *protocols[] = { "tcp", "udp", NULL };
105 #endif /* defined(MNTTAB_OPT_PROTO) || defined(HAVE_FS_NFS3) */
107 /* forward definitions */
108 static void nfs_keepalive(voidp);
113 * Flush any cached data
116 flush_srvr_nfs_cache(void)
120 ITER(fs, fserver, &nfs_srvr_list) {
121 nfs_private *np = (nfs_private *) fs->fs_private;
123 np->np_mountd_inval = TRUE;
131 * Start up the NFS ping for a particular version.
134 start_ping(u_long nfs_version)
137 struct rpc_msg ping_msg;
140 * Non nfs mounts like /afs/glue.umd.edu have ended up here.
142 if (nfs_version == 0) {
143 nfs_version = NFS_VERSION;
144 plog(XLOG_WARNING, "start_ping: nfs_version = 0 fixed");
146 plog(XLOG_INFO, "start_ping: nfs_version: %d", (int) nfs_version);
148 rpc_msg_init(&ping_msg, NFS_PROGRAM, nfs_version, NFSPROC_NULL);
151 * Create an XDR endpoint
153 xdrmem_create(&ping_xdr, ping_buf, sizeof(ping_buf), XDR_ENCODE);
156 * Create the NFS ping message
158 if (!xdr_callmsg(&ping_xdr, &ping_msg)) {
159 plog(XLOG_ERROR, "Couldn't create ping RPC message");
163 * Find out how long it is
165 ping_len = xdr_getpos(&ping_xdr);
168 * Destroy the XDR endpoint - we don't need it anymore
170 xdr_destroy(&ping_xdr);
175 * Called when a portmap reply arrives
178 got_portmap(voidp pkt, int len, struct sockaddr_in *sa, struct sockaddr_in *ia, voidp idv, int done)
180 fserver *fs2 = (fserver *) idv;
184 * Find which fileserver we are talking about
186 ITER(fs, fserver, &nfs_srvr_list)
191 u_long port = 0; /* XXX - should be short but protocol is naff */
192 int error = done ? pickup_rpc_reply(pkt, len, (voidp) &port, (XDRPROC_T_TYPE) xdr_u_long) : -1;
193 nfs_private *np = (nfs_private *) fs->fs_private;
195 if (!error && port) {
197 dlog("got port (%d) for mountd on %s", (int) port, fs->fs_host);
200 * Grab the port number. Portmap sends back
201 * a u_long in native ordering, so it
202 * needs converting to a u_short in
205 np->np_mountd = htons((u_short) port);
206 np->np_mountd_inval = FALSE;
210 dlog("Error fetching port for mountd on %s", fs->fs_host);
211 dlog("\t error=%d, port=%d", error, (int) port);
214 * Almost certainly no mountd running on remote host
216 np->np_error = error ? error : ETIMEDOUT;
219 if (fs->fs_flags & FSF_WANT)
223 dlog("Got portmap for old port request");
227 dlog("portmap request timed out");
234 * Obtain portmap information
237 call_portmap(fserver *fs, AUTH *auth, u_long prog, u_long vers, u_long prot)
239 struct rpc_msg pmap_msg;
241 char iobuf[UDPMSGSIZE];
245 rpc_msg_init(&pmap_msg, PMAPPROG, PMAPVERS, PMAPPROC_NULL);
250 len = make_rpc_packet(iobuf,
255 (XDRPROC_T_TYPE) xdr_pmap,
258 struct sockaddr_in sin;
259 memset((voidp) &sin, 0, sizeof(sin));
261 sin.sin_port = htons(PMAPPORT);
262 error = fwd_packet(RPC_XID_PORTMAP, (voidp) iobuf, len,
263 &sin, &sin, (voidp) fs, got_portmap);
273 recompute_portmap(fserver *fs)
281 error = make_nfs_auth();
284 nfs_private *np = (nfs_private *) fs->fs_private;
285 np->np_error = error;
289 if (fs->fs_version == 0)
290 plog(XLOG_WARNING, "recompute_portmap: nfs_version = 0 fixed");
292 plog(XLOG_INFO, "recompute_portmap: NFS version %d", (int) fs->fs_version);
294 if (fs->fs_version == NFS_VERSION3)
295 mnt_version = MOUNTVERS3;
297 #endif /* HAVE_FS_NFS3 */
298 mnt_version = MOUNTVERS;
300 plog(XLOG_INFO, "Using MOUNT version: %d", (int) mnt_version);
301 call_portmap(fs, nfs_auth, MOUNTPROG, mnt_version, (u_long) IPPROTO_UDP);
306 * This is called when we get a reply to an RPC ping.
307 * The value of id was taken from the nfs_private
308 * structure when the ping was transmitted.
311 nfs_pinged(voidp pkt, int len, struct sockaddr_in *sp, struct sockaddr_in *tsp, voidp idv, int done)
313 int xid = (long) idv; /* for 64-bit archs */
325 ITER(fs, fserver, &nfs_srvr_list) {
326 nfs_private *np = (nfs_private *) fs->fs_private;
327 if (np->np_xid == xid && (fs->fs_flags & FSF_PINGING)) {
329 * Reset the ping counter.
330 * Update the keepalive timer.
333 if (fs->fs_flags & FSF_DOWN) {
334 fs->fs_flags &= ~FSF_DOWN;
335 if (fs->fs_flags & FSF_VALID) {
336 srvrlog(fs, "is up");
342 srvrlog(fs, "starts up");
344 fs->fs_flags |= FSF_VALID;
349 if (fs->fs_flags & FSF_VALID) {
351 dlog("file server %s type nfs is still up", fs->fs_host);
356 fs->fs_flags |= FSF_VALID;
361 * Adjust ping interval
363 untimeout(fs->fs_cid);
364 fs->fs_cid = timeout(fs->fs_pinger, nfs_keepalive, (voidp) fs);
367 * Update ttl for this server
369 np->np_ttl = clocktime() +
370 (MAX_ALLOWED_PINGS - 1) * FAST_NFS_PING + fs->fs_pinger - 1;
375 np->np_xid = NPXID_ALLOC(struct );
378 * Failed pings is zero...
383 * Recompute portmap information if not known
385 if (np->np_mountd_inval)
386 recompute_portmap(fs);
397 dlog("Spurious ping packet");
403 * Called when no ping-reply received
406 nfs_timed_out(voidp v)
409 nfs_private *np = (nfs_private *) fs->fs_private;
412 * Another ping has failed
416 srvrlog(fs, "not responding");
419 * Not known to be up any longer
422 fs->fs_flags &= ~FSF_VALID;
425 * If ttl has expired then guess that it is dead
427 if (np->np_ttl < clocktime()) {
428 int oflags = fs->fs_flags;
430 dlog("ttl has expired");
432 if ((fs->fs_flags & FSF_DOWN) == 0) {
434 * Server was up, but is now down.
436 srvrlog(fs, "is down");
437 fs->fs_flags |= FSF_DOWN | FSF_VALID;
439 * Since the server is down, the portmap
440 * information may now be wrong, so it
441 * must be flushed from the local cache
443 flush_nfs_fhandle_cache(fs);
450 if ((fs->fs_flags & FSF_VALID) == 0)
451 srvrlog(fs, "starts down");
453 fs->fs_flags |= FSF_VALID;
455 if (oflags != fs->fs_flags && (fs->fs_flags & FSF_WANT))
458 * Reset failed ping count
464 dlog("%d pings to %s failed - at most %d allowed", np->np_ping, fs->fs_host, MAX_ALLOWED_PINGS);
469 * New RPC xid, so any late responses to the previous ping
472 np->np_xid = NPXID_ALLOC(struct );
475 * Run keepalive again
482 * Keep track of whether a server is alive
485 nfs_keepalive(voidp v)
489 nfs_private *np = (nfs_private *) fs->fs_private;
493 * Send an NFS ping to this node
497 start_ping(fs->fs_version);
500 * Queue the packet...
502 error = fwd_packet(MK_RPC_XID(RPC_XID_NFSPING, np->np_xid),
506 (struct sockaddr_in *) 0,
507 (voidp) ((long) np->np_xid), /* for 64-bit archs */
511 * See if a hard error occurred
518 np->np_ping = MAX_ALLOWED_PINGS; /* immediately down */
519 np->np_ttl = (time_t) 0;
521 * This causes an immediate call to nfs_timed_out
522 * whenever the server was thought to be up.
530 dlog("Sent NFS ping to %s", fs->fs_host);
536 * Back off the ping interval if we are not getting replies and
537 * the remote system is known to be down.
539 switch (fs->fs_flags & (FSF_DOWN | FSF_VALID)) {
540 case FSF_VALID: /* Up */
541 if (fstimeo < 0) /* +++ see above */
542 fstimeo = FAST_NFS_PING;
545 case FSF_VALID | FSF_DOWN: /* Down */
546 fstimeo = fs->fs_pinger;
549 default: /* Unknown */
550 fstimeo = FAST_NFS_PING;
555 dlog("NFS timeout in %d seconds", fstimeo);
558 fs->fs_cid = timeout(fstimeo, nfs_timed_out, (voidp) fs);
563 nfs_srvr_port(fserver *fs, u_short *port, voidp wchan)
566 if ((fs->fs_flags & FSF_VALID) == FSF_VALID) {
567 if ((fs->fs_flags & FSF_DOWN) == 0) {
568 nfs_private *np = (nfs_private *) fs->fs_private;
569 if (np->np_error == 0) {
570 *port = np->np_mountd;
573 error = np->np_error;
576 * Now go get the port mapping again in case it changed.
577 * Note that it is used even if (np_mountd_inval)
578 * is True. The flag is used simply as an
579 * indication that the mountd may be invalid, not
580 * that it is known to be invalid.
582 if (np->np_mountd_inval)
583 recompute_portmap(fs);
585 np->np_mountd_inval = TRUE;
590 if (error < 0 && wchan && !(fs->fs_flags & FSF_WANT)) {
592 * If a wait channel is supplied, and no
593 * error has yet occurred, then arrange
594 * that a wakeup is done on the wait channel,
595 * whenever a wakeup is done on this fs node.
596 * Wakeup's are done on the fs node whenever
597 * it changes state - thus causing control to
598 * come back here and new, better things to happen.
600 fs->fs_flags |= FSF_WANT;
601 sched_task(wakeup_task, wchan, (voidp) fs);
608 start_nfs_pings(fserver *fs, int pingval)
610 if (fs->fs_flags & FSF_PINGING) {
612 dlog("Already running pings to %s", fs->fs_host);
618 untimeout(fs->fs_cid);
620 srvrlog(fs, "wired up");
621 fs->fs_flags |= FSF_VALID;
622 fs->fs_flags &= ~FSF_DOWN;
624 fs->fs_flags |= FSF_PINGING;
631 * Find an nfs server for a host.
634 find_nfs_srvr(mntfs *mf)
636 char *host = mf->mf_fo->opt_rhost;
637 char *nfs_proto = NULL;
642 struct hostent *hp = 0;
643 struct sockaddr_in *ip;
644 u_long nfs_version = 0; /* default is no version specified */
645 #ifdef MNTTAB_OPT_PROTO
646 char *rfsname = mf->mf_fo->opt_rfs;
647 #endif /* MNTTAB_OPT_PROTO */
650 * Get ping interval from mount options.
651 * Currently only used to decide whether pings
652 * are required or not. < 0 = no pings.
654 mnt.mnt_opts = mf->mf_mopts;
655 pingval = hasmntval(&mnt, "ping");
658 * Get the NFS version from the mount options. This is used
659 * to decide the highest NFS version to try.
661 #ifdef MNTTAB_OPT_VERS
662 nfs_version = hasmntval(&mnt, MNTTAB_OPT_VERS);
663 #endif /* MNTTAB_OPT_VERS */
665 #ifdef MNTTAB_OPT_PROTO
667 char *proto_opt = hasmnteq(&mnt, MNTTAB_OPT_PROTO);
670 for (p = protocols; *p; p ++)
671 if (NSTREQ(proto_opt, *p, strlen(*p))) {
676 plog(XLOG_WARNING, "ignoring unknown protocol option for %s:%s",
680 #endif /* MNTTAB_OPT_PROTO */
682 #ifdef HAVE_NFS_NFSV2_H
683 /* allow overriding if nfsv2 option is specified in mount options */
684 if (hasmntopt(&mnt, "nfsv2")) {
685 nfs_version = (u_long) 2; /* nullify any ``vers=X'' statements */
686 nfs_proto = "udp"; /* nullify any ``proto=tcp'' statements */
687 plog(XLOG_WARNING, "found compatiblity option \"nfsv2\": set options vers=2,proto=udp for host %s", host);
689 #endif /* HAVE_NFS_NFSV2_H */
691 /* check if we have globally overridden the NFS version/protocol */
693 nfs_version = gopt.nfs_vers;
694 plog(XLOG_INFO, "find_nfs_srvr: force NFS version to %d",
697 if (gopt.nfs_proto) {
698 nfs_proto = gopt.nfs_proto;
699 plog(XLOG_INFO, "find_nfs_srvr: force NFS protocol transport to %s", nfs_proto);
703 * lookup host address and canonical name
705 hp = gethostbyname(host);
708 * New code from Bob Harris <harris@basil-rathbone.mit.edu>
709 * Use canonical name to keep track of file server
710 * information. This way aliases do not generate
711 * multiple NFS pingers. (Except when we're normalizing
714 if (hp && !(gopt.flags & CFM_NORMALIZE_HOSTNAMES))
715 host = (char *) hp->h_name;
718 switch (hp->h_addrtype) {
720 ip = ALLOC(struct sockaddr_in);
721 memset((voidp) ip, 0, sizeof(*ip));
722 ip->sin_family = AF_INET;
723 memmove((voidp) &ip->sin_addr, (voidp) hp->h_addr, sizeof(ip->sin_addr));
725 ip->sin_port = htons(NFS_PORT);
733 plog(XLOG_USER, "Unknown host: %s", host);
738 * Get the NFS Version, and verify server is up. Probably no
739 * longer need to start server down below.
744 * Find the best combination of NFS version and protocol.
745 * When given a choice, use the highest available version,
746 * and use TCP over UDP if available.
749 nfs_version = get_nfs_version(host, ip, nfs_version, nfs_proto);
751 int best_nfs_version = 0;
752 int proto_nfs_version;
755 for (p = protocols; *p; p++) {
756 proto_nfs_version = get_nfs_version(host, ip, nfs_version, *p);
758 if (proto_nfs_version > best_nfs_version) {
759 best_nfs_version = proto_nfs_version;
763 nfs_version = best_nfs_version;
768 * If the NFS server is down or does not support the portmapper call
769 * (such as certain Novell NFS servers) we mark it as version 2 and we
770 * let the nfs code deal with the case that it is down. If, when the
771 * server comes back up, it can support NFS V.3 and/or TCP, it will
774 nfs_version = NFS_VERSION;
777 #else /* not HAVE_FS_NFS3 */
778 nfs_version = NFS_VERSION;
779 #endif /* not HAVE_FS_NFS3 */
785 plog(XLOG_INFO, "Using NFS version %d, protocol %s on host %s",
786 (int) nfs_version, nfs_proto, host);
789 * Try to find an existing fs server structure for this host.
790 * Note that differing versions or protocols have their own structures.
791 * XXX: Need to fix the ping mechanism to actually use the NFS protocol
792 * chosen here (right now it always uses datagram sockets).
794 ITER(fs, fserver, &nfs_srvr_list) {
795 if (STREQ(host, fs->fs_host) &&
796 nfs_version == fs->fs_version &&
797 STREQ(nfs_proto, fs->fs_proto)) {
799 * following if statement from Mike Mitchell
801 * Initialize the ping data if we aren't pinging
802 * now. The np_ttl and np_ping fields are
803 * especially important.
805 if (!(fs->fs_flags & FSF_PINGING)) {
806 np = (nfs_private *) fs->fs_private;
807 np->np_mountd_inval = TRUE;
808 np->np_xid = NPXID_ALLOC(struct );
812 * Initially the server will be deemed dead
813 * after MAX_ALLOWED_PINGS of the fast variety
816 np->np_ttl = MAX_ALLOWED_PINGS * FAST_NFS_PING + clocktime() - 1;
819 * fill in the IP address -- this is only needed
820 * if there is a chance an IP address will change
822 * Mike Mitchell, mcm@unx.sas.com, 09/08/93
825 memmove((voidp) &fs->fs_ip->sin_addr, (voidp) hp->h_addr, sizeof(fs->fs_ip->sin_addr));
827 start_nfs_pings(fs, pingval);
836 * Get here if we can't find an entry
840 * Allocate a new server
842 fs = ALLOC(struct fserver);
844 fs->fs_host = strdup(hp ? hp->h_name : "unknown_hostname");
845 if (gopt.flags & CFM_NORMALIZE_HOSTNAMES)
846 host_normalize(&fs->fs_host);
850 fs->fs_flags = FSF_DOWN; /* Starts off down */
852 fs->fs_flags = FSF_ERROR | FSF_VALID;
853 mf->mf_flags |= MFF_ERROR;
854 mf->mf_error = ENOENT;
856 fs->fs_version = nfs_version;
857 fs->fs_proto = nfs_proto;
858 fs->fs_type = MNTTAB_TYPE_NFS;
859 fs->fs_pinger = AM_PINGER;
860 np = ALLOC(struct nfs_private);
861 memset((voidp) np, 0, sizeof(*np));
862 np->np_mountd_inval = TRUE;
863 np->np_xid = NPXID_ALLOC(struct );
867 * Initially the server will be deemed dead after
868 * MAX_ALLOWED_PINGS of the fast variety have failed.
870 np->np_ttl = clocktime() + MAX_ALLOWED_PINGS * FAST_NFS_PING - 1;
871 fs->fs_private = (voidp) np;
872 fs->fs_prfree = (void (*)(voidp)) free;
874 if (!(fs->fs_flags & FSF_ERROR)) {
876 * Start of keepalive timer
878 start_nfs_pings(fs, pingval);
882 * Add to list of servers
884 ins_que(&fs->fs_q, &nfs_srvr_list);