2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * @(#)nfs_srvcache.c 8.3 (Berkeley) 3/30/95
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
39 * Reference: Chet Juszczak, "Improving the Performance and Correctness
40 * of an NFS Server", in Proc. Winter 1989 USENIX Conference,
41 * pages 53-63. San Diego, February 1989.
43 #include <sys/param.h>
44 #include <sys/malloc.h>
45 #include <sys/mount.h>
46 #include <sys/systm.h>
49 #include <sys/mutex.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h> /* for sodupsockaddr */
52 #include <sys/eventhandler.h>
54 #include <netinet/in.h>
55 #include <nfs/rpcv2.h>
56 #include <nfs/nfsproto.h>
57 #include <nfsserver/nfs.h>
58 #include <nfsserver/nfsrvcache.h>
/* Current number of entries in the duplicate request cache. */
static long numnfsrvcache;
/* Target cache size; recomputed from nmbclusters by nfsrvcache_size_change(). */
static long desirednfsrvcache;

/*
 * Hash the RPC transaction id (xid) into a cache bucket.  Folding in
 * (xid >> 24) mixes the high-order byte so xids that differ mainly in
 * the top byte still spread across buckets.
 */
#define NFSRCHASH(xid) \
	(&nfsrvhashtbl[((xid) + ((xid) >> 24)) & nfsrvhash])
static LIST_HEAD(nfsrvhash, nfsrvcache) *nfsrvhashtbl;	/* hash buckets */
static TAILQ_HEAD(nfsrvlru, nfsrvcache) nfsrvlruhead;	/* LRU ordering of entries */
static u_long nfsrvhash;				/* hash table mask from hashinit() */
static eventhandler_tag nfsrv_nmbclusters_tag;		/* nmbclusters_change handler */

/*
 * Address family of the client that issued a cached request: taken from
 * the saved sockaddr when one was kept (RC_NAM), AF_INET otherwise.
 */
#define NETFAMILY(rp) \
	(((rp)->rc_flag & RC_NAM) ? (rp)->rc_nam->sa_family : AF_INET)
/*
 * Static array that defines which nfs rpc's are nonidempotent
 * (redoing the operation could yield a different result, so a
 * retransmission must be answered from the cache, not re-executed).
 * NOTE(review): initializer elided in this view of the file.
 */
static const int nonidempotent[NFS_NPROCS] = {

/* True iff the rpc reply is an nfs status ONLY! */
/* NOTE(review): initializer elided in this view of the file. */
static const int nfsv2_repstat[NFS_NPROCS] = {
/*
 * Size the NFS server's duplicate request cache at 1/2 the nmbclusters,
 * floating within a (64, 2048) range.  This is to prevent all mbuf
 * clusters being tied up in the NFS dupreq cache for small values of
 * nmbclusters.  Registered as an nmbclusters_change eventhandler, hence
 * the (unused) tag argument.
 */
nfsrvcache_size_change(void *tag)
	/* Half of nmbclusters, clamped to [NFSRVCACHE_MIN_SIZE, NFSRVCACHE_MAX_SIZE]. */
	desirednfsrvcache = nmbclusters /2;
	if (desirednfsrvcache > NFSRVCACHE_MAX_SIZE)
		desirednfsrvcache = NFSRVCACHE_MAX_SIZE;
	if (desirednfsrvcache < NFSRVCACHE_MIN_SIZE)
		desirednfsrvcache = NFSRVCACHE_MIN_SIZE;
/*
 * Initialize the server request cache list.
 */
nfsrv_initcache(void)
	/* Establish the initial cache size from the current nmbclusters. */
	nfsrvcache_size_change(NULL);
	nfsrvhashtbl = hashinit(desirednfsrvcache, M_NFSD, &nfsrvhash);
	TAILQ_INIT(&nfsrvlruhead);
	/* Re-size the cache whenever nmbclusters is retuned at runtime. */
	nfsrv_nmbclusters_tag = EVENTHANDLER_REGISTER(nmbclusters_change,
	    nfsrvcache_size_change, NULL, EVENTHANDLER_PRI_FIRST);
/*
 * Teardown the server request cache list.
 */
nfsrv_destroycache(void)
	/* All cached requests must be gone before the table is destroyed. */
	KASSERT(TAILQ_EMPTY(&nfsrvlruhead), ("%s: pending requests", __func__));
	EVENTHANDLER_DEREGISTER(nmbclusters_change, nfsrv_nmbclusters_tag);
	hashdestroy(nfsrvhashtbl, M_NFSD, nfsrvhash);
/*
 * Look for the request in the cache.
 * If found, return the action and optionally a cached reply;
 * else insert it in the cache.
 *
 * The rules are as follows:
 * - if in progress, return DROP request
 * - if completed within DELAY of the current time, return DROP it
 * - if completed a longer time ago return REPLY if the reply was cached or
 *   redo it otherwise
 * Update/add new request at end of lru list
 *
 * NOTE(review): several lines (braces, return statements, wakeup calls,
 * switch labels) are elided in this view of the file.
 */
nfsrv_getcache(struct nfsrv_descript *nd, struct mbuf **repp)
	struct nfsrvcache *rp;
	struct sockaddr_in *saddr;

	/*
	 * Don't cache recent requests for reliable transport protocols.
	 * (Maybe we should for the case of a reconnect, but..)
	 */
	/* Search the hash chain for an entry matching xid, proc and client. */
	LIST_FOREACH(rp, NFSRCHASH(nd->nd_retxid), rc_hash) {
	if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
	    netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
		NFS_DPF(RC, ("H%03x", rp->rc_xid & 0xfff));
		/* Sleep until any concurrent holder releases the entry. */
		if ((rp->rc_flag & RC_LOCKED) != 0) {
			rp->rc_flag |= RC_WANTED;
			(void) msleep(rp, &nfsd_mtx, PZERO-1,
		rp->rc_flag |= RC_LOCKED;
		/* If not at end of LRU chain, move it there */
		if (TAILQ_NEXT(rp, rc_lru)) {
			TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
			TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru);
		if (rp->rc_state == RC_UNUSED)
			panic("nfsrv cache");
		if (rp->rc_state == RC_INPROG) {
			/* Duplicate of a request still being served. */
			nfsrvstats.srvcache_inproghits++;
		} else if (rp->rc_flag & RC_REPSTATUS) {
			/* Non-idempotent op whose cached reply is just a status. */
			nfsrvstats.srvcache_nonidemdonehits++;
			*repp = nfs_rephead(0, nd, rp->rc_status,
		} else if (rp->rc_flag & RC_REPMBUF) {
			/* Non-idempotent op with a full cached reply mbuf chain. */
			nfsrvstats.srvcache_nonidemdonehits++;
			*repp = m_copym(rp->rc_reply, 0, M_COPYALL,
		/* Idempotent op completed earlier: safe to just redo it. */
		nfsrvstats.srvcache_idemdonehits++;
		rp->rc_state = RC_INPROG;
		/* Release the entry and notify any sleeping waiters. */
		rp->rc_flag &= ~RC_LOCKED;
		if (rp->rc_flag & RC_WANTED) {
			rp->rc_flag &= ~RC_WANTED;
	/* Cache miss: allocate a fresh entry or recycle the LRU head. */
	nfsrvstats.srvcache_misses++;
	NFS_DPF(RC, ("M%03x", nd->nd_retxid & 0xfff));
	if (numnfsrvcache < desirednfsrvcache) {
		rp = (struct nfsrvcache *)malloc((u_long)sizeof *rp,
		    M_NFSD, M_WAITOK | M_ZERO);
		rp->rc_flag = RC_LOCKED;
	/* Recycle the least recently used entry once it is unlocked. */
	rp = TAILQ_FIRST(&nfsrvlruhead);
	while ((rp->rc_flag & RC_LOCKED) != 0) {
		rp->rc_flag |= RC_WANTED;
		(void) msleep(rp, &nfsd_mtx, PZERO-1, "nfsrc", 0);
		/* Re-fetch: the LRU head may have changed while we slept. */
		rp = TAILQ_FIRST(&nfsrvlruhead);
	rp->rc_flag |= RC_LOCKED;
	LIST_REMOVE(rp, rc_hash);
	TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
	if (rp->rc_flag & RC_REPMBUF)
		m_freem(rp->rc_reply);
	if (rp->rc_flag & RC_NAM)
		/* NOTE(review): deprecated FREE() macro here vs. plain free()
		 * in nfsrv_cleancache() — consider making these consistent. */
		FREE(rp->rc_nam, M_SONAME);
	/* Keep only the lock/wait bits; clear all cached-reply state. */
	rp->rc_flag &= (RC_LOCKED | RC_WANTED);
	/* (Re)initialize the entry for this request and relink it. */
	TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru);
	rp->rc_state = RC_INPROG;
	rp->rc_xid = nd->nd_retxid;
	saddr = (struct sockaddr_in *)nd->nd_nam;
	switch (saddr->sin_family) {
		/* AF_INET case: the 32-bit address is stored inline. */
		rp->rc_flag |= RC_INETADDR;
		rp->rc_inetaddr = saddr->sin_addr.s_addr;
		/*
		 * Other families: keep a private copy of the sockaddr.
		 * XXXRW: Seems like we should only set RC_NAM if we
		 * actually manage to set rc_nam to something non-NULL.
		 */
		rp->rc_flag |= RC_NAM;
		rp->rc_nam = sodupsockaddr(nd->nd_nam, M_NOWAIT);
	rp->rc_proc = nd->nd_procnum;
	LIST_INSERT_HEAD(NFSRCHASH(nd->nd_retxid), rp, rc_hash);
	/* Release the new entry and notify any sleeping waiters. */
	rp->rc_flag &= ~RC_LOCKED;
	if (rp->rc_flag & RC_WANTED) {
		rp->rc_flag &= ~RC_WANTED;
/*
 * Update a request cache entry after the rpc has been done.
 *
 * NOTE(review): several lines (braces, returns, wakeup calls) are
 * elided in this view of the file.
 */
nfsrv_updatecache(struct nfsrv_descript *nd, int repvalid, struct mbuf *repmbuf)
	struct nfsrvcache *rp;

	/* Find the entry nfsrv_getcache() created for this request. */
	LIST_FOREACH(rp, NFSRCHASH(nd->nd_retxid), rc_hash) {
	if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
	    netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
		NFS_DPF(RC, ("U%03x", rp->rc_xid & 0xfff));
		/* Sleep until any concurrent holder releases the entry. */
		if ((rp->rc_flag & RC_LOCKED) != 0) {
			rp->rc_flag |= RC_WANTED;
			(void) msleep(rp, &nfsd_mtx, PZERO-1,
		rp->rc_flag |= RC_LOCKED;
		if (rp->rc_state == RC_DONE) {
			/*
			 * This can occur if the cache is too small.
			 * Retransmits of the same request aren't
			 * dropped so we may see the operation
			 * complete more than once.
			 */
			/* Drop the stale cached reply before re-caching. */
			if (rp->rc_flag & RC_REPMBUF) {
				m_freem(rp->rc_reply);
				rp->rc_flag &= ~RC_REPMBUF;
		rp->rc_state = RC_DONE;
		/*
		 * If we have a valid reply update status and save
		 * the reply for non-idempotent rpc's.
		 */
		if (repvalid && nonidempotent[nd->nd_procnum]) {
			if ((nd->nd_flag & ND_NFSV3) == 0 &&
				nfsrvv2_procid[nd->nd_procnum]]) {
				/* Status-only reply: remember just the status word. */
				rp->rc_status = nd->nd_repstat;
				rp->rc_flag |= RC_REPSTATUS;
			/* Otherwise keep a private copy of the reply mbuf chain. */
			rp->rc_reply = m_copym(repmbuf,
			    0, M_COPYALL, M_TRYWAIT);
			rp->rc_flag |= RC_REPMBUF;
		/* Release the entry and notify any sleeping waiters. */
		rp->rc_flag &= ~RC_LOCKED;
		if (rp->rc_flag & RC_WANTED) {
			rp->rc_flag &= ~RC_WANTED;
	/* No matching entry: the request was never cached (or was recycled). */
	NFS_DPF(RC, ("L%03x", nd->nd_retxid & 0xfff));
/*
 * Clean out the cache. Called when the last nfsd terminates.
 * NOTE(review): the tail of this function lies past the end of this
 * view of the file.
 */
nfsrv_cleancache(void)
	struct nfsrvcache *rp, *nextrp;

	/* Walk the LRU list (safe variant: entries are removed as we go). */
	TAILQ_FOREACH_SAFE(rp, &nfsrvlruhead, rc_lru, nextrp) {
		LIST_REMOVE(rp, rc_hash);
		TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
		/* Release any cached reply and saved client sockaddr. */
		if (rp->rc_flag & RC_REPMBUF)
			m_freem(rp->rc_reply);
		if (rp->rc_flag & RC_NAM)
			free(rp->rc_nam, M_SONAME);