1 /* $NetBSD: svc.c,v 1.21 2000/07/06 03:10:35 christos Exp $ */
4 * SPDX-License-Identifier: BSD-3-Clause
6 * Copyright (c) 2009, Sun Microsystems, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions are met:
11 * - Redistributions of source code must retain the above copyright notice,
12 * this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 * - Neither the name of Sun Microsystems, Inc. nor the names of its
17 * contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
#if defined(LIBC_SCCS) && !defined(lint)
static char *sccsid2 = "@(#)svc.c 1.44 88/02/08 Copyr 1984 Sun Micro";
static char *sccsid = "@(#)svc.c 2.4 88/08/11 4.0 RPCSRC";
#endif
37 #include <sys/cdefs.h>
/*
 * svc.c, Server-side remote procedure call interface.
 *
 * There are two sets of procedures here. The xprt routines are
 * for handling transport handles. The svc routines handle the
 * list of service routines.
 *
 * Copyright (C) 1984, Sun Microsystems, Inc.
 */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/socketvar.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/ucred.h>

#include <net/vnet.h>
#include <rpc/rpc.h>
#include <rpc/rpcb_clnt.h>
#include <rpc/replay.h>

#include <rpc/rpc_com.h>
70 #define SVC_VERSQUIET 0x0001 /* keep quiet about vers mismatch */
71 #define version_keepquiet(xp) (SVC_EXT(xp)->xp_flags & SVC_VERSQUIET)
static struct svc_callout *svc_find(SVCPOOL *pool, rpcprog_t, rpcvers_t,
    char *);
75 static void svc_new_thread(SVCGROUP *grp);
76 static void xprt_unregister_locked(SVCXPRT *xprt);
77 static void svc_change_space_used(SVCPOOL *pool, long delta);
78 static bool_t svc_request_space_available(SVCPOOL *pool);
79 static void svcpool_cleanup(SVCPOOL *pool);
81 /* *************** SVCXPRT related stuff **************** */
83 static int svcpool_minthread_sysctl(SYSCTL_HANDLER_ARGS);
84 static int svcpool_maxthread_sysctl(SYSCTL_HANDLER_ARGS);
85 static int svcpool_threads_sysctl(SYSCTL_HANDLER_ARGS);
SVCPOOL *
svcpool_create(const char *name, struct sysctl_oid_list *sysctl_base)
{
	SVCPOOL *pool;
	SVCGROUP *grp;
	int g;

	pool = malloc(sizeof(SVCPOOL), M_RPC, M_WAITOK|M_ZERO);

	mtx_init(&pool->sp_lock, "sp_lock", NULL, MTX_DEF);
	pool->sp_name = name;
	pool->sp_state = SVCPOOL_INIT;
	pool->sp_proc = NULL;
100 TAILQ_INIT(&pool->sp_callouts);
101 TAILQ_INIT(&pool->sp_lcallouts);
102 pool->sp_minthreads = 1;
103 pool->sp_maxthreads = 1;
104 pool->sp_groupcount = 1;
105 for (g = 0; g < SVC_MAXGROUPS; g++) {
106 grp = &pool->sp_groups[g];
		mtx_init(&grp->sg_lock, "sg_lock", NULL, MTX_DEF);
		grp->sg_pool = pool;
		grp->sg_state = SVCPOOL_ACTIVE;
110 TAILQ_INIT(&grp->sg_xlist);
111 TAILQ_INIT(&grp->sg_active);
112 LIST_INIT(&grp->sg_idlethreads);
113 grp->sg_minthreads = 1;
		grp->sg_maxthreads = 1;
	}
	/*
	 * Don't use more than a quarter of mbuf clusters. Nota bene:
	 * nmbclusters is an int, but nmbclusters*MCLBYTES may overflow
	 * on LP64 architectures, so cast to u_long to avoid undefined
	 * behavior. (ILP32 architectures cannot have nmbclusters
	 * large enough to overflow for other reasons.)
	 */
124 pool->sp_space_high = (u_long)nmbclusters * MCLBYTES / 4;
125 pool->sp_space_low = (pool->sp_space_high / 3) * 2;
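
	/*
	 * Worked example (illustrative numbers, not from this file): with
	 * nmbclusters = 1536 * 1024 and MCLBYTES = 2048, the product is
	 * 3 GiB, which overflows a 32-bit int but is exact as u_long on
	 * LP64; sp_space_high then becomes 768 MiB and sp_space_low two
	 * thirds of that, 512 MiB.
	 */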
127 sysctl_ctx_init(&pool->sp_sysctl);
128 if (IS_DEFAULT_VNET(curvnet) && sysctl_base) {
129 SYSCTL_ADD_PROC(&pool->sp_sysctl, sysctl_base, OID_AUTO,
130 "minthreads", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
131 pool, 0, svcpool_minthread_sysctl, "I",
132 "Minimal number of threads");
133 SYSCTL_ADD_PROC(&pool->sp_sysctl, sysctl_base, OID_AUTO,
134 "maxthreads", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
135 pool, 0, svcpool_maxthread_sysctl, "I",
136 "Maximal number of threads");
137 SYSCTL_ADD_PROC(&pool->sp_sysctl, sysctl_base, OID_AUTO,
138 "threads", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
139 pool, 0, svcpool_threads_sysctl, "I",
140 "Current number of threads");
141 SYSCTL_ADD_INT(&pool->sp_sysctl, sysctl_base, OID_AUTO,
142 "groups", CTLFLAG_RD, &pool->sp_groupcount, 0,
143 "Number of thread groups");
145 SYSCTL_ADD_ULONG(&pool->sp_sysctl, sysctl_base, OID_AUTO,
146 "request_space_used", CTLFLAG_RD,
147 &pool->sp_space_used,
148 "Space in parsed but not handled requests.");
150 SYSCTL_ADD_ULONG(&pool->sp_sysctl, sysctl_base, OID_AUTO,
151 "request_space_used_highest", CTLFLAG_RD,
152 &pool->sp_space_used_highest,
153 "Highest space used since reboot.");
155 SYSCTL_ADD_ULONG(&pool->sp_sysctl, sysctl_base, OID_AUTO,
156 "request_space_high", CTLFLAG_RW,
157 &pool->sp_space_high,
158 "Maximum space in parsed but not handled requests.");
160 SYSCTL_ADD_ULONG(&pool->sp_sysctl, sysctl_base, OID_AUTO,
161 "request_space_low", CTLFLAG_RW,
163 "Low water mark for request space.");
165 SYSCTL_ADD_INT(&pool->sp_sysctl, sysctl_base, OID_AUTO,
166 "request_space_throttled", CTLFLAG_RD,
167 &pool->sp_space_throttled, 0,
168 "Whether nfs requests are currently throttled");
170 SYSCTL_ADD_INT(&pool->sp_sysctl, sysctl_base, OID_AUTO,
171 "request_space_throttle_count", CTLFLAG_RD,
172 &pool->sp_space_throttle_count, 0,
173 "Count of times throttling based on request space has occurred");
/*
 * Code common to svcpool_destroy() and svcpool_close(), which cleans up
 * the pool data structures.
 */
static void
svcpool_cleanup(SVCPOOL *pool)
{
	SVCGROUP *grp;
	int g;
187 SVCXPRT *xprt, *nxprt;
188 struct svc_callout *s;
189 struct svc_loss_callout *sl;
190 struct svcxprt_list cleanup;
193 TAILQ_INIT(&cleanup);
195 for (g = 0; g < SVC_MAXGROUPS; g++) {
196 grp = &pool->sp_groups[g];
197 mtx_lock(&grp->sg_lock);
198 while ((xprt = TAILQ_FIRST(&grp->sg_xlist)) != NULL) {
199 xprt_unregister_locked(xprt);
			TAILQ_INSERT_TAIL(&cleanup, xprt, xp_link);
		}
		mtx_unlock(&grp->sg_lock);
	}
	TAILQ_FOREACH_SAFE(xprt, &cleanup, xp_link, nxprt) {
		if (xprt->xp_socket != NULL)
			soshutdown(xprt->xp_socket, SHUT_WR);
		SVC_RELEASE(xprt);
	}
	mtx_lock(&pool->sp_lock);
	while ((s = TAILQ_FIRST(&pool->sp_callouts)) != NULL) {
		mtx_unlock(&pool->sp_lock);
		svc_unreg(pool, s->sc_prog, s->sc_vers);
		mtx_lock(&pool->sp_lock);
	}
	while ((sl = TAILQ_FIRST(&pool->sp_lcallouts)) != NULL) {
		mtx_unlock(&pool->sp_lock);
		svc_loss_unreg(pool, sl->slc_dispatch);
		mtx_lock(&pool->sp_lock);
	}
	mtx_unlock(&pool->sp_lock);
}
void
svcpool_destroy(SVCPOOL *pool)
{
	SVCGROUP *grp;
	int g;
230 svcpool_cleanup(pool);
232 for (g = 0; g < SVC_MAXGROUPS; g++) {
233 grp = &pool->sp_groups[g];
		mtx_destroy(&grp->sg_lock);
	}
	mtx_destroy(&pool->sp_lock);

	if (pool->sp_rcache)
		replay_freecache(pool->sp_rcache);

	sysctl_ctx_free(&pool->sp_sysctl);
	free(pool, M_RPC);
}
/*
 * Similar to svcpool_destroy(), except that it does not destroy the actual
 * data structures. As such, "pool" may be used again.
 */
void
svcpool_close(SVCPOOL *pool)
{
	SVCGROUP *grp;
	int g;
255 svcpool_cleanup(pool);
257 /* Now, initialize the pool's state for a fresh svc_run() call. */
258 mtx_lock(&pool->sp_lock);
259 pool->sp_state = SVCPOOL_INIT;
260 mtx_unlock(&pool->sp_lock);
261 for (g = 0; g < SVC_MAXGROUPS; g++) {
262 grp = &pool->sp_groups[g];
263 mtx_lock(&grp->sg_lock);
264 grp->sg_state = SVCPOOL_ACTIVE;
		mtx_unlock(&grp->sg_lock);
	}
}
/*
 * Sysctl handler to get the present thread count on a pool
 */
static int
svcpool_threads_sysctl(SYSCTL_HANDLER_ARGS)
{
	SVCPOOL *pool;
	int threads, error, g;

	pool = oidp->oid_arg1;
	threads = 0;
	mtx_lock(&pool->sp_lock);
281 for (g = 0; g < pool->sp_groupcount; g++)
282 threads += pool->sp_groups[g].sg_threadcount;
283 mtx_unlock(&pool->sp_lock);
	error = sysctl_handle_int(oidp, &threads, 0, req);
	return (error);
}
/*
 * Sysctl handler to set the minimum thread count on a pool
 */
static int
svcpool_minthread_sysctl(SYSCTL_HANDLER_ARGS)
{
	SVCPOOL *pool;
	int newminthreads, error, g;
297 pool = oidp->oid_arg1;
298 newminthreads = pool->sp_minthreads;
299 error = sysctl_handle_int(oidp, &newminthreads, 0, req);
300 if (error == 0 && newminthreads != pool->sp_minthreads) {
		if (newminthreads > pool->sp_maxthreads)
			return (EINVAL);
303 mtx_lock(&pool->sp_lock);
304 pool->sp_minthreads = newminthreads;
305 for (g = 0; g < pool->sp_groupcount; g++) {
306 pool->sp_groups[g].sg_minthreads = max(1,
			    pool->sp_minthreads / pool->sp_groupcount);
		}
		mtx_unlock(&pool->sp_lock);
	}
	return (error);
}
/*
 * Sysctl handler to set the maximum thread count on a pool
 */
static int
svcpool_maxthread_sysctl(SYSCTL_HANDLER_ARGS)
{
	SVCPOOL *pool;
	int newmaxthreads, error, g;
323 pool = oidp->oid_arg1;
324 newmaxthreads = pool->sp_maxthreads;
325 error = sysctl_handle_int(oidp, &newmaxthreads, 0, req);
326 if (error == 0 && newmaxthreads != pool->sp_maxthreads) {
		if (newmaxthreads < pool->sp_minthreads)
			return (EINVAL);
329 mtx_lock(&pool->sp_lock);
330 pool->sp_maxthreads = newmaxthreads;
331 for (g = 0; g < pool->sp_groupcount; g++) {
332 pool->sp_groups[g].sg_maxthreads = max(1,
			    pool->sp_maxthreads / pool->sp_groupcount);
		}
		mtx_unlock(&pool->sp_lock);
	}
	return (error);
}
/*
 * Activate a transport handle.
 */
void
xprt_register(SVCXPRT *xprt)
{
	SVCPOOL *pool = xprt->xp_pool;
	SVCGROUP *grp;
	int g;

	SVC_ACQUIRE(xprt);
	g = atomic_fetchadd_int(&pool->sp_nextgroup, 1) % pool->sp_groupcount;
352 xprt->xp_group = grp = &pool->sp_groups[g];
353 mtx_lock(&grp->sg_lock);
354 xprt->xp_registered = TRUE;
355 xprt->xp_active = FALSE;
356 TAILQ_INSERT_TAIL(&grp->sg_xlist, xprt, xp_link);
	mtx_unlock(&grp->sg_lock);
}
/*
 * De-activate a transport handle. Note: the locked version doesn't
 * release the transport - caller must do that after dropping the pool
 * lock.
 */
static void
xprt_unregister_locked(SVCXPRT *xprt)
{
368 SVCGROUP *grp = xprt->xp_group;
370 mtx_assert(&grp->sg_lock, MA_OWNED);
371 KASSERT(xprt->xp_registered == TRUE,
372 ("xprt_unregister_locked: not registered"));
373 xprt_inactive_locked(xprt);
374 TAILQ_REMOVE(&grp->sg_xlist, xprt, xp_link);
	xprt->xp_registered = FALSE;
}
void
xprt_unregister(SVCXPRT *xprt)
{
	SVCGROUP *grp = xprt->xp_group;
383 mtx_lock(&grp->sg_lock);
384 if (xprt->xp_registered == FALSE) {
385 /* Already unregistered by another thread */
		mtx_unlock(&grp->sg_lock);
		return;
	}
389 xprt_unregister_locked(xprt);
390 mtx_unlock(&grp->sg_lock);
392 if (xprt->xp_socket != NULL)
		soshutdown(xprt->xp_socket, SHUT_WR);

	SVC_RELEASE(xprt);
}
/*
 * Attempt to assign a service thread to this transport.
 */
static bool_t
xprt_assignthread(SVCXPRT *xprt)
{
	SVCGROUP *grp = xprt->xp_group;
	SVCTHREAD *st;

	mtx_assert(&grp->sg_lock, MA_OWNED);
	st = LIST_FIRST(&grp->sg_idlethreads);
	if (st) {
		LIST_REMOVE(st, st_ilink);
		SVC_ACQUIRE(xprt);
		xprt->xp_thread = st;
		st->st_xprt = xprt;
		cv_signal(&st->st_cond);
		return (TRUE);
	} else {
		/*
		 * See if we can create a new thread. The
		 * actual thread creation happens in
		 * svc_run_internal because our locking state
		 * is poorly defined (we are typically called
		 * from a socket upcall). Don't create more
		 * than one thread per second.
		 */
424 if (grp->sg_state == SVCPOOL_ACTIVE
425 && grp->sg_lastcreatetime < time_uptime
426 && grp->sg_threadcount < grp->sg_maxthreads) {
			grp->sg_state = SVCPOOL_THREADWANTED;
		}
	}
	return (FALSE);
}
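
/*
 * Summary of the thread-creation handshake implemented above and in
 * svc_run_internal() below: a socket upcall that finds no idle thread sets
 * sg_state to SVCPOOL_THREADWANTED; a running service thread notices this,
 * moves the group to SVCPOOL_THREADSTARTING and calls svc_new_thread(); the
 * newly spawned thread finally resets the state to SVCPOOL_ACTIVE. This
 * defers the actual kthread_add() to a context with well-defined locking.
 */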
void
xprt_active(SVCXPRT *xprt)
{
	SVCGROUP *grp = xprt->xp_group;
438 mtx_lock(&grp->sg_lock);
440 if (!xprt->xp_registered) {
		/* Race with xprt_unregister - we lose. */
		mtx_unlock(&grp->sg_lock);
		return;
	}
448 if (!xprt->xp_active) {
449 xprt->xp_active = TRUE;
450 if (xprt->xp_thread == NULL) {
451 if (!svc_request_space_available(xprt->xp_pool) ||
452 !xprt_assignthread(xprt))
				TAILQ_INSERT_TAIL(&grp->sg_active, xprt,
				    xp_alink);
		}
	}

	mtx_unlock(&grp->sg_lock);
}
void
xprt_inactive_locked(SVCXPRT *xprt)
{
	SVCGROUP *grp = xprt->xp_group;
466 mtx_assert(&grp->sg_lock, MA_OWNED);
467 if (xprt->xp_active) {
468 if (xprt->xp_thread == NULL)
469 TAILQ_REMOVE(&grp->sg_active, xprt, xp_alink);
		xprt->xp_active = FALSE;
	}
}
void
xprt_inactive(SVCXPRT *xprt)
{
	SVCGROUP *grp = xprt->xp_group;
479 mtx_lock(&grp->sg_lock);
480 xprt_inactive_locked(xprt);
	mtx_unlock(&grp->sg_lock);
}
/*
 * Variant of xprt_inactive() for use only when sure that port is
 * assigned to thread. For example, within receive handlers.
 */
void
xprt_inactive_self(SVCXPRT *xprt)
{
492 KASSERT(xprt->xp_thread != NULL,
493 ("xprt_inactive_self(%p) with NULL xp_thread", xprt));
	xprt->xp_active = FALSE;
}
/*
 * Add a service program to the callout list.
 * The dispatch routine will be called when an rpc request for this
 * program number comes in.
 */
bool_t
svc_reg(SVCXPRT *xprt, const rpcprog_t prog, const rpcvers_t vers,
    void (*dispatch)(struct svc_req *, SVCXPRT *),
    const struct netconfig *nconf)
{
	SVCPOOL *pool = xprt->xp_pool;
	struct svc_callout *s;
	char *netid = NULL;
	int flag = 0;

/* VARIABLES PROTECTED BY svc_lock: s, svc_head */
	if (xprt->xp_netid) {
		netid = strdup(xprt->xp_netid, M_RPC);
		flag = 1;
	} else if (nconf && nconf->nc_netid) {
		netid = strdup(nconf->nc_netid, M_RPC);
		flag = 1;
	} /* must have been created with svc_raw_create */
	if ((netid == NULL) && (flag == 1)) {
		return (FALSE); /* badly formed, netid missing */
	}
	mtx_lock(&pool->sp_lock);
	if ((s = svc_find(pool, prog, vers, netid)) != NULL) {
		if (netid)
			free(netid, M_RPC);
		if (s->sc_dispatch == dispatch)
			goto rpcb_it; /* the caller is registering another xprt */
		mtx_unlock(&pool->sp_lock);
		return (FALSE);
	}
	s = malloc(sizeof (struct svc_callout), M_RPC, M_NOWAIT);
	if (s == NULL) {
		if (netid)
			free(netid, M_RPC);
		mtx_unlock(&pool->sp_lock);
		return (FALSE);
	}

	s->sc_prog = prog;
	s->sc_vers = vers;
	s->sc_dispatch = dispatch;
	s->sc_netid = netid;
	TAILQ_INSERT_TAIL(&pool->sp_callouts, s, sc_link);
548 if ((xprt->xp_netid == NULL) && (flag == 1) && netid)
549 ((SVCXPRT *) xprt)->xp_netid = strdup(netid, M_RPC);
rpcb_it:
	mtx_unlock(&pool->sp_lock);
	/* now register the information with the local binder service */
	if (nconf) {
		bool_t dummy;
		struct netconfig tnc;
		struct netbuf nb;
		tnc = *nconf;
		nb.buf = &xprt->xp_ltaddr;
		nb.len = xprt->xp_ltaddr.ss_len;
		dummy = rpcb_set(prog, vers, &tnc, &nb);
		return (dummy);
	}
	return (TRUE);
}
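
/*
 * Illustrative registration sketch (hypothetical program number, version
 * and dispatcher; the transport and netconfig would come from a transport
 * backend, not from this file):
 */
#if 0
	#define MYPROG	0x20000001	/* hypothetical program number */
	#define MYVERS	1

	extern void myprog_dispatch(struct svc_req *, SVCXPRT *);

	if (!svc_reg(xprt, MYPROG, MYVERS, myprog_dispatch, nconf))
		printf("svc_reg failed for program 0x%x\n", MYPROG);
#endif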
/*
 * Remove a service program from the callout list.
 */
void
svc_unreg(SVCPOOL *pool, const rpcprog_t prog, const rpcvers_t vers)
{
	struct svc_callout *s;
575 /* unregister the information anyway */
576 (void) rpcb_unset(prog, vers, NULL);
577 mtx_lock(&pool->sp_lock);
578 while ((s = svc_find(pool, prog, vers, NULL)) != NULL) {
		TAILQ_REMOVE(&pool->sp_callouts, s, sc_link);
		if (s->sc_netid)
			mem_free(s->sc_netid, sizeof (s->sc_netid) + 1);
		mem_free(s, sizeof (struct svc_callout));
	}
	mtx_unlock(&pool->sp_lock);
}
/*
 * Add a service connection loss program to the callout list.
 * The dispatch routine will be called when some transport in this
 * pool dies.
 */
bool_t
svc_loss_reg(SVCXPRT *xprt, void (*dispatch)(SVCXPRT *))
{
	SVCPOOL *pool = xprt->xp_pool;
	struct svc_loss_callout *s;
597 mtx_lock(&pool->sp_lock);
	TAILQ_FOREACH(s, &pool->sp_lcallouts, slc_link) {
		if (s->slc_dispatch == dispatch)
			break;
	}
	if (s != NULL) {
		mtx_unlock(&pool->sp_lock);
		return (TRUE);
	}
	s = malloc(sizeof(struct svc_loss_callout), M_RPC, M_NOWAIT);
	if (s == NULL) {
		mtx_unlock(&pool->sp_lock);
		return (FALSE);
	}
	s->slc_dispatch = dispatch;
	TAILQ_INSERT_TAIL(&pool->sp_lcallouts, s, slc_link);
	mtx_unlock(&pool->sp_lock);
	return (TRUE);
}
/*
 * Remove a service connection loss program from the callout list.
 */
void
svc_loss_unreg(SVCPOOL *pool, void (*dispatch)(SVCXPRT *))
{
	struct svc_loss_callout *s;
625 mtx_lock(&pool->sp_lock);
626 TAILQ_FOREACH(s, &pool->sp_lcallouts, slc_link) {
627 if (s->slc_dispatch == dispatch) {
			TAILQ_REMOVE(&pool->sp_lcallouts, s, slc_link);
			free(s, M_RPC);
			break;
		}
	}
	mtx_unlock(&pool->sp_lock);
}
636 /* ********************** CALLOUT list related stuff ************* */
/*
 * Search the callout list for a program number, return the callout
 * struct.
 */
642 static struct svc_callout *
svc_find(SVCPOOL *pool, rpcprog_t prog, rpcvers_t vers, char *netid)
{
	struct svc_callout *s;
647 mtx_assert(&pool->sp_lock, MA_OWNED);
648 TAILQ_FOREACH(s, &pool->sp_callouts, sc_link) {
649 if (s->sc_prog == prog && s->sc_vers == vers
650 && (netid == NULL || s->sc_netid == NULL ||
		    strcmp(netid, s->sc_netid) == 0))
			break;
	}

	return (s);
}
658 /* ******************* REPLY GENERATION ROUTINES ************ */
static bool_t
svc_sendreply_common(struct svc_req *rqstp, struct rpc_msg *rply,
    struct mbuf *body)
{
	SVCXPRT *xprt = rqstp->rq_xprt;
	bool_t ok;

	if (rqstp->rq_args) {
		m_freem(rqstp->rq_args);
		rqstp->rq_args = NULL;
	}
672 if (xprt->xp_pool->sp_rcache)
673 replay_setreply(xprt->xp_pool->sp_rcache,
674 rply, svc_getrpccaller(rqstp), body);
	if (!SVCAUTH_WRAP(&rqstp->rq_auth, &body))
		return (FALSE);
679 ok = SVC_REPLY(xprt, rply, rqstp->rq_addr, body, &rqstp->rq_reply_seq);
680 if (rqstp->rq_addr) {
681 free(rqstp->rq_addr, M_SONAME);
		rqstp->rq_addr = NULL;
	}

	return (ok);
}
/*
 * Send a reply to an rpc request
 */
bool_t
svc_sendreply(struct svc_req *rqstp, xdrproc_t xdr_results, void * xdr_location)
{
	struct rpc_msg rply;
	struct mbuf *m;
	XDR xdrs;
	bool_t ok;
699 rply.rm_xid = rqstp->rq_xid;
700 rply.rm_direction = REPLY;
701 rply.rm_reply.rp_stat = MSG_ACCEPTED;
702 rply.acpted_rply.ar_verf = rqstp->rq_verf;
703 rply.acpted_rply.ar_stat = SUCCESS;
704 rply.acpted_rply.ar_results.where = NULL;
705 rply.acpted_rply.ar_results.proc = (xdrproc_t) xdr_void;
707 m = m_getcl(M_WAITOK, MT_DATA, 0);
708 xdrmbuf_create(&xdrs, m, XDR_ENCODE);
	ok = xdr_results(&xdrs, xdr_location);
	XDR_DESTROY(&xdrs);

	if (ok) {
		return (svc_sendreply_common(rqstp, &rply, m));
	} else {
		m_freem(m);
		return (FALSE);
	}
}
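
/*
 * A minimal dispatch-routine sketch (hypothetical names and argument types,
 * for illustration only). It shows the pattern the comments in this file
 * describe: decode arguments with svc_getargs(), reply with svc_sendreply()
 * or an svcerr_*() routine, and always release the request with
 * svc_freereq() since ownership passes to the dispatcher.
 */
#if 0
static void
myprog_dispatch(struct svc_req *rqstp, SVCXPRT *xprt)
{
	int arg, res;

	if (!svc_getargs(rqstp, (xdrproc_t) xdr_int, &arg)) {
		svcerr_decode(rqstp);
		svc_freereq(rqstp);
		return;
	}
	res = arg + 1;			/* the actual service work */
	if (!svc_sendreply(rqstp, (xdrproc_t) xdr_int, &res))
		svcerr_systemerr(rqstp);
	svc_freereq(rqstp);
}
#endif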
bool_t
svc_sendreply_mbuf(struct svc_req *rqstp, struct mbuf *m)
{
	struct rpc_msg rply;
725 rply.rm_xid = rqstp->rq_xid;
726 rply.rm_direction = REPLY;
727 rply.rm_reply.rp_stat = MSG_ACCEPTED;
728 rply.acpted_rply.ar_verf = rqstp->rq_verf;
729 rply.acpted_rply.ar_stat = SUCCESS;
730 rply.acpted_rply.ar_results.where = NULL;
731 rply.acpted_rply.ar_results.proc = (xdrproc_t) xdr_void;
	return (svc_sendreply_common(rqstp, &rply, m));
}
/*
 * No procedure error reply
 */
void
svcerr_noproc(struct svc_req *rqstp)
{
	SVCXPRT *xprt = rqstp->rq_xprt;
	struct rpc_msg rply;
745 rply.rm_xid = rqstp->rq_xid;
746 rply.rm_direction = REPLY;
747 rply.rm_reply.rp_stat = MSG_ACCEPTED;
748 rply.acpted_rply.ar_verf = rqstp->rq_verf;
749 rply.acpted_rply.ar_stat = PROC_UNAVAIL;
751 if (xprt->xp_pool->sp_rcache)
752 replay_setreply(xprt->xp_pool->sp_rcache,
753 &rply, svc_getrpccaller(rqstp), NULL);
	svc_sendreply_common(rqstp, &rply, NULL);
}
/*
 * Can't decode args error reply
 */
void
svcerr_decode(struct svc_req *rqstp)
{
	SVCXPRT *xprt = rqstp->rq_xprt;
	struct rpc_msg rply;
767 rply.rm_xid = rqstp->rq_xid;
768 rply.rm_direction = REPLY;
769 rply.rm_reply.rp_stat = MSG_ACCEPTED;
770 rply.acpted_rply.ar_verf = rqstp->rq_verf;
771 rply.acpted_rply.ar_stat = GARBAGE_ARGS;
773 if (xprt->xp_pool->sp_rcache)
774 replay_setreply(xprt->xp_pool->sp_rcache,
775 &rply, (struct sockaddr *) &xprt->xp_rtaddr, NULL);
	svc_sendreply_common(rqstp, &rply, NULL);
}
/*
 * Some system error
 */
void
svcerr_systemerr(struct svc_req *rqstp)
{
	SVCXPRT *xprt = rqstp->rq_xprt;
	struct rpc_msg rply;
789 rply.rm_xid = rqstp->rq_xid;
790 rply.rm_direction = REPLY;
791 rply.rm_reply.rp_stat = MSG_ACCEPTED;
792 rply.acpted_rply.ar_verf = rqstp->rq_verf;
793 rply.acpted_rply.ar_stat = SYSTEM_ERR;
795 if (xprt->xp_pool->sp_rcache)
796 replay_setreply(xprt->xp_pool->sp_rcache,
797 &rply, svc_getrpccaller(rqstp), NULL);
	svc_sendreply_common(rqstp, &rply, NULL);
}
/*
 * Authentication error reply
 */
void
svcerr_auth(struct svc_req *rqstp, enum auth_stat why)
{
	SVCXPRT *xprt = rqstp->rq_xprt;
	struct rpc_msg rply;
811 rply.rm_xid = rqstp->rq_xid;
812 rply.rm_direction = REPLY;
813 rply.rm_reply.rp_stat = MSG_DENIED;
814 rply.rjcted_rply.rj_stat = AUTH_ERROR;
815 rply.rjcted_rply.rj_why = why;
817 if (xprt->xp_pool->sp_rcache)
818 replay_setreply(xprt->xp_pool->sp_rcache,
819 &rply, svc_getrpccaller(rqstp), NULL);
	svc_sendreply_common(rqstp, &rply, NULL);
}
/*
 * Auth too weak error reply
 */
void
svcerr_weakauth(struct svc_req *rqstp)
{

	svcerr_auth(rqstp, AUTH_TOOWEAK);
}
/*
 * Program unavailable error reply
 */
void
svcerr_noprog(struct svc_req *rqstp)
{
	SVCXPRT *xprt = rqstp->rq_xprt;
	struct rpc_msg rply;
843 rply.rm_xid = rqstp->rq_xid;
844 rply.rm_direction = REPLY;
845 rply.rm_reply.rp_stat = MSG_ACCEPTED;
846 rply.acpted_rply.ar_verf = rqstp->rq_verf;
847 rply.acpted_rply.ar_stat = PROG_UNAVAIL;
849 if (xprt->xp_pool->sp_rcache)
850 replay_setreply(xprt->xp_pool->sp_rcache,
851 &rply, svc_getrpccaller(rqstp), NULL);
	svc_sendreply_common(rqstp, &rply, NULL);
}
/*
 * Program version mismatch error reply
 */
void
svcerr_progvers(struct svc_req *rqstp, rpcvers_t low_vers, rpcvers_t high_vers)
{
	SVCXPRT *xprt = rqstp->rq_xprt;
	struct rpc_msg rply;
865 rply.rm_xid = rqstp->rq_xid;
866 rply.rm_direction = REPLY;
867 rply.rm_reply.rp_stat = MSG_ACCEPTED;
868 rply.acpted_rply.ar_verf = rqstp->rq_verf;
869 rply.acpted_rply.ar_stat = PROG_MISMATCH;
870 rply.acpted_rply.ar_vers.low = (uint32_t)low_vers;
871 rply.acpted_rply.ar_vers.high = (uint32_t)high_vers;
873 if (xprt->xp_pool->sp_rcache)
874 replay_setreply(xprt->xp_pool->sp_rcache,
875 &rply, svc_getrpccaller(rqstp), NULL);
	svc_sendreply_common(rqstp, &rply, NULL);
}
/*
 * Allocate a new server transport structure. All fields are
 * initialized to zero and xp_p3 is initialized to point at an
 * extension structure to hold various flags and authentication
 * parameters.
 */
SVCXPRT *
svc_xprt_alloc(void)
{
	SVCXPRT *xprt;
	SVCXPRT_EXT *ext;

	xprt = mem_alloc(sizeof(SVCXPRT));
	ext = mem_alloc(sizeof(SVCXPRT_EXT));
	xprt->xp_p3 = ext;
	refcount_init(&xprt->xp_refs, 1);

	return (xprt);
}
/*
 * Free a server transport structure.
 */
void
svc_xprt_free(SVCXPRT *xprt)
{
907 mem_free(xprt->xp_p3, sizeof(SVCXPRT_EXT));
908 /* The size argument is ignored, so 0 is ok. */
909 mem_free(xprt->xp_gidp, 0);
	mem_free(xprt, sizeof(SVCXPRT));
}
913 /* ******************* SERVER INPUT STUFF ******************* */
/*
 * Read RPC requests from a transport and queue them to be
 * executed. We handle authentication and replay cache replies here.
 * Actually dispatching the RPC is deferred till svc_executereq.
 */
920 static enum xprt_stat
svc_getreq(SVCXPRT *xprt, struct svc_req **rqstp_ret)
{
	SVCPOOL *pool = xprt->xp_pool;
	struct svc_req *r;
	struct rpc_msg msg;
	struct mbuf *args;
	struct svc_loss_callout *s;
	enum xprt_stat stat;

	/* now receive msgs from xprt (support batch calls) */
931 r = malloc(sizeof(*r), M_RPC, M_WAITOK|M_ZERO);
933 msg.rm_call.cb_cred.oa_base = r->rq_credarea;
934 msg.rm_call.cb_verf.oa_base = &r->rq_credarea[MAX_AUTH_BYTES];
935 r->rq_clntcred = &r->rq_credarea[2*MAX_AUTH_BYTES];
936 if (SVC_RECV(xprt, &msg, &r->rq_addr, &args)) {
		enum auth_stat why;

		/*
		 * Handle replays and authenticate before queuing the
		 * request to be executed.
		 */
		SVC_ACQUIRE(xprt);
		r->rq_xprt = xprt;
945 if (pool->sp_rcache) {
946 struct rpc_msg repmsg;
947 struct mbuf *repbody;
948 enum replay_state rs;
949 rs = replay_find(pool->sp_rcache, &msg,
			    svc_getrpccaller(r), &repmsg, &repbody);
			switch (rs) {
			case RS_NEW:
				break;
			case RS_DONE:
				SVC_REPLY(xprt, &repmsg, r->rq_addr,
				    repbody, &r->rq_reply_seq);
				if (r->rq_addr) {
					free(r->rq_addr, M_SONAME);
					r->rq_addr = NULL;
				}
				m_freem(args);
				goto call_done;

			default:
				m_freem(args);
				goto call_done;
			}
		}
970 r->rq_xid = msg.rm_xid;
971 r->rq_prog = msg.rm_call.cb_prog;
972 r->rq_vers = msg.rm_call.cb_vers;
973 r->rq_proc = msg.rm_call.cb_proc;
		r->rq_size = sizeof(*r) + m_length(args, NULL);
		r->rq_args = args;
		if ((why = _authenticate(r, &msg)) != AUTH_OK) {
			/*
			 * RPCSEC_GSS uses this return code
			 * for requests that form part of its
			 * context establishment protocol and
			 * should not be dispatched to the
			 * application.
			 */
			if (why != RPCSEC_GSS_NODISPATCH)
				svcerr_auth(r, why);
			goto call_done;
		}

		if (!SVCAUTH_UNWRAP(&r->rq_auth, &r->rq_args)) {
			svcerr_decode(r);
			goto call_done;
		}
		/*
		 * Everything checks out, return request to caller.
		 */
		*rqstp_ret = r;
		r = NULL;
	}
call_done:
	if (r) {
		svc_freereq(r);
		r = NULL;
	}
1005 if ((stat = SVC_STAT(xprt)) == XPRT_DIED) {
1006 TAILQ_FOREACH(s, &pool->sp_lcallouts, slc_link)
1007 (*s->slc_dispatch)(xprt);
		xprt_unregister(xprt);
	}

	return (stat);
}
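
/*
 * Request lifecycle summary: SVC_RECV() pulls a message off the transport;
 * replay_find() short-circuits retransmissions when a replay cache is
 * configured; _authenticate() and SVCAUTH_UNWRAP() validate and unwrap the
 * call; the request is then queued on a service thread and finally handed
 * to the registered dispatcher by svc_executereq() below.
 */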
static void
svc_executereq(struct svc_req *rqstp)
{
1017 SVCXPRT *xprt = rqstp->rq_xprt;
	SVCPOOL *pool = xprt->xp_pool;
	int prog_found;
	rpcvers_t low_vers;
	rpcvers_t high_vers;
	struct svc_callout *s;

	/* now match message with a registered service */
	prog_found = FALSE;
	low_vers = (rpcvers_t) -1L;
	high_vers = (rpcvers_t) 0L;
1028 TAILQ_FOREACH(s, &pool->sp_callouts, sc_link) {
1029 if (s->sc_prog == rqstp->rq_prog) {
1030 if (s->sc_vers == rqstp->rq_vers) {
				/*
				 * We hand ownership of r to the
				 * dispatch method - they must call
				 * svc_freereq.
				 */
				(*s->sc_dispatch)(rqstp, xprt);
				return;
			} /* found correct version */
			prog_found = TRUE;
1040 if (s->sc_vers < low_vers)
1041 low_vers = s->sc_vers;
1042 if (s->sc_vers > high_vers)
1043 high_vers = s->sc_vers;
		} /* found correct program */
	}
	/*
	 * if we got here, the program or version
	 * is not served ...
	 */
	if (prog_found)
		svcerr_progvers(rqstp, low_vers, high_vers);
	else
		svcerr_noprog(rqstp);

	svc_freereq(rqstp);
}
static void
svc_checkidle(SVCGROUP *grp)
{
	SVCXPRT *xprt, *nxprt;
	time_t timo;
	struct svcxprt_list cleanup;
1066 TAILQ_INIT(&cleanup);
1067 TAILQ_FOREACH_SAFE(xprt, &grp->sg_xlist, xp_link, nxprt) {
		/*
		 * Only some transports have idle timers. Don't time
		 * something out which is just waking up.
		 */
		if (!xprt->xp_idletimeout || xprt->xp_thread)
			continue;
1075 timo = xprt->xp_lastactive + xprt->xp_idletimeout;
1076 if (time_uptime > timo) {
1077 xprt_unregister_locked(xprt);
			TAILQ_INSERT_TAIL(&cleanup, xprt, xp_link);
		}
	}
1082 mtx_unlock(&grp->sg_lock);
1083 TAILQ_FOREACH_SAFE(xprt, &cleanup, xp_link, nxprt) {
		soshutdown(xprt->xp_socket, SHUT_WR);
		SVC_RELEASE(xprt);
	}
	mtx_lock(&grp->sg_lock);
}
void
svc_assign_waiting_sockets(SVCPOOL *pool)
{
	SVCGROUP *grp;
	SVCXPRT *xprt;
	int g;
1097 for (g = 0; g < pool->sp_groupcount; g++) {
1098 grp = &pool->sp_groups[g];
1099 mtx_lock(&grp->sg_lock);
1100 while ((xprt = TAILQ_FIRST(&grp->sg_active)) != NULL) {
1101 if (xprt_assignthread(xprt))
				TAILQ_REMOVE(&grp->sg_active, xprt, xp_alink);
			else
				break;
		}
		mtx_unlock(&grp->sg_lock);
	}
}
static void
svc_change_space_used(SVCPOOL *pool, long delta)
{
	unsigned long value;
	value = atomic_fetchadd_long(&pool->sp_space_used, delta) + delta;
	if (delta > 0) {
		if (value >= pool->sp_space_high && !pool->sp_space_throttled) {
			pool->sp_space_throttled = TRUE;
			pool->sp_space_throttle_count++;
		}
		if (value > pool->sp_space_used_highest)
			pool->sp_space_used_highest = value;
	} else {
		if (value < pool->sp_space_low && pool->sp_space_throttled) {
			pool->sp_space_throttled = FALSE;
			svc_assign_waiting_sockets(pool);
		}
	}
}
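
/*
 * The two thresholds above implement simple hysteresis: once sp_space_used
 * rises past sp_space_high the pool stops accepting new requests
 * (sp_space_throttled), and only when usage falls below the lower
 * sp_space_low mark are waiting sockets assigned again. The gap between
 * the marks avoids rapid on/off flapping around a single threshold.
 */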
static bool_t
svc_request_space_available(SVCPOOL *pool)
{

	if (pool->sp_space_throttled)
		return (FALSE);
	return (TRUE);
}
static void
svc_run_internal(SVCGROUP *grp, bool_t ismaster)
{
	SVCPOOL *pool = grp->sg_pool;
	SVCTHREAD *st, *stpref;
	SVCXPRT *xprt;
	enum xprt_stat stat;
	struct svc_req *rqstp;
	struct proc *p;
	long sz;
	int error;
	st = mem_alloc(sizeof(*st));
	mtx_init(&st->st_lock, "st_lock", NULL, MTX_DEF);
	st->st_pool = pool;
	st->st_xprt = NULL;
	STAILQ_INIT(&st->st_reqs);
	cv_init(&st->st_cond, "rpcsvc");
1159 mtx_lock(&grp->sg_lock);
	/*
	 * If we are a new thread which was spawned to cope with
	 * increased load, set the state back to SVCPOOL_ACTIVE.
	 */
1165 if (grp->sg_state == SVCPOOL_THREADSTARTING)
1166 grp->sg_state = SVCPOOL_ACTIVE;
1168 while (grp->sg_state != SVCPOOL_CLOSING) {
		/*
		 * Create new thread if requested.
		 */
1172 if (grp->sg_state == SVCPOOL_THREADWANTED) {
1173 grp->sg_state = SVCPOOL_THREADSTARTING;
1174 grp->sg_lastcreatetime = time_uptime;
1175 mtx_unlock(&grp->sg_lock);
1176 svc_new_thread(grp);
			mtx_lock(&grp->sg_lock);
			continue;
		}
		/*
		 * Check for idle transports once per second.
		 */
1184 if (time_uptime > grp->sg_lastidlecheck) {
			grp->sg_lastidlecheck = time_uptime;
			svc_checkidle(grp);
		}
		xprt = st->st_xprt;
		if (!xprt) {
			/*
			 * Enforce maxthreads count.
			 */
			if (!ismaster && grp->sg_threadcount >
			    grp->sg_maxthreads)
				break;
			/*
			 * Before sleeping, see if we can find an
			 * active transport which isn't being serviced
			 * by a thread.
			 */
1203 if (svc_request_space_available(pool) &&
1204 (xprt = TAILQ_FIRST(&grp->sg_active)) != NULL) {
				TAILQ_REMOVE(&grp->sg_active, xprt, xp_alink);
				SVC_ACQUIRE(xprt);
				xprt->xp_thread = st;
				st->st_xprt = xprt;
				continue;
			}
1212 LIST_INSERT_HEAD(&grp->sg_idlethreads, st, st_ilink);
1213 if (ismaster || (!ismaster &&
1214 grp->sg_threadcount > grp->sg_minthreads))
1215 error = cv_timedwait_sig(&st->st_cond,
1216 &grp->sg_lock, 5 * hz);
			else
				error = cv_wait_sig(&st->st_cond,
				    &grp->sg_lock);
1220 if (st->st_xprt == NULL)
1221 LIST_REMOVE(st, st_ilink);
			/*
			 * Reduce worker thread count when idle.
			 */
			if (error == EWOULDBLOCK) {
				if (!ismaster
				    && (grp->sg_threadcount
				    > grp->sg_minthreads)
				    && !st->st_xprt)
					break;
			} else if (error != 0) {
1233 KASSERT(error == EINTR || error == ERESTART,
1234 ("non-signal error %d", error));
				mtx_unlock(&grp->sg_lock);
				p = curproc;
				PROC_LOCK(p);
				if (P_SHOULDSTOP(p) ||
				    (p->p_flag & P_TOTAL_STOP) != 0) {
					thread_suspend_check(0);
					PROC_UNLOCK(p);
					mtx_lock(&grp->sg_lock);
				} else {
					PROC_UNLOCK(p);
					svc_exit(pool);
					mtx_lock(&grp->sg_lock);
					break;
				}
			}
			continue;
		}
1252 mtx_unlock(&grp->sg_lock);
		/*
		 * Drain the transport socket and queue up any RPCs.
		 */
		xprt->xp_lastactive = time_uptime;
		do {
			if (!svc_request_space_available(pool))
				break;
			rqstp = NULL;
			stat = svc_getreq(xprt, &rqstp);
			if (rqstp) {
				svc_change_space_used(pool, rqstp->rq_size);
				/*
				 * See if the application has a preference
				 * for some other thread.
				 */
1269 if (pool->sp_assign) {
1270 stpref = pool->sp_assign(st, rqstp);
1271 rqstp->rq_thread = stpref;
					STAILQ_INSERT_TAIL(&stpref->st_reqs,
					    rqstp, rq_link);
					mtx_unlock(&stpref->st_lock);
					if (stpref != st)
						rqstp = NULL;
				} else {
					rqstp->rq_thread = st;
					STAILQ_INSERT_TAIL(&st->st_reqs,
					    rqstp, rq_link);
				}
			}
1283 } while (rqstp == NULL && stat == XPRT_MOREREQS
1284 && grp->sg_state != SVCPOOL_CLOSING);
		/*
		 * Move this transport to the end of the active list to
		 * ensure fairness when multiple transports are active.
		 * If this was the last queued request, svc_getreq will end
		 * up calling xprt_inactive to remove from the active list.
		 */
		mtx_lock(&grp->sg_lock);
		xprt->xp_thread = NULL;
		st->st_xprt = NULL;
		if (xprt->xp_active) {
			if (!svc_request_space_available(pool) ||
			    !xprt_assignthread(xprt))
				TAILQ_INSERT_TAIL(&grp->sg_active,
				    xprt, xp_alink);
		}
		mtx_unlock(&grp->sg_lock);
		SVC_RELEASE(xprt);
		/*
		 * Execute what we have queued.
		 */
1307 mtx_lock(&st->st_lock);
1308 while ((rqstp = STAILQ_FIRST(&st->st_reqs)) != NULL) {
1309 STAILQ_REMOVE_HEAD(&st->st_reqs, rq_link);
1310 mtx_unlock(&st->st_lock);
1311 sz = (long)rqstp->rq_size;
1312 svc_executereq(rqstp);
1313 svc_change_space_used(pool, -sz);
1314 mtx_lock(&st->st_lock);
1316 mtx_unlock(&st->st_lock);
		mtx_lock(&grp->sg_lock);
	}

	if (st->st_xprt) {
		xprt = st->st_xprt;
		st->st_xprt = NULL;
		SVC_RELEASE(xprt);
	}
	KASSERT(STAILQ_EMPTY(&st->st_reqs), ("stray reqs on exit"));
1326 mtx_destroy(&st->st_lock);
1327 cv_destroy(&st->st_cond);
1328 mem_free(st, sizeof(*st));
	grp->sg_threadcount--;
	if (!ismaster)
		wakeup(grp);
	mtx_unlock(&grp->sg_lock);
}
static void
svc_thread_start(void *arg)
{

	svc_run_internal((SVCGROUP *) arg, FALSE);
	kthread_exit();
}
static void
svc_new_thread(SVCGROUP *grp)
{
	SVCPOOL *pool = grp->sg_pool;
	struct thread *td;
1350 mtx_lock(&grp->sg_lock);
1351 grp->sg_threadcount++;
1352 mtx_unlock(&grp->sg_lock);
1353 kthread_add(svc_thread_start, grp, pool->sp_proc, &td, 0, 0,
1354 "%s: service", pool->sp_name);
void
svc_run(SVCPOOL *pool)
{
	int g, i;
	struct proc *p;
	struct thread *td;
	SVCGROUP *grp;

	p = curproc;
	td = curthread;
	snprintf(td->td_name, sizeof(td->td_name),
	    "%s: master", pool->sp_name);
	pool->sp_state = SVCPOOL_ACTIVE;
	pool->sp_proc = p;
1372 /* Choose group count based on number of threads and CPUs. */
1373 pool->sp_groupcount = max(1, min(SVC_MAXGROUPS,
1374 min(pool->sp_maxthreads / 2, mp_ncpus) / 6));
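
	/*
	 * Worked example (illustrative): with sp_maxthreads = 64 and
	 * mp_ncpus = 16, min(64 / 2, 16) / 6 = 2 groups; the max/min
	 * clamps keep the result between 1 and SVC_MAXGROUPS.
	 */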
1375 for (g = 0; g < pool->sp_groupcount; g++) {
1376 grp = &pool->sp_groups[g];
1377 grp->sg_minthreads = max(1,
1378 pool->sp_minthreads / pool->sp_groupcount);
1379 grp->sg_maxthreads = max(1,
1380 pool->sp_maxthreads / pool->sp_groupcount);
		grp->sg_lastcreatetime = time_uptime;
	}
1384 /* Starting threads */
1385 pool->sp_groups[0].sg_threadcount++;
1386 for (g = 0; g < pool->sp_groupcount; g++) {
1387 grp = &pool->sp_groups[g];
1388 for (i = ((g == 0) ? 1 : 0); i < grp->sg_minthreads; i++)
			svc_new_thread(grp);
	}
1391 svc_run_internal(&pool->sp_groups[0], TRUE);
1393 /* Waiting for threads to stop. */
1394 for (g = 0; g < pool->sp_groupcount; g++) {
1395 grp = &pool->sp_groups[g];
1396 mtx_lock(&grp->sg_lock);
1397 while (grp->sg_threadcount > 0)
1398 msleep(grp, &grp->sg_lock, 0, "svcexit", 0);
		mtx_unlock(&grp->sg_lock);
	}
}
void
svc_exit(SVCPOOL *pool)
{
	SVCGROUP *grp;
	SVCTHREAD *st;
	int g;
1410 pool->sp_state = SVCPOOL_CLOSING;
1411 for (g = 0; g < pool->sp_groupcount; g++) {
1412 grp = &pool->sp_groups[g];
1413 mtx_lock(&grp->sg_lock);
1414 if (grp->sg_state != SVCPOOL_CLOSING) {
1415 grp->sg_state = SVCPOOL_CLOSING;
			LIST_FOREACH(st, &grp->sg_idlethreads, st_ilink)
				cv_signal(&st->st_cond);
		}
		mtx_unlock(&grp->sg_lock);
	}
}
bool_t
svc_getargs(struct svc_req *rqstp, xdrproc_t xargs, void *args)
{
	struct mbuf *m;
	XDR xdrs;
	bool_t stat;

	m = rqstp->rq_args;
	rqstp->rq_args = NULL;
1433 xdrmbuf_create(&xdrs, m, XDR_DECODE);
	stat = xargs(&xdrs, args);
	XDR_DESTROY(&xdrs);

	return (stat);
}
bool_t
svc_freeargs(struct svc_req *rqstp, xdrproc_t xargs, void *args)
{
	XDR xdrs;
1445 if (rqstp->rq_addr) {
1446 free(rqstp->rq_addr, M_SONAME);
		rqstp->rq_addr = NULL;
	}
1450 xdrs.x_op = XDR_FREE;
	return (xargs(&xdrs, args));
}
void
svc_freereq(struct svc_req *rqstp)
{
	SVCTHREAD *st;
	SVCPOOL *pool;

	st = rqstp->rq_thread;
	if (st) {
		pool = st->st_pool;
		if (pool->sp_done)
			pool->sp_done(st, rqstp);
	}
1467 if (rqstp->rq_auth.svc_ah_ops)
1468 SVCAUTH_RELEASE(&rqstp->rq_auth);
	if (rqstp->rq_xprt) {
		SVC_RELEASE(rqstp->rq_xprt);
	}

	if (rqstp->rq_addr)
		free(rqstp->rq_addr, M_SONAME);

	if (rqstp->rq_args)
		m_freem(rqstp->rq_args);

	free(rqstp, M_RPC);
}