2 * Copyright (C) 2004-2009, 2011-2014 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: dispatch.c,v 1.175 2011/11/29 01:03:47 marka Exp $ */
25 #include <sys/types.h>
29 #include <isc/entropy.h>
31 #include <isc/mutex.h>
32 #include <isc/portset.h>
33 #include <isc/print.h>
34 #include <isc/random.h>
35 #include <isc/socket.h>
36 #include <isc/stats.h>
37 #include <isc/string.h>
43 #include <dns/dispatch.h>
44 #include <dns/events.h>
46 #include <dns/message.h>
47 #include <dns/portlist.h>
48 #include <dns/stats.h>
49 #include <dns/tcpmsg.h>
50 #include <dns/types.h>
52 typedef ISC_LIST(dns_dispentry_t) dns_displist_t;
54 typedef struct dispsocket dispsocket_t;
55 typedef ISC_LIST(dispsocket_t) dispsocketlist_t;
57 typedef struct dispportentry dispportentry_t;
58 typedef ISC_LIST(dispportentry_t) dispportlist_t;
60 /* ARC4 Random generator state */
61 typedef struct arc4ctx {
66 isc_entropy_t *entropy; /*%< entropy source for ARC4 */
70 typedef struct dns_qid {
72 unsigned int qid_nbuckets; /*%< hash table size */
73 unsigned int qid_increment; /*%< id increment on collision */
75 dns_displist_t *qid_table; /*%< the table itself */
76 dispsocketlist_t *sock_table; /*%< socket table */
/*
 * Dispatch manager state shared by its dispatches: the port list and
 * entropy source, the list of dispatches, the ARC4 context used for QIDs,
 * receive-buffer accounting, the memory pools (each paired with its own
 * mutex), and the arrays of UDP ports available for IPv4 and IPv6.
 */
79 struct dns_dispatchmgr {
84 dns_portlist_t *portlist;
86 isc_entropy_t *entropy; /*%< entropy source */
88 /* Locked by "lock". */
91 ISC_LIST(dns_dispatch_t) list;
93 /* Locked by arc4_lock. */
94 isc_mutex_t arc4_lock;
95 arc4ctx_t arc4ctx; /*%< ARC4 context for QID */
97 /* locked by buffer lock */
99 isc_mutex_t buffer_lock;
100 unsigned int buffers; /*%< allocated buffers */
101 unsigned int buffersize; /*%< size of each buffer */
102 unsigned int maxbuffers; /*%< max buffers */
104 /* Locked internally. */
105 isc_mutex_t depool_lock;
106 isc_mempool_t *depool; /*%< pool for dispatch events */
107 isc_mutex_t rpool_lock;
108 isc_mempool_t *rpool; /*%< pool for replies */
109 isc_mutex_t dpool_lock;
110 isc_mempool_t *dpool; /*%< dispatch allocations */
111 isc_mutex_t bpool_lock;
112 isc_mempool_t *bpool; /*%< pool for buffers */
113 isc_mutex_t spool_lock;
114 isc_mempool_t *spool; /*%< pool for dispsocks */
117 * Locked by qid->lock if qid exists; otherwise, can be used without
119 * Memory footprint considerations: this is a simple implementation of
120 * available ports, i.e., an ordered array of the actual port numbers.
121 * This will require about 256KB of memory in the worst case (128KB for
122 * each of IPv4 and IPv6). We could reduce it by representing it as a
123 * more sophisticated way such as a list (or array) of ranges that are
124 * searched to identify a specific port. Our decision here is the saved
125 * memory isn't worth the implementation complexity, considering the
126 * fact that the whole BIND9 process (which is mainly named) already
127 * requires a pretty large memory footprint. We may, however, have to
128 * revisit the decision when we want to use it as a separate module for
129 * an environment where memory requirement is severer.
131 in_port_t *v4ports; /*%< available ports for IPv4 */
132 unsigned int nv4ports; /*%< # of available ports for IPv4 */
133 in_port_t *v6ports; /*%< available ports for IPv6 */
134 unsigned int nv6ports; /*%< # of available ports for IPv6 */
137 #define MGR_SHUTTINGDOWN 0x00000001U
138 #define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0)
140 #define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
142 struct dns_dispentry {
144 dns_dispatch_t *disp;
150 isc_taskaction_t action;
152 isc_boolean_t item_out;
153 dispsocket_t *dispsocket;
154 ISC_LIST(dns_dispatchevent_t) items;
155 ISC_LINK(dns_dispentry_t) link;
159 * Maximum number of dispatch sockets that can be pooled for reuse. The
160 * appropriate value may vary, but experiments have shown a busy caching server
161 * may need more than 1000 sockets concurrently opened. The maximum allowable
162 * number of dispatch sockets (per manager) will be set to double this
165 #ifndef DNS_DISPATCH_POOLSOCKS
166 #define DNS_DISPATCH_POOLSOCKS 2048
170 * Quota to control the number of dispatch sockets. If a dispatch has more
171 * than the quota of sockets, new queries will purge oldest ones, so that
172 * a massive number of outstanding queries won't prevent subsequent queries
173 * (especially if the older ones take longer time and result in timeout).
175 #ifndef DNS_DISPATCH_SOCKSQUOTA
176 #define DNS_DISPATCH_SOCKSQUOTA 3072
181 isc_socket_t *socket;
182 dns_dispatch_t *disp;
184 in_port_t localport; /* XXX: should be removed later */
185 dispportentry_t *portentry;
186 dns_dispentry_t *resp;
188 ISC_LINK(dispsocket_t) link;
190 ISC_LINK(dispsocket_t) blink;
194 * A port table entry. We remember every port we first open in a table with a
195 * reference counter so that we can 'reuse' the same port (with different
196 * destination addresses) using the SO_REUSEADDR socket option.
198 struct dispportentry {
201 ISC_LINK(struct dispportentry) link;
204 #ifndef DNS_DISPATCH_PORTTABLESIZE
205 #define DNS_DISPATCH_PORTTABLESIZE 1024
208 #define INVALID_BUCKET (0xffffdead)
211 * Number of tasks for each dispatch that use separate sockets for different
212 * transactions. This must be a power of 2 as it will divide 32 bit numbers
213 * to get a uniformly random task selection. See get_dispsocket().
215 #define MAX_INTERNAL_TASKS 64
217 struct dns_dispatch {
219 unsigned int magic; /*%< magic */
220 dns_dispatchmgr_t *mgr; /*%< dispatch manager */
223 * internal task buckets. We use multiple tasks to distribute various
224 * socket events well when using separate dispatch sockets. We use the
225 * 1st task (task[0]) for internal control events.
227 isc_task_t *task[MAX_INTERNAL_TASKS];
228 isc_socket_t *socket; /*%< isc socket attached to */
229 isc_sockaddr_t local; /*%< local address */
230 in_port_t localport; /*%< local UDP port */
231 unsigned int maxrequests; /*%< max requests */
232 isc_event_t *ctlevent;
234 isc_mutex_t sepool_lock;
235 isc_mempool_t *sepool; /*%< pool for socket events */
237 /*% Locked by mgr->lock. */
238 ISC_LINK(dns_dispatch_t) link;
240 /* Locked by "lock". */
241 isc_mutex_t lock; /*%< locks all below */
242 isc_sockettype_t socktype;
243 unsigned int attributes;
244 unsigned int refcount; /*%< number of users */
245 dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */
246 unsigned int shutting_down : 1,
250 recv_pending : 1; /*%< is a recv() pending? */
251 isc_result_t shutdown_why;
252 ISC_LIST(dispsocket_t) activesockets;
253 ISC_LIST(dispsocket_t) inactivesockets;
254 unsigned int nsockets;
255 unsigned int requests; /*%< how many requests we have */
256 unsigned int tcpbuffers; /*%< allocated buffers */
257 dns_tcpmsg_t tcpmsg; /*%< for tcp streams */
259 arc4ctx_t arc4ctx; /*%< for QID/UDP port num */
260 dispportlist_t *port_table; /*%< hold ports 'owned' by us */
261 isc_mempool_t *portpool; /*%< port table entries */
264 #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ')
265 #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC)
267 #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p')
268 #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
270 #define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c')
271 #define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
273 #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p')
274 #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
276 #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
277 #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
/*
 * DNS_QID(disp): the QID table used by 'disp' -- the dispatch's own table
 * for TCP dispatches, the manager's shared table otherwise.
 *
 * DISP_ARC4CTX(disp): pointer to the ARC4 context used by 'disp' -- the
 * dispatch's own context for UDP dispatches, the manager's otherwise.
 *
 * Both expansions are fully parenthesized so that an operator applied to
 * the macro result (e.g. DNS_QID(disp)->..., *DISP_ARC4CTX(disp)) acts on
 * the selected value rather than on the last operand of the conditional.
 */
#define DNS_QID(disp) (((disp)->socktype == isc_sockettype_tcp) ? \
		       (disp)->qid : (disp)->mgr->qid)
#define DISP_ARC4CTX(disp) (((disp)->socktype == isc_sockettype_udp) ? \
			    (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
285 * Locking a query port buffer is a bit tricky. We access the buffer without
286 * locking until qid is created. Technically, there is a possibility of race
287 * between the creation of qid and access to the port buffer; in practice,
288 * however, this should be safe because qid isn't created until the first
289 * dispatch is created and there should be no contending situation until then.
/*
 * Lock/unlock the query port buffer.  The lock lives in mgr->qid, so
 * nothing is done while qid has not been created yet.
 *
 * The bodies are wrapped in do { ... } while (0) so that each macro is a
 * single statement: a bare 'if' would silently capture an 'else' that
 * follows the macro invocation in the caller.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)
297 static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
298 dns_messageid_t, in_port_t, unsigned int);
299 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
300 static void destroy_disp(isc_task_t *task, isc_event_t *event);
301 static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
302 static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
303 static void udp_exrecv(isc_task_t *, isc_event_t *);
304 static void udp_shrecv(isc_task_t *, isc_event_t *);
305 static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
306 static void tcp_recv(isc_task_t *, isc_event_t *);
307 static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
308 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
310 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
311 static void *allocate_udp_buffer(dns_dispatch_t *disp);
312 static inline void free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
313 static inline dns_dispatchevent_t *allocate_devent(dns_dispatch_t *disp);
314 static void do_cancel(dns_dispatch_t *disp);
315 static dns_dispentry_t *linear_first(dns_qid_t *disp);
316 static dns_dispentry_t *linear_next(dns_qid_t *disp,
317 dns_dispentry_t *resp);
318 static void dispatch_free(dns_dispatch_t **dispp);
319 static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
320 dns_dispatch_t *disp,
321 isc_socketmgr_t *sockmgr,
322 isc_sockaddr_t *localaddr,
323 isc_socket_t **sockp,
324 isc_socket_t *dup_socket);
325 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
326 isc_socketmgr_t *sockmgr,
327 isc_taskmgr_t *taskmgr,
328 isc_sockaddr_t *localaddr,
329 unsigned int maxrequests,
330 unsigned int attributes,
331 dns_dispatch_t **dispp,
332 isc_socket_t *dup_socket);
333 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
334 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
335 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
336 unsigned int increment, dns_qid_t **qidp,
337 isc_boolean_t needaddrtable);
338 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
339 static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
340 unsigned int options, isc_socket_t **sockp,
341 isc_socket_t *dup_socket);
342 static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
343 isc_sockaddr_t *sockaddrp);
345 #define LVL(x) ISC_LOG_DEBUG(x)
348 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
349 ISC_FORMAT_PRINTF(3, 4);
352 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
356 if (! isc_log_wouldlog(dns_lctx, level))
360 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
363 isc_log_write(dns_lctx,
364 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
365 level, "dispatchmgr %p: %s", mgr, msgbuf);
369 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
370 if (mgr->stats != NULL)
371 isc_stats_increment(mgr->stats, counter);
375 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
376 ISC_FORMAT_PRINTF(3, 4);
379 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
383 if (! isc_log_wouldlog(dns_lctx, level))
387 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
390 isc_log_write(dns_lctx,
391 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
392 level, "dispatch %p: %s", disp, msgbuf);
396 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
397 int level, const char *fmt, ...)
398 ISC_FORMAT_PRINTF(4, 5);
401 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
402 int level, const char *fmt, ...)
408 if (! isc_log_wouldlog(dns_lctx, level))
412 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
415 if (VALID_RESPONSE(resp)) {
416 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
417 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
418 DNS_LOGMODULE_DISPATCH, level,
419 "dispatch %p response %p %s: %s", disp, resp,
422 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
423 DNS_LOGMODULE_DISPATCH, level,
424 "dispatch %p req/resp %p: %s", disp, resp,
430 * ARC4 random number generator derived from OpenBSD.
431 * Only dispatch_random() and dispatch_uniformrandom() are expected
432 * to be called from general dispatch routines; the rest of them are subroutines
435 * The original copyright follows:
436 * Copyright (c) 1996, David Mazieres <dm@uun.org>
437 * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
439 * Permission to use, copy, modify, and distribute this software for any
440 * purpose with or without fee is hereby granted, provided that the above
441 * copyright notice and this permission notice appear in all copies.
443 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
444 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
445 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
446 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
447 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
448 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
449 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
453 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
457 for (n = 0; n < 256; n++)
462 actx->entropy = entropy; /* don't have to attach */
467 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
472 for (n = 0; n < 256; n++) {
473 actx->i = (actx->i + 1);
474 si = actx->s[actx->i];
475 actx->j = (actx->j + si + dat[n % datlen]);
476 actx->s[actx->i] = actx->s[actx->j];
477 actx->s[actx->j] = si;
482 static inline isc_uint8_t
483 dispatch_arc4get8(arc4ctx_t *actx) {
486 actx->i = (actx->i + 1);
487 si = actx->s[actx->i];
488 actx->j = (actx->j + si);
489 sj = actx->s[actx->j];
490 actx->s[actx->i] = sj;
491 actx->s[actx->j] = si;
493 return (actx->s[(si + sj) & 0xff]);
496 static inline isc_uint16_t
497 dispatch_arc4get16(arc4ctx_t *actx) {
500 val = dispatch_arc4get8(actx) << 8;
501 val |= dispatch_arc4get8(actx);
507 dispatch_arc4stir(arc4ctx_t *actx) {
510 unsigned char rnd[128];
511 isc_uint32_t rnd32[32];
515 if (actx->entropy != NULL) {
517 * We accept any quality of random data to avoid blocking.
519 result = isc_entropy_getdata(actx->entropy, rnd.rnd,
520 sizeof(rnd), NULL, 0);
521 RUNTIME_CHECK(result == ISC_R_SUCCESS);
523 for (i = 0; i < 32; i++)
524 isc_random_get(&rnd.rnd32[i]);
526 dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
529 * Discard early keystream, as per recommendations in:
530 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
532 for (i = 0; i < 256; i++)
533 (void)dispatch_arc4get8(actx);
536 * Derived from OpenBSD's implementation. The rationale is not clear,
537 * but should be conservative enough in safety, and reasonably large
540 actx->count = 1600000;
544 dispatch_random(arc4ctx_t *actx) {
547 if (actx->lock != NULL)
550 actx->count -= sizeof(isc_uint16_t);
551 if (actx->count <= 0)
552 dispatch_arc4stir(actx);
553 result = dispatch_arc4get16(actx);
555 if (actx->lock != NULL)
562 * For general purpose library, we don't have to be too strict about the
563 * quality of random values. Performance doesn't matter much, either.
564 * So we simply use the isc_random module to keep the library as small as
569 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
580 dispatch_random(arc4ctx_t *actx) {
/*
 * Return a value obtained from dispatch_random(actx) and reduced modulo
 * 'upper_bound'.  'min' is the lower cut-off of the raw range described
 * in the comment below.
 * NOTE(review): the retry loop that compares the raw value against 'min'
 * is not visible in this listing -- confirm against the full source.
 */
591 dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
598 * Ensure the range of random numbers [min, 0xffff] be a multiple of
599 * upper_bound and contain at least a half of the 16 bit range.
602 if (upper_bound > 0x8000)
603 min = 1 + ~upper_bound; /* i.e. -upper_bound: 0x10000 - upper_bound as a 16-bit value */
605 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
608 * This could theoretically loop forever but each retry has
609 * p > 0.5 (worst case, usually far better) of selecting a
610 * number inside the range we need, so it should rarely need
614 r = dispatch_random(actx);
619 return (r % upper_bound);
623 * Return a hash of the destination and message id.
626 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
631 ret = isc_sockaddr_hash(dest, ISC_TRUE);
632 ret ^= (id << 16) | port;
633 ret %= qid->qid_nbuckets;
635 INSIST(ret < qid->qid_nbuckets);
641 * Find the first entry in 'qid'. Returns NULL if there are no entries.
643 static dns_dispentry_t *
644 linear_first(dns_qid_t *qid) {
645 dns_dispentry_t *ret;
650 while (bucket < qid->qid_nbuckets) {
651 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
661 * Find the next entry after 'resp' in 'qid'. Return NULL if there are
664 static dns_dispentry_t *
665 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
666 dns_dispentry_t *ret;
669 ret = ISC_LIST_NEXT(resp, link);
673 bucket = resp->bucket;
675 while (bucket < qid->qid_nbuckets) {
676 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
686 * The dispatch must be locked.
/*
 * Test whether 'disp' may be destroyed.  Four conditions are examined in
 * order: outstanding references, a pending receive, sockets still on the
 * active list, and whether shutdown has begun.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably each test that fires vetoes destruction -- confirm.
 */
689 destroy_disp_ok(dns_dispatch_t *disp)
691 if (disp->refcount != 0) /* still referenced */
694 if (disp->recv_pending != 0) /* a receive is still outstanding */
697 if (!ISC_LIST_EMPTY(disp->activesockets)) /* dedicated sockets still active */
700 if (disp->shutting_down == 0) /* shutdown has not been started */
707 * Called when refcount reaches 0 (and safe to destroy).
709 * The dispatcher must be locked.
710 * The manager must not be locked.
713 destroy_disp(isc_task_t *task, isc_event_t *event) {
714 dns_dispatch_t *disp;
715 dns_dispatchmgr_t *mgr;
716 isc_boolean_t killmgr;
717 dispsocket_t *dispsocket;
720 INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
724 disp = event->ev_arg;
728 ISC_LIST_UNLINK(mgr->list, disp, link);
730 dispatch_log(disp, LVL(90),
731 "shutting down; detaching from sock %p, task %p",
732 disp->socket, disp->task[0]); /* XXXX */
734 if (disp->sepool != NULL) {
735 isc_mempool_destroy(&disp->sepool);
736 (void)isc_mutex_destroy(&disp->sepool_lock);
739 if (disp->socket != NULL)
740 isc_socket_detach(&disp->socket);
741 while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
742 ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
743 destroy_dispsocket(disp, &dispsocket);
745 for (i = 0; i < disp->ntasks; i++)
746 isc_task_detach(&disp->task[i]);
747 isc_event_free(&event);
749 dispatch_free(&disp);
751 killmgr = destroy_mgr_ok(mgr);
758 * Manipulate port table per dispatch: find an entry for a given port number,
759 * create a new entry, and decrement a given entry with possible clean-up.
/*
 * Look for the port table entry describing 'port' in disp->port_table.
 * The table is an array of lists indexed by
 * port % DNS_DISPATCH_PORTTABLESIZE; the selected list is walked and each
 * entry's port is compared with 'port'.
 * disp->port_table must not be NULL.
 */
761 static dispportentry_t *
762 port_search(dns_dispatch_t *disp, in_port_t port) {
763 dispportentry_t *portentry;
765 REQUIRE(disp->port_table != NULL);
/* Start at the head of the bucket this port hashes to. */
767 portentry = ISC_LIST_HEAD(disp->port_table[port %
768 DNS_DISPATCH_PORTTABLESIZE]);
769 while (portentry != NULL) {
770 if (portentry->port == port)
772 portentry = ISC_LIST_NEXT(portentry, link);
778 static dispportentry_t *
779 new_portentry(dns_dispatch_t *disp, in_port_t port) {
780 dispportentry_t *portentry;
783 REQUIRE(disp->port_table != NULL);
785 portentry = isc_mempool_get(disp->portpool);
786 if (portentry == NULL)
789 portentry->port = port;
791 ISC_LINK_INIT(portentry, link);
794 ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
802 * The caller must not hold the qid->lock.
805 deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
806 dispportentry_t *portentry = *portentryp;
809 REQUIRE(disp->port_table != NULL);
810 REQUIRE(portentry != NULL && portentry->refs > 0);
816 if (portentry->refs == 0) {
817 ISC_LIST_UNLINK(disp->port_table[portentry->port %
818 DNS_DISPATCH_PORTTABLESIZE],
820 isc_mempool_put(disp->portpool, portentry);
828 * Find a dispsocket for socket address 'dest', and port number 'port'.
829 * Return NULL if no such entry exists.
/*
 * Walk bucket 'bucket' of qid->sock_table (chained through 'blink')
 * looking for a dispatch socket that has a port entry for 'port' and whose
 * recorded peer address equals 'dest'.
 * 'qid' must be valid and 'bucket' must be below qid->qid_nbuckets.
 */
831 static dispsocket_t *
832 socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
835 dispsocket_t *dispsock;
837 REQUIRE(VALID_QID(qid));
838 REQUIRE(bucket < qid->qid_nbuckets);
840 dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
842 while (dispsock != NULL) {
/* Sockets without a port entry can never match. */
843 if (dispsock->portentry != NULL &&
844 dispsock->portentry->port == port &&
845 isc_sockaddr_equal(dest, &dispsock->host))
847 dispsock = ISC_LIST_NEXT(dispsock, blink);
854 * Make a new socket for a single dispatch with a random port number.
855 * The caller must hold the disp->lock
858 get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
859 isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp,
864 dns_dispatchmgr_t *mgr = disp->mgr;
865 isc_socket_t *sock = NULL;
866 isc_result_t result = ISC_R_FAILURE;
868 isc_sockaddr_t localaddr;
869 unsigned int bucket = 0;
870 dispsocket_t *dispsock;
873 unsigned int bindoptions;
874 dispportentry_t *portentry = NULL;
877 if (isc_sockaddr_pf(&disp->local) == AF_INET) {
878 nports = disp->mgr->nv4ports;
879 ports = disp->mgr->v4ports;
881 nports = disp->mgr->nv6ports;
882 ports = disp->mgr->v6ports;
885 return (ISC_R_ADDRNOTAVAIL);
887 dispsock = ISC_LIST_HEAD(disp->inactivesockets);
888 if (dispsock != NULL) {
889 ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
890 sock = dispsock->socket;
891 dispsock->socket = NULL;
893 dispsock = isc_mempool_get(mgr->spool);
894 if (dispsock == NULL)
895 return (ISC_R_NOMEMORY);
898 dispsock->socket = NULL;
899 dispsock->disp = disp;
900 dispsock->resp = NULL;
901 dispsock->portentry = NULL;
903 dispsock->task = NULL;
904 isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
905 ISC_LINK_INIT(dispsock, link);
906 ISC_LINK_INIT(dispsock, blink);
907 dispsock->magic = DISPSOCK_MAGIC;
911 * Pick up a random UDP port and open a new socket with it. Avoid
912 * choosing ports that share the same destination because it will be
913 * very likely to fail in bind(2) or connect(2).
915 localaddr = disp->local;
918 for (i = 0; i < 64; i++) {
919 port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
921 isc_sockaddr_setport(&localaddr, port);
924 bucket = dns_hash(qid, dest, 0, port);
925 if (socket_search(qid, dest, port, bucket) != NULL) {
931 portentry = port_search(disp, port);
933 if (portentry != NULL)
934 bindoptions |= ISC_SOCKET_REUSEADDRESS;
935 result = open_socket(sockmgr, &localaddr, bindoptions, &sock,
937 if (result == ISC_R_SUCCESS) {
938 if (portentry == NULL) {
939 portentry = new_portentry(disp, port);
940 if (portentry == NULL) {
941 result = ISC_R_NOMEMORY;
950 } else if (result == ISC_R_NOPERM) {
951 char buf[ISC_SOCKADDR_FORMATSIZE];
952 isc_sockaddr_format(&localaddr, buf, sizeof(buf));
953 dispatch_log(disp, ISC_LOG_WARNING,
954 "open_socket(%s) -> %s: continuing",
955 buf, isc_result_totext(result));
956 } else if (result != ISC_R_ADDRINUSE)
960 if (result == ISC_R_SUCCESS) {
961 dispsock->socket = sock;
962 dispsock->host = *dest;
963 dispsock->portentry = portentry;
964 dispsock->bucket = bucket;
966 ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
968 *dispsockp = dispsock;
972 * We could keep it in the inactive list, but since this should
973 * be an exceptional case and might be resource shortage, we'd
977 isc_socket_detach(&sock);
978 destroy_dispsocket(disp, &dispsock);
985 * Destroy a dedicated dispatch socket.
/*
 * Tear down '*dispsockp': invalidate its magic, drop its port entry
 * reference, detach its socket and task where present, remove it from the
 * qid socket table if it is still linked there, and return the structure
 * to the manager's spool.
 * The dispsocket must already be off the dispatch's 'link' list.
 */
988 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
989 dispsocket_t *dispsock;
993 * The dispatch must be locked.
996 REQUIRE(dispsockp != NULL && *dispsockp != NULL);
997 dispsock = *dispsockp;
998 REQUIRE(!ISC_LINK_LINKED(dispsock, link));
1001 dispsock->magic = 0;
1002 if (dispsock->portentry != NULL)
1003 deref_portentry(disp, &dispsock->portentry);
1004 if (dispsock->socket != NULL)
1005 isc_socket_detach(&dispsock->socket);
/* Still hashed in the qid socket table: unlink it from its bucket. */
1006 if (ISC_LINK_LINKED(dispsock, blink)) {
1007 qid = DNS_QID(disp);
1009 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1013 if (dispsock->task != NULL)
1014 isc_task_detach(&dispsock->task);
1015 isc_mempool_put(disp->mgr->spool, dispsock);
1021 * Deactivate a dedicated dispatch socket. Move it to the inactive list for
1022 * future reuse unless the total number of sockets exceeds the maximum.
1025 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1026 isc_result_t result;
1030 * The dispatch must be locked.
1032 ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
1033 if (dispsock->resp != NULL) {
1034 INSIST(dispsock->resp->dispsocket == dispsock);
1035 dispsock->resp->dispsocket = NULL;
1038 INSIST(dispsock->portentry != NULL);
1039 deref_portentry(disp, &dispsock->portentry);
1042 if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
1043 destroy_dispsocket(disp, &dispsock);
1045 result = isc_socket_close(dispsock->socket);
1047 qid = DNS_QID(disp);
1049 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1053 if (result == ISC_R_SUCCESS)
1054 ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
1057 * If the underlying system does not allow this
1058 * optimization, destroy this temporary structure (and
1059 * create a new one for a new transaction).
1061 INSIST(result == ISC_R_NOTIMPLEMENTED);
1062 destroy_dispsocket(disp, &dispsock);
1066 /* This kind of optimization isn't necessary for normal use */
1070 destroy_dispsocket(disp, &dispsock);
1075 * Find an entry for query ID 'id', socket address 'dest', and port number
1077 * Return NULL if no such entry exists.
/*
 * Walk bucket 'bucket' of qid->qid_table looking for a response entry
 * whose id, peer address and port all equal 'id', 'dest' and 'port'.
 * 'qid' must be valid and 'bucket' must be below qid->qid_nbuckets.
 */
1079 static dns_dispentry_t *
1080 entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
1081 in_port_t port, unsigned int bucket)
1083 dns_dispentry_t *res;
1085 REQUIRE(VALID_QID(qid));
1086 REQUIRE(bucket < qid->qid_nbuckets);
1088 res = ISC_LIST_HEAD(qid->qid_table[bucket]);
1090 while (res != NULL) {
/* All three of id, address and port must match. */
1091 if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
1092 res->port == port) {
1095 res = ISC_LIST_NEXT(res, link);
/*
 * Release buffer 'buf' of 'len' bytes belonging to 'disp'.
 * 'buf' must be non-NULL and 'len' non-zero.
 *
 * TCP: the memory is returned to the manager's memory context.
 * UDP: the manager's buffer count is decremented under buffer_lock, and
 * the buffer is handed back to the buffer pool after the lock has been
 * released; 'len' must equal the manager's buffersize.
 */
1102 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
1103 isc_mempool_t *bpool;
1104 INSIST(buf != NULL && len != 0);
1107 switch (disp->socktype) {
1108 case isc_sockettype_tcp:
1109 INSIST(disp->tcpbuffers > 0);
1111 isc_mem_put(disp->mgr->mctx, buf, len);
1113 case isc_sockettype_udp:
1114 LOCK(&disp->mgr->buffer_lock);
1115 INSIST(disp->mgr->buffers > 0);
1116 INSIST(len == disp->mgr->buffersize);
1117 disp->mgr->buffers--;
/* Snapshot the pool pointer while locked; the put happens unlocked. */
1118 bpool = disp->mgr->bpool;
1119 UNLOCK(&disp->mgr->buffer_lock);
1120 isc_mempool_put(bpool, buf);
/*
 * Obtain a UDP receive buffer from the manager's buffer pool.
 * The manager's buffer count is incremented under buffer_lock before the
 * pool is consulted; the pool get itself runs without the lock held.
 */
1129 allocate_udp_buffer(dns_dispatch_t *disp) {
1130 isc_mempool_t *bpool;
1133 LOCK(&disp->mgr->buffer_lock);
1134 bpool = disp->mgr->bpool;
1135 disp->mgr->buffers++;
1136 UNLOCK(&disp->mgr->buffer_lock);
1138 temp = isc_mempool_get(bpool);
/*
 * Roll the count back.
 * NOTE(review): the guarding condition is not visible in this listing;
 * presumably this runs only when the get above returned NULL -- confirm.
 */
1141 LOCK(&disp->mgr->buffer_lock);
1142 disp->mgr->buffers--;
1143 UNLOCK(&disp->mgr->buffer_lock);
1150 free_sevent(isc_event_t *ev) {
1151 isc_mempool_t *pool = ev->ev_destroy_arg;
1152 isc_socketevent_t *sev = (isc_socketevent_t *) ev;
1153 isc_mempool_put(pool, sev);
1156 static inline isc_socketevent_t *
1157 allocate_sevent(dns_dispatch_t *disp, isc_socket_t *socket,
1158 isc_eventtype_t type, isc_taskaction_t action, const void *arg)
1160 isc_socketevent_t *ev;
1163 ev = isc_mempool_get(disp->sepool);
1166 DE_CONST(arg, deconst_arg);
1167 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type,
1168 action, deconst_arg, socket,
1169 free_sevent, disp->sepool);
1170 ev->result = ISC_R_UNSET;
1171 ISC_LINK_INIT(ev, ev_link);
1172 ISC_LIST_INIT(ev->bufferlist);
1173 ev->region.base = NULL;
1183 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
1184 if (disp->failsafe_ev == ev) {
1185 INSIST(disp->shutdown_out == 1);
1186 disp->shutdown_out = 0;
1191 isc_mempool_put(disp->mgr->depool, ev);
1194 static inline dns_dispatchevent_t *
1195 allocate_devent(dns_dispatch_t *disp) {
1196 dns_dispatchevent_t *ev;
1198 ev = isc_mempool_get(disp->mgr->depool);
1201 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
1202 NULL, NULL, NULL, NULL, NULL);
/*
 * Receive handler whose event argument is a dispatch socket: validates
 * the dispsocket and forwards the event to udp_recv() together with the
 * dispatch that owns it.
 */
1208 udp_exrecv(isc_task_t *task, isc_event_t *ev) {
1209 dispsocket_t *dispsock = ev->ev_arg;
1213 REQUIRE(VALID_DISPSOCK(dispsock));
1214 udp_recv(ev, dispsock->disp, dispsock);
/*
 * Receive handler whose event argument is the dispatch itself: validates
 * the dispatch and forwards the event to udp_recv() with no dispatch
 * socket (NULL).
 */
1218 udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1219 dns_dispatch_t *disp = ev->ev_arg;
1223 REQUIRE(VALID_DISPATCH(disp));
1224 udp_recv(ev, disp, NULL);
1230 * If I/O result == CANCELED or error, free the buffer.
1232 * If query, free the buffer, restart.
1235 * Allocate event, fill in details.
1236 * If cannot allocate, free buffer, restart.
1237 * find target. If not found, free buffer, restart.
1238 * if event queue is not empty, queue. else, send.
1242 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1243 isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1246 isc_buffer_t source;
1248 dns_dispentry_t *resp = NULL;
1249 dns_dispatchevent_t *rev;
1250 unsigned int bucket;
1251 isc_boolean_t killit;
1252 isc_boolean_t queue_response;
1253 dns_dispatchmgr_t *mgr;
1255 isc_netaddr_t netaddr;
1258 isc_boolean_t qidlocked = ISC_FALSE;
1265 dispatch_log(disp, LVL(90),
1266 "got packet: requests %d, buffers %d, recvs %d",
1267 disp->requests, disp->mgr->buffers, disp->recv_pending);
1269 if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1271 * Unless the receive event was imported from a listening
1272 * interface, in which case the event type is
1273 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1275 INSIST(disp->recv_pending != 0);
1276 disp->recv_pending = 0;
1279 if (dispsock != NULL &&
1280 (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1282 * dispsock->resp can be NULL if this transaction was canceled
1283 * just after receiving a response. Since this socket is
1284 * exclusively used and there should be at most one receive
1285 * event the canceled event should have been no effect. So
1286 * we can (and should) deactivate the socket right now.
1288 deactivate_dispsocket(disp, dispsock);
1292 if (disp->shutting_down) {
1294 * This dispatcher is shutting down.
1296 free_buffer(disp, ev->region.base, ev->region.length);
1298 isc_event_free(&ev_in);
1301 killit = destroy_disp_ok(disp);
1302 UNLOCK(&disp->lock);
1304 isc_task_send(disp->task[0], &disp->ctlevent);
1309 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1310 if (dispsock != NULL) {
1311 resp = dispsock->resp;
1313 if (ev->result != ISC_R_SUCCESS) {
1315 * This is most likely a network error on a
1316 * connected socket. It makes no sense to
1317 * check the address or parse the packet, but it
1318 * will help to return the error to the caller.
1323 free_buffer(disp, ev->region.base, ev->region.length);
1325 UNLOCK(&disp->lock);
1326 isc_event_free(&ev_in);
1329 } else if (ev->result != ISC_R_SUCCESS) {
1330 free_buffer(disp, ev->region.base, ev->region.length);
1332 if (ev->result != ISC_R_CANCELED)
1333 dispatch_log(disp, ISC_LOG_ERROR,
1334 "odd socket result in udp_recv(): %s",
1335 isc_result_totext(ev->result));
1337 UNLOCK(&disp->lock);
1338 isc_event_free(&ev_in);
1343 * If this is from a blackholed address, drop it.
1345 isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1346 if (disp->mgr->blackhole != NULL &&
1347 dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1348 NULL, &match, NULL) == ISC_R_SUCCESS &&
1351 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1352 char netaddrstr[ISC_NETADDR_FORMATSIZE];
1353 isc_netaddr_format(&netaddr, netaddrstr,
1354 sizeof(netaddrstr));
1355 dispatch_log(disp, LVL(10),
1356 "blackholed packet from %s",
1359 free_buffer(disp, ev->region.base, ev->region.length);
1364 * Peek into the buffer to see what we can see.
1366 isc_buffer_init(&source, ev->region.base, ev->region.length);
1367 isc_buffer_add(&source, ev->n);
1368 dres = dns_message_peekheader(&source, &id, &flags);
1369 if (dres != ISC_R_SUCCESS) {
1370 free_buffer(disp, ev->region.base, ev->region.length);
1371 dispatch_log(disp, LVL(10), "got garbage packet");
1375 dispatch_log(disp, LVL(92),
1376 "got valid DNS message header, /QR %c, id %u",
1377 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1380 * Look at flags. If query, drop it. If response,
1381 * look to see where it goes.
1383 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1385 free_buffer(disp, ev->region.base, ev->region.length);
1390 * Search for the corresponding response. If we are using an exclusive
1391 * socket, we've already identified it and we can skip the search; but
1392 * the ID and the address must match the expected ones.
1395 bucket = dns_hash(qid, &ev->address, id, disp->localport);
1397 qidlocked = ISC_TRUE;
1398 resp = entry_search(qid, &ev->address, id, disp->localport,
1400 dispatch_log(disp, LVL(90),
1401 "search for response in bucket %d: %s",
1402 bucket, (resp == NULL ? "not found" : "found"));
1405 inc_stats(mgr, dns_resstatscounter_mismatch);
1406 free_buffer(disp, ev->region.base, ev->region.length);
1409 } else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1411 dispatch_log(disp, LVL(90),
1412 "response to an exclusive socket doesn't match");
1413 inc_stats(mgr, dns_resstatscounter_mismatch);
1414 free_buffer(disp, ev->region.base, ev->region.length);
1419 * Now that we have the original dispatch the query was sent
1420 * from check that the address and port the response was
1421 * sent to make sense.
1423 if (disp != resp->disp) {
1428 * Check that the socket types and ports match.
1430 if (disp->socktype != resp->disp->socktype ||
1431 isc_sockaddr_getport(&disp->local) !=
1432 isc_sockaddr_getport(&resp->disp->local)) {
1433 free_buffer(disp, ev->region.base, ev->region.length);
1438 * If each dispatch is bound to a different address
1441 * Note under Linux a packet can be sent out via IPv4 socket
1442 * and the response be received via a IPv6 socket.
1444 * Requests sent out via IPv6 should always come back in
1447 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1448 isc_sockaddr_pf(&disp->local) != PF_INET6) {
1449 free_buffer(disp, ev->region.base, ev->region.length);
1452 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1453 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1454 if (!isc_sockaddr_eqaddr(&disp->local, &resp->disp->local) &&
1455 !isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1456 !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1457 free_buffer(disp, ev->region.base, ev->region.length);
1463 queue_response = resp->item_out;
1464 rev = allocate_devent(resp->disp);
1466 free_buffer(disp, ev->region.base, ev->region.length);
1471 * At this point, rev contains the event we want to fill in, and
1472 * resp contains the information on the place to send it to.
1473 * Send the event off.
1475 isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1476 isc_buffer_add(&rev->buffer, ev->n);
1477 rev->result = ev->result;
1479 rev->addr = ev->address;
1480 rev->pktinfo = ev->pktinfo;
1481 rev->attributes = ev->attributes;
1482 if (queue_response) {
1483 ISC_LIST_APPEND(resp->items, rev, ev_link);
1485 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1487 resp->action, resp->arg, resp, NULL, NULL);
1488 request_log(disp, resp, LVL(90),
1489 "[a] Sent event %p buffer %p len %d to task %p",
1490 rev, rev->buffer.base, rev->buffer.length,
1492 resp->item_out = ISC_TRUE;
1493 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1500 * Restart recv() to get the next packet.
1503 result = startrecv(disp, dispsock);
1504 if (result != ISC_R_SUCCESS && dispsock != NULL) {
1506 * XXX: weird. There seems to be no recovery process other than
1507 * deactivate this socket anyway (since we cannot start
1508 * receiving, we won't be able to receive a cancel event
1511 deactivate_dispsocket(disp, dispsock);
1513 UNLOCK(&disp->lock);
1515 isc_event_free(&ev_in);
1521 * If I/O result == CANCELED, EOF, or error, notify everyone as the
1522 * various queues drain.
1524 * If query, restart.
1527 * Allocate event, fill in details.
1528 * If cannot allocate, restart.
1529 * find target. If not found, restart.
1530 * if event queue is not empty, queue. else, send.
/*
 * tcp_recv: task callback run when a read on a TCP dispatch completes.
 *
 * The dispatch is taken from ev_in->ev_arg and the message state from the
 * tcpmsg embedded in it. When tcpmsg->result is not ISC_R_SUCCESS the
 * dispatch is flagged as shutting down and the reason is recorded in
 * shutdown_why. Otherwise the DNS header is peeked, the matching entry is
 * searched by peer address, message id and local port, and a dispatch
 * event is either appended to resp->items (when queue_response is set)
 * or initialised and sent to resp->task. Finally the receive is
 * restarted with startrecv().
 *
 * NOTE(review): several declarations, branch targets and lock calls of
 * this function are not visible in this view -- confirm against the full
 * file before relying on the exact control flow.
 */
1534 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1535 dns_dispatch_t *disp = ev_in->ev_arg;
1536 dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1540 dns_dispentry_t *resp;
1541 dns_dispatchevent_t *rev;
1542 unsigned int bucket;
1543 isc_boolean_t killit;
1544 isc_boolean_t queue_response;
1547 char buf[ISC_SOCKADDR_FORMATSIZE];
1551 REQUIRE(VALID_DISPATCH(disp));
1555 dispatch_log(disp, LVL(90),
1556 "got TCP packet: requests %d, buffers %d, recvs %d",
1557 disp->requests, disp->tcpbuffers, disp->recv_pending);
/* A receive must have been outstanding; it has now completed. */
1561 INSIST(disp->recv_pending != 0);
1562 disp->recv_pending = 0;
1564 if (disp->refcount == 0) {
1566 * This dispatcher is shutting down. Force cancelation.
1568 tcpmsg->result = ISC_R_CANCELED;
/* Failure path: log according to the result, then mark the shutdown. */
1571 if (tcpmsg->result != ISC_R_SUCCESS) {
1572 switch (tcpmsg->result) {
1573 case ISC_R_CANCELED:
1577 dispatch_log(disp, LVL(90), "shutting down on EOF");
1581 case ISC_R_CONNECTIONRESET:
1582 level = ISC_LOG_INFO;
1586 level = ISC_LOG_ERROR;
1588 isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1589 dispatch_log(disp, level, "shutting down due to TCP "
1590 "receive error: %s: %s", buf,
1591 isc_result_totext(tcpmsg->result));
1597 * The event is statically allocated in the tcpmsg
1598 * structure, and destroy_disp() frees the tcpmsg, so we must
1599 * free the event *before* calling destroy_disp().
1601 isc_event_free(&ev_in);
1603 disp->shutting_down = 1;
1604 disp->shutdown_why = tcpmsg->result;
1607 * If the recv() was canceled pass the word on.
1609 killit = destroy_disp_ok(disp);
1610 UNLOCK(&disp->lock);
/* NOTE(review): presumably only sent when killit is set -- guard not visible. */
1612 isc_task_send(disp->task[0], &disp->ctlevent);
1616 dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1618 tcpmsg->buffer.length, tcpmsg->buffer.base);
1621 * Peek into the buffer to see what we can see.
1623 dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1624 if (dres != ISC_R_SUCCESS) {
1625 dispatch_log(disp, LVL(10), "got garbage packet");
1629 dispatch_log(disp, LVL(92),
1630 "got valid DNS message header, /QR %c, id %u",
1631 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1634 * Allocate an event to send to the query or response client, and
1635 * allocate a new buffer for our use.
1639 * Look at flags. If query, drop it. If response,
1640 * look to see where it goes.
1642 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
/* The bucket is derived from peer address, message id and local port. */
1652 bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1654 resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1655 dispatch_log(disp, LVL(90),
1656 "search for response in bucket %d: %s",
1657 bucket, (resp == NULL ? "not found" : "found"));
/*
 * Remember whether an event is already out for this entry; that decides
 * between queueing and sending below.
 */
1661 queue_response = resp->item_out;
1662 rev = allocate_devent(disp);
1667 * At this point, rev contains the event we want to fill in, and
1668 * resp contains the information on the place to send it to.
1669 * Send the event off.
/* NOTE(review): presumably moves the message buffer into rev->buffer -- confirm. */
1671 dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1673 rev->result = ISC_R_SUCCESS;
1675 rev->addr = tcpmsg->address;
1676 if (queue_response) {
1677 ISC_LIST_APPEND(resp->items, rev, ev_link);
1679 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1680 resp->action, resp->arg, resp, NULL, NULL);
1681 request_log(disp, resp, LVL(90),
1682 "[b] Sent event %p buffer %p len %d to task %p",
1683 rev, rev->buffer.base, rev->buffer.length,
1685 resp->item_out = ISC_TRUE;
1686 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1692 * Restart recv() to get the next packet.
/* The result of startrecv() is deliberately discarded here. */
1695 (void)startrecv(disp, NULL);
1697 UNLOCK(&disp->lock);
1699 isc_event_free(&ev_in);
1703 * disp must be locked.
/*
 * startrecv: start a receive on the dispatch socket, or on the socket of
 * dispsock when dispsock is not NULL.
 *
 * Returns ISC_R_SUCCESS without starting anything when the dispatch is
 * shutting down, when DNS_DISPATCHATTR_NOLISTEN is set, or when a receive
 * is already pending and no dispsock was given. Returns ISC_R_NOMEMORY
 * when the manager buffer count has reached maxbuffers or when the UDP
 * buffer cannot be allocated.
 *
 * On the dispatch-owned socket a failing receive call is not reported to
 * the caller: shutdown_why and shutting_down are set and ISC_R_SUCCESS is
 * returned. On success of that path recv_pending is set to 1.
 */
1706 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1708 isc_region_t region;
1709 isc_socket_t *socket;
/* Early exits: nothing to do in these states. */
1711 if (disp->shutting_down == 1)
1712 return (ISC_R_SUCCESS);
1714 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1715 return (ISC_R_SUCCESS);
/* Only the shared socket (dispsock == NULL) is limited by recv_pending. */
1717 if (disp->recv_pending != 0 && dispsock == NULL)
1718 return (ISC_R_SUCCESS);
1720 if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1721 return (ISC_R_NOMEMORY);
/* NOTE(review): the second half of this condition is not visible here. */
1723 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1725 return (ISC_R_SUCCESS);
/* Choose the socket to read from. */
1727 if (dispsock != NULL)
1728 socket = dispsock->socket;
1730 socket = disp->socket;
1731 INSIST(socket != NULL);
1733 switch (disp->socktype) {
1735 * UDP reads are always maximal.
1737 case isc_sockettype_udp:
1738 region.length = disp->mgr->buffersize;
1739 region.base = allocate_udp_buffer(disp);
1740 if (region.base == NULL)
1741 return (ISC_R_NOMEMORY);
/* Per-query socket: completion goes to udp_exrecv on the dispsock task. */
1742 if (dispsock != NULL) {
1743 isc_task_t *dt = dispsock->task;
1744 isc_socketevent_t *sev =
1745 allocate_sevent(disp, socket,
1746 ISC_SOCKEVENT_RECVDONE,
1747 udp_exrecv, dispsock);
/* NOTE(review): presumably taken when sev is NULL -- the test is not visible. */
1749 free_buffer(disp, region.base, region.length);
1750 return (ISC_R_NOMEMORY);
1753 res = isc_socket_recv2(socket, ®ion, 1, dt, sev, 0);
1754 if (res != ISC_R_SUCCESS) {
1755 free_buffer(disp, region.base, region.length);
/* Shared socket: completion is delivered on disp->task[0]. */
1759 isc_task_t *dt = disp->task[0];
1760 isc_socketevent_t *sev =
1761 allocate_sevent(disp, socket,
1762 ISC_SOCKEVENT_RECVDONE,
1765 free_buffer(disp, region.base, region.length);
1766 return (ISC_R_NOMEMORY);
1769 res = isc_socket_recv2(socket, ®ion, 1, dt, sev, 0);
1770 if (res != ISC_R_SUCCESS) {
1771 free_buffer(disp, region.base, region.length);
1772 disp->shutdown_why = res;
1773 disp->shutting_down = 1;
1775 return (ISC_R_SUCCESS); /* recover by cancel */
1777 INSIST(disp->recv_pending == 0);
1778 disp->recv_pending = 1;
/* TCP: the read goes through the tcpmsg helper instead of a raw buffer. */
1782 case isc_sockettype_tcp:
1783 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1785 if (res != ISC_R_SUCCESS) {
1786 disp->shutdown_why = res;
1787 disp->shutting_down = 1;
1789 return (ISC_R_SUCCESS); /* recover by cancel */
1791 INSIST(disp->recv_pending == 0);
1792 disp->recv_pending = 1;
1799 return (ISC_R_SUCCESS);
1803 * Mgr must be locked when calling this function.
/*
 * destroy_mgr_ok: decide whether the dispatch manager may be destroyed.
 *
 * Logs the shutting-down flag, whether the dispatch list is non-empty and
 * the allocation counts of the depool, rpool and dpool, then tests each
 * of those five conditions in turn.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably ISC_FALSE is returned as soon as one test fires and ISC_TRUE
 * otherwise -- confirm.
 */
1805 static isc_boolean_t
1806 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1807 mgr_log(mgr, LVL(90),
1808 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1809 "depool=%d, rpool=%d, dpool=%d",
1810 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1811 isc_mempool_getallocated(mgr->depool),
1812 isc_mempool_getallocated(mgr->rpool),
1813 isc_mempool_getallocated(mgr->dpool));
/* The manager must be shutting down and own no dispatches ... */
1814 if (!MGR_IS_SHUTTINGDOWN(mgr))
1816 if (!ISC_LIST_EMPTY(mgr->list))
/* ... and none of the three pools may still have elements allocated. */
1818 if (isc_mempool_getallocated(mgr->depool) != 0)
1820 if (isc_mempool_getallocated(mgr->rpool) != 0)
1822 if (isc_mempool_getallocated(mgr->dpool) != 0)
1829 * Mgr must be unlocked when calling this function.
/*
 * destroy_mgr: release every resource owned by a dispatch manager.
 *
 * Destroys the manager locks and memory pools (bpool and spool only when
 * they exist), detaches the entropy source, blackhole ACL and statistics
 * object when set, destroys the QID table when present, frees the IPv4
 * and IPv6 port arrays, frees the manager structure itself and finally
 * detaches the memory context.
 *
 * NOTE(review): the lines that load mgr and mctx from *mgrp are not
 * visible in this view.
 */
1832 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1834 dns_dispatchmgr_t *mgr;
1843 DESTROYLOCK(&mgr->lock);
1846 DESTROYLOCK(&mgr->arc4_lock);
/* These three pools are destroyed unconditionally. */
1848 isc_mempool_destroy(&mgr->depool);
1849 isc_mempool_destroy(&mgr->rpool);
1850 isc_mempool_destroy(&mgr->dpool);
/* The buffer and socket pools are optional, hence the NULL checks. */
1851 if (mgr->bpool != NULL)
1852 isc_mempool_destroy(&mgr->bpool);
1853 if (mgr->spool != NULL)
1854 isc_mempool_destroy(&mgr->spool);
/* The pool locks are destroyed only after the pools that used them. */
1856 DESTROYLOCK(&mgr->spool_lock);
1857 DESTROYLOCK(&mgr->bpool_lock);
1858 DESTROYLOCK(&mgr->dpool_lock);
1859 DESTROYLOCK(&mgr->rpool_lock);
1860 DESTROYLOCK(&mgr->depool_lock);
1863 if (mgr->entropy != NULL)
1864 isc_entropy_detach(&mgr->entropy);
1866 if (mgr->qid != NULL)
1867 qid_destroy(mctx, &mgr->qid);
1869 DESTROYLOCK(&mgr->buffer_lock);
1871 if (mgr->blackhole != NULL)
1872 dns_acl_detach(&mgr->blackhole);
1874 if (mgr->stats != NULL)
1875 isc_stats_detach(&mgr->stats);
/* The port arrays are freed with the sizes recorded in nv4ports / nv6ports. */
1877 if (mgr->v4ports != NULL) {
1878 isc_mem_put(mctx, mgr->v4ports,
1879 mgr->nv4ports * sizeof(in_port_t));
1881 if (mgr->v6ports != NULL) {
1882 isc_mem_put(mctx, mgr->v6ports,
1883 mgr->nv6ports * sizeof(in_port_t));
/* The manager is freed before the memory context reference is dropped. */
1885 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1886 isc_mem_detach(&mctx);
/*
 * open_socket: obtain a UDP socket for a dispatch and name it "dispatcher".
 *
 * Three ways of obtaining the socket are visible: re-opening an existing
 * socket with isc_socket_open(), duplicating dup_socket with
 * isc_socket_dup(), or creating a new isc_sockettype_udp socket in the
 * protocol family of local. On the creation path the socket is then
 * bound to local with the given options.
 *
 * Returns ISC_R_SUCCESS on success.
 *
 * NOTE(review): the condition selecting the first path, the error
 * returns and the store into *sockp are not visible in this view.
 */
1890 open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1891 unsigned int options, isc_socket_t **sockp,
1892 isc_socket_t *dup_socket)
1895 isc_result_t result;
1900 result = isc_socket_open(sock);
1901 if (result != ISC_R_SUCCESS)
1906 } else if (dup_socket != NULL) {
1907 result = isc_socket_dup(dup_socket, &sock);
1908 if (result != ISC_R_SUCCESS)
1911 isc_socket_setname(sock, "dispatcher", NULL);
/* The duplicated socket is returned without an explicit bind call here. */
1913 return (ISC_R_SUCCESS);
1915 result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1916 isc_sockettype_udp, &sock);
1917 if (result != ISC_R_SUCCESS)
1921 isc_socket_setname(sock, "dispatcher", NULL);
/* Unless mapped addresses are allowed at build time, request IPv6-only. */
1923 #ifndef ISC_ALLOW_MAPPED
1924 isc_socket_ipv6only(sock, ISC_TRUE);
1926 result = isc_socket_bind(sock, local, options);
1927 if (result != ISC_R_SUCCESS) {
/*
 * NOTE(review): presumably a socket created here is detached and a
 * caller-supplied one is only closed -- the selecting test is not visible.
 */
1929 isc_socket_detach(&sock);
1932 isc_socket_close(sock);
1941 return (ISC_R_SUCCESS);
1945 * Create a temporary port list to set the initial default set of dispatch
1946 * ports: [1024, 65535]. This is almost meaningless as the application will
1947 * normally set the ports explicitly, but is provided to fill some minor corner
/*
 * create_default_portset: create a port set in *portsetp using mctx and
 * add the range 1024 through 65535 to it.
 *
 * Returns ISC_R_SUCCESS once the range has been added.
 * NOTE(review): the failure return after isc_portset_create() is not
 * visible in this view; presumably its result is passed back.
 */
1951 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1952 isc_result_t result;
1954 result = isc_portset_create(mctx, portsetp);
1955 if (result != ISC_R_SUCCESS)
1957 isc_portset_addrange(*portsetp, 1024, 65535);
1959 return (ISC_R_SUCCESS);
/*
 * dns_dispatchmgr_create: allocate and initialise a dispatch manager.
 *
 * Requires mctx to be non-NULL and mgrp to point at a NULL pointer.
 * The manager is allocated from mctx, its locks are initialised one by
 * one, the depool, rpool and dpool memory pools are created and tuned,
 * the default port sets are installed through
 * dns_dispatchmgr_setavailports(), the entropy source is attached when
 * one is given and the random generator state is initialised.
 *
 * Returns ISC_R_SUCCESS on success and ISC_R_NOMEMORY when the manager
 * or one of the pools cannot be allocated. The tail of the function is
 * the unwind sequence for failures: it destroys pools and locks in the
 * reverse order of their creation, frees the manager and detaches the
 * memory context.
 *
 * NOTE(review): some goto targets, labels and the store into *mgrp are
 * not visible in this view.
 */
1967 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1968 dns_dispatchmgr_t **mgrp)
1970 dns_dispatchmgr_t *mgr;
1971 isc_result_t result;
1972 isc_portset_t *v4portset = NULL;
1973 isc_portset_t *v6portset = NULL;
1975 REQUIRE(mctx != NULL);
1976 REQUIRE(mgrp != NULL && *mgrp == NULL);
1978 mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1980 return (ISC_R_NOMEMORY);
/* The manager holds its own reference on the memory context. */
1983 isc_mem_attach(mctx, &mgr->mctx);
1985 mgr->blackhole = NULL;
/* Lock initialisation; each failure unwinds the locks created before it. */
1988 result = isc_mutex_init(&mgr->lock);
1989 if (result != ISC_R_SUCCESS)
1992 result = isc_mutex_init(&mgr->arc4_lock);
1993 if (result != ISC_R_SUCCESS)
1996 result = isc_mutex_init(&mgr->buffer_lock);
1997 if (result != ISC_R_SUCCESS)
1998 goto kill_arc4_lock;
2000 result = isc_mutex_init(&mgr->depool_lock);
2001 if (result != ISC_R_SUCCESS)
2002 goto kill_buffer_lock;
2004 result = isc_mutex_init(&mgr->rpool_lock);
2005 if (result != ISC_R_SUCCESS)
2006 goto kill_depool_lock;
2008 result = isc_mutex_init(&mgr->dpool_lock);
2009 if (result != ISC_R_SUCCESS)
2010 goto kill_rpool_lock;
2012 result = isc_mutex_init(&mgr->bpool_lock);
2013 if (result != ISC_R_SUCCESS)
2014 goto kill_dpool_lock;
2016 result = isc_mutex_init(&mgr->spool_lock);
2017 if (result != ISC_R_SUCCESS)
2018 goto kill_bpool_lock;
/* Pool creation; any failure here is reported as ISC_R_NOMEMORY. */
2021 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
2022 &mgr->depool) != ISC_R_SUCCESS) {
2023 result = ISC_R_NOMEMORY;
2024 goto kill_spool_lock;
2028 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
2029 &mgr->rpool) != ISC_R_SUCCESS) {
2030 result = ISC_R_NOMEMORY;
2035 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
2036 &mgr->dpool) != ISC_R_SUCCESS) {
2037 result = ISC_R_NOMEMORY;
/* All three pools get the same limits, a fill count of 256 and their own lock. */
2041 isc_mempool_setname(mgr->depool, "dispmgr_depool");
2042 isc_mempool_setmaxalloc(mgr->depool, 32768);
2043 isc_mempool_setfreemax(mgr->depool, 32768);
2044 isc_mempool_associatelock(mgr->depool, &mgr->depool_lock);
2045 isc_mempool_setfillcount(mgr->depool, 256);
2047 isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
2048 isc_mempool_setmaxalloc(mgr->rpool, 32768);
2049 isc_mempool_setfreemax(mgr->rpool, 32768);
2050 isc_mempool_associatelock(mgr->rpool, &mgr->rpool_lock);
2051 isc_mempool_setfillcount(mgr->rpool, 256);
2053 isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
2054 isc_mempool_setmaxalloc(mgr->dpool, 32768);
2055 isc_mempool_setfreemax(mgr->dpool, 32768);
2056 isc_mempool_associatelock(mgr->dpool, &mgr->dpool_lock);
2057 isc_mempool_setfillcount(mgr->dpool, 256);
/* UDP buffer parameters start at zero here. */
2060 mgr->buffersize = 0;
2061 mgr->maxbuffers = 0;
2064 mgr->entropy = NULL;
2067 ISC_LIST_INIT(mgr->list);
2068 mgr->v4ports = NULL;
2069 mgr->v6ports = NULL;
/* The magic is set before setavailports, which requires a valid manager. */
2072 mgr->magic = DNS_DISPATCHMGR_MAGIC;
2074 result = create_default_portset(mctx, &v4portset);
2075 if (result == ISC_R_SUCCESS) {
2076 result = create_default_portset(mctx, &v6portset);
2077 if (result == ISC_R_SUCCESS) {
2078 result = dns_dispatchmgr_setavailports(mgr,
/* The temporary port sets are destroyed whether or not the calls succeeded. */
2083 if (v4portset != NULL)
2084 isc_portset_destroy(mctx, &v4portset);
2085 if (v6portset != NULL)
2086 isc_portset_destroy(mctx, &v6portset);
2087 if (result != ISC_R_SUCCESS)
2091 if (entropy != NULL)
2092 isc_entropy_attach(entropy, &mgr->entropy);
2097 dispatch_initrandom(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
2100 return (ISC_R_SUCCESS);
/* Failure unwind: reverse order of construction. */
2103 isc_mempool_destroy(&mgr->dpool);
2105 isc_mempool_destroy(&mgr->rpool);
2107 isc_mempool_destroy(&mgr->depool);
2109 DESTROYLOCK(&mgr->spool_lock);
2111 DESTROYLOCK(&mgr->bpool_lock);
2113 DESTROYLOCK(&mgr->dpool_lock);
2115 DESTROYLOCK(&mgr->rpool_lock);
2117 DESTROYLOCK(&mgr->depool_lock);
2119 DESTROYLOCK(&mgr->buffer_lock);
2121 DESTROYLOCK(&mgr->arc4_lock);
2123 DESTROYLOCK(&mgr->lock);
2125 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
2126 isc_mem_detach(&mctx);
/*
 * dns_dispatchmgr_setblackhole: replace the blackhole ACL of the manager.
 *
 * Any ACL currently attached is detached first; blackhole is then
 * attached to mgr->blackhole. Requires a valid manager.
 */
2132 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
2133 REQUIRE(VALID_DISPATCHMGR(mgr));
2134 if (mgr->blackhole != NULL)
2135 dns_acl_detach(&mgr->blackhole);
2136 dns_acl_attach(blackhole, &mgr->blackhole);
/*
 * dns_dispatchmgr_getblackhole: return the blackhole ACL pointer stored in
 * the manager (possibly NULL). No attach call is made here, so the
 * returned pointer is the one held by the manager. Requires a valid manager.
 */
2140 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
2141 REQUIRE(VALID_DISPATCHMGR(mgr));
2142 return (mgr->blackhole);
/*
 * dns_dispatchmgr_setblackportlist: deprecated setter kept for callers of
 * the old interface; dns_dispatchmgr_setavailports() is the replacement.
 * Only the manager validity check is visible in this view.
 * NOTE(review): presumably portlist is ignored -- confirm.
 */
2146 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
2147 dns_portlist_t *portlist)
2149 REQUIRE(VALID_DISPATCHMGR(mgr));
2152 /* This function is deprecated: use dns_dispatchmgr_setavailports(). */
/*
 * dns_dispatchmgr_getblackportlist: deprecated getter; validates the
 * manager and always returns NULL.
 */
2157 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
2158 REQUIRE(VALID_DISPATCHMGR(mgr));
2159 return (NULL); /* this function is deprecated */
/*
 * dns_dispatchmgr_setavailports: install new arrays of usable IPv4 and
 * IPv6 ports built from v4portset and v6portset.
 *
 * The new arrays are allocated before anything in the manager is
 * modified, so ISC_R_NOMEMORY leaves the previous configuration in
 * place. Once filled, the old arrays (if any) are freed and the new
 * pointers and counts are stored in the manager.
 *
 * Returns ISC_R_SUCCESS or ISC_R_NOMEMORY. Requires a valid manager.
 *
 * NOTE(review): the loop head, the array stores and any locking around
 * the final swap are not visible in this view.
 */
2163 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
2164 isc_portset_t *v6portset)
2166 in_port_t *v4ports, *v6ports, p;
2167 unsigned int nv4ports, nv6ports, i4, i6;
2169 REQUIRE(VALID_DISPATCHMGR(mgr));
2171 nv4ports = isc_portset_nports(v4portset);
2172 nv6ports = isc_portset_nports(v6portset);
/* An array is only allocated for a family that has at least one port. */
2175 if (nv4ports != 0) {
2176 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
2177 if (v4ports == NULL)
2178 return (ISC_R_NOMEMORY);
2181 if (nv6ports != 0) {
2182 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
2183 if (v6ports == NULL) {
/* Give back the IPv4 array so the failure does not leak it. */
2184 if (v4ports != NULL) {
2185 isc_mem_put(mgr->mctx, v4ports,
2187 isc_portset_nports(v4portset));
2189 return (ISC_R_NOMEMORY);
/* Walk every port value up to 65535 and check both sets for it. */
2197 if (isc_portset_isset(v4portset, p)) {
2198 INSIST(i4 < nv4ports);
2201 if (isc_portset_isset(v6portset, p)) {
2202 INSIST(i6 < nv6ports);
2205 } while (p++ < 65535);
2206 INSIST(i4 == nv4ports && i6 == nv6ports);
/* Swap in the new arrays, releasing the previous ones with their old sizes. */
2209 if (mgr->v4ports != NULL) {
2210 isc_mem_put(mgr->mctx, mgr->v4ports,
2211 mgr->nv4ports * sizeof(in_port_t));
2213 mgr->v4ports = v4ports;
2214 mgr->nv4ports = nv4ports;
2216 if (mgr->v6ports != NULL) {
2217 isc_mem_put(mgr->mctx, mgr->v6ports,
2218 mgr->nv6ports * sizeof(in_port_t));
2220 mgr->v6ports = v6ports;
2221 mgr->nv6ports = nv6ports;
2224 return (ISC_R_SUCCESS);
2228 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
2229 unsigned int buffersize, unsigned int maxbuffers,
2230 unsigned int maxrequests, unsigned int buckets,
2231 unsigned int increment)
2233 isc_result_t result;
2235 REQUIRE(VALID_DISPATCHMGR(mgr));
2236 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2237 REQUIRE(maxbuffers > 0);
2238 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2239 REQUIRE(increment > buckets);
2242 * Keep some number of items around. This should be a config
2243 * option. For now, keep 8, but later keep at least two even
2244 * if the caller wants less. This allows us to ensure certain
2245 * things, like an event can be "freed" and the next allocation
2246 * will always succeed.
2248 * Note that if limits are placed on anything here, we use one
2249 * event internally, so the actual limit should be "wanted + 1."
2257 LOCK(&mgr->buffer_lock);
2259 /* Create or adjust buffer pool */
2260 if (mgr->bpool != NULL) {
2262 * We only increase the maxbuffers to avoid accidental buffer
2263 * shortage. Ideally we'd separate the manager-wide maximum
2264 * from per-dispatch limits and respect the latter within the
2265 * global limit. But at this moment that's deemed to be
2266 * overkilling and isn't worth additional implementation
2269 if (maxbuffers > mgr->maxbuffers) {
2270 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2271 isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2272 mgr->maxbuffers = maxbuffers;
2275 result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
2276 if (result != ISC_R_SUCCESS) {
2277 UNLOCK(&mgr->buffer_lock);
2280 isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
2281 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2282 isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2283 isc_mempool_associatelock(mgr->bpool, &mgr->bpool_lock);
2284 isc_mempool_setfillcount(mgr->bpool, 256);
2287 /* Create or adjust socket pool */
2288 if (mgr->spool != NULL) {
2289 if (maxrequests < DNS_DISPATCH_POOLSOCKS * 2)
2290 isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
2291 isc_mempool_setfreemax(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
2292 UNLOCK(&mgr->buffer_lock);
2293 return (ISC_R_SUCCESS);
2295 result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
2297 if (result != ISC_R_SUCCESS) {
2298 UNLOCK(&mgr->buffer_lock);
2301 isc_mempool_setname(mgr->spool, "dispmgr_spool");
2302 isc_mempool_setmaxalloc(mgr->spool, maxrequests);
2303 isc_mempool_setfreemax(mgr->spool, maxrequests);
2304 isc_mempool_associatelock(mgr->spool, &mgr->spool_lock);
2305 isc_mempool_setfillcount(mgr->spool, 256);
2307 result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
2308 if (result != ISC_R_SUCCESS)
2311 mgr->buffersize = buffersize;
2312 mgr->maxbuffers = maxbuffers;
2313 UNLOCK(&mgr->buffer_lock);
2314 return (ISC_R_SUCCESS);
2317 isc_mempool_destroy(&mgr->bpool);
2318 if (mgr->spool != NULL)
2319 isc_mempool_destroy(&mgr->spool);
2320 UNLOCK(&mgr->buffer_lock);
/*
 * dns_dispatchmgr_destroy: request destruction of a dispatch manager.
 *
 * Requires mgrp to be non-NULL and to reference a valid manager. The
 * MGR_SHUTTINGDOWN bit is set in the manager state, destroy_mgr_ok() is
 * evaluated and its result is logged as killit.
 *
 * NOTE(review): the locking, the clearing of *mgrp and the call that
 * performs the teardown when killit is true are not visible in this view.
 */
2325 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
2326 dns_dispatchmgr_t *mgr;
2327 isc_boolean_t killit;
2329 REQUIRE(mgrp != NULL);
2330 REQUIRE(VALID_DISPATCHMGR(*mgrp));
2336 mgr->state |= MGR_SHUTTINGDOWN;
2338 killit = destroy_mgr_ok(mgr);
2341 mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
/*
 * dns_dispatchmgr_setstats: attach a statistics object to the manager.
 *
 * Requires a valid manager whose dispatch list is empty and which has no
 * statistics object attached yet, so this can be done at most once and
 * only before any dispatch is on the list.
 */
2348 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2349 REQUIRE(VALID_DISPATCHMGR(mgr));
2350 REQUIRE(ISC_LIST_EMPTY(mgr->list));
2351 REQUIRE(mgr->stats == NULL);
2353 isc_stats_attach(stats, &mgr->stats);
/*
 * port_cmp: comparison callback over two in_port_t values, passed to
 * bsearch() by portavailable(). Both arguments are read as const
 * in_port_t pointers.
 * NOTE(review): the comparison itself is not visible in this view;
 * presumably it returns negative, zero or positive in ascending order.
 */
2357 port_cmp(const void *key, const void *ent) {
2358 in_port_t p1 = *(const in_port_t *)key;
2359 in_port_t p2 = *(const in_port_t *)ent;
/*
 * portavailable: report whether a port is in the manager's list of
 * available ports for its address family.
 *
 * At least one of sock and sockaddrp must be non-NULL. The address can
 * be taken from the socket via isc_socket_getsockname(). The port is
 * then looked up with bsearch() in mgr->v4ports for AF_INET addresses
 * and in mgr->v6ports otherwise.
 *
 * NOTE(review): the test choosing between sockaddrp and the socket name,
 * any locking and the final return are not visible in this view. The use
 * of bsearch() assumes the port arrays are sorted -- confirm.
 */
2369 static isc_boolean_t
2370 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2371 isc_sockaddr_t *sockaddrp)
2373 isc_sockaddr_t sockaddr;
2374 isc_result_t result;
2375 in_port_t *ports, port;
2376 unsigned int nports;
2377 isc_boolean_t available = ISC_FALSE;
2379 REQUIRE(sock != NULL || sockaddrp != NULL);
/* Fall back to the local stack variable and ask the socket for its name. */
2383 sockaddrp = &sockaddr;
2384 result = isc_socket_getsockname(sock, sockaddrp);
2385 if (result != ISC_R_SUCCESS)
/* Select the port array that matches the address family. */
2389 if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2390 ports = mgr->v4ports;
2391 nports = mgr->nv4ports;
2393 ports = mgr->v6ports;
2394 nports = mgr->nv6ports;
2399 port = isc_sockaddr_getport(sockaddrp);
2400 if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2401 available = ISC_TRUE;
/*
 * ATTRMATCH: true when _a1 and _a2 agree on every bit selected by _mask.
 * Each argument is evaluated once per use and _mask is used twice.
 */
2408 #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
/*
 * local_addr_match: decide whether dispatch disp can serve the local
 * address addr. Requires disp->socket to be non-NULL.
 *
 * The visible checks are, in order: a wildcard-port request against a
 * wildcard-port dispatch whose actual port fails portavailable(); full
 * equality of disp->local and addr; a wildcard port in addr; address
 * equality ignoring the port; and finally equality of addr with the name
 * reported by isc_socket_getsockname() for the dispatch socket.
 *
 * NOTE(review): the return values of the intermediate checks are not
 * visible in this view.
 */
2410 static isc_boolean_t
2411 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2412 isc_sockaddr_t sockaddr;
2413 isc_result_t result;
2415 REQUIRE(disp->socket != NULL);
2421 * Don't match wildcard ports unless the port is available in the
2422 * current configuration.
2424 if (isc_sockaddr_getport(addr) == 0 &&
2425 isc_sockaddr_getport(&disp->local) == 0 &&
2426 !portavailable(disp->mgr, disp->socket, NULL)) {
2431 * Check if we match the binding <address,port>.
2432 * Wildcard ports match/fail here.
2434 if (isc_sockaddr_equal(&disp->local, addr))
2436 if (isc_sockaddr_getport(addr) == 0)
2440 * Check if we match a bound wildcard port <address,port>.
2442 if (!isc_sockaddr_eqaddr(&disp->local, addr))
2444 result = isc_socket_getsockname(disp->socket, &sockaddr);
2445 if (result != ISC_R_SUCCESS)
2448 return (isc_sockaddr_equal(&sockaddr, addr));
2452 * Requires mgr be locked.
2454 * No dispatcher can be locked by this thread when calling this function.
2458 * If a matching dispatcher is found, it is locked after this function
2459 * returns, and must be unlocked by the caller.
/*
 * dispatch_find: search the manager's dispatch list for a dispatch that
 * is not shutting down, whose attributes match under mask and whose
 * local address matches local.
 *
 * The PRIVATE and EXCLUSIVE attributes are cleared from attributes and
 * added to mask before the search, so a dispatch carrying either of them
 * can never match. The result is ISC_R_NOTFOUND or ISC_R_SUCCESS.
 *
 * NOTE(review): the per-dispatch LOCK, the loop exit on a match and the
 * store into *dispp are not visible in this view.
 */
2462 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2463 unsigned int attributes, unsigned int mask,
2464 dns_dispatch_t **dispp)
2466 dns_dispatch_t *disp;
2467 isc_result_t result;
2470 * Make certain that we will not match a private or exclusive dispatch.
2472 attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2473 mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
/* Linear walk over every dispatch owned by the manager. */
2475 disp = ISC_LIST_HEAD(mgr->list);
2476 while (disp != NULL) {
2478 if ((disp->shutting_down == 0)
2479 && ATTRMATCH(disp->attributes, attributes, mask)
2480 && local_addr_match(disp, local))
/* No match: release this dispatch and move on to the next one. */
2482 UNLOCK(&disp->lock);
2483 disp = ISC_LIST_NEXT(disp, link);
2487 result = ISC_R_NOTFOUND;
2492 result = ISC_R_SUCCESS;
/*
 * qid_allocate: allocate and initialise a QID table with the given number
 * of buckets and collision increment.
 *
 * Requires a valid manager, buckets < 2097169, increment > buckets and
 * qidp pointing at a NULL pointer. The entry table always gets one list
 * per bucket; the socket table is allocated only when needsocktable is
 * true. Every allocation made so far is released again when a later
 * step fails.
 *
 * Returns ISC_R_SUCCESS or ISC_R_NOMEMORY.
 * NOTE(review): the return after a failed isc_mutex_init() and the store
 * into *qidp are not visible in this view.
 */
2499 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2500 unsigned int increment, dns_qid_t **qidp,
2501 isc_boolean_t needsocktable)
2505 isc_result_t result;
2507 REQUIRE(VALID_DISPATCHMGR(mgr));
2508 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2509 REQUIRE(increment > buckets);
2510 REQUIRE(qidp != NULL && *qidp == NULL);
2512 qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2514 return (ISC_R_NOMEMORY);
2516 qid->qid_table = isc_mem_get(mgr->mctx,
2517 buckets * sizeof(dns_displist_t));
2518 if (qid->qid_table == NULL) {
2519 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2520 return (ISC_R_NOMEMORY);
/* The socket table is optional; NULL marks its absence everywhere below. */
2523 qid->sock_table = NULL;
2524 if (needsocktable) {
2525 qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2526 sizeof(dispsocketlist_t));
2527 if (qid->sock_table == NULL) {
2528 isc_mem_put(mgr->mctx, qid->qid_table,
2529 buckets * sizeof(dns_displist_t));
2530 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2531 return (ISC_R_NOMEMORY);
2535 result = isc_mutex_init(&qid->lock);
2536 if (result != ISC_R_SUCCESS) {
/* Lock creation failed: free both tables and the structure. */
2537 if (qid->sock_table != NULL) {
2538 isc_mem_put(mgr->mctx, qid->sock_table,
2539 buckets * sizeof(dispsocketlist_t));
2541 isc_mem_put(mgr->mctx, qid->qid_table,
2542 buckets * sizeof(dns_displist_t));
2543 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
/* Every bucket starts out as an empty list. */
2547 for (i = 0; i < buckets; i++) {
2548 ISC_LIST_INIT(qid->qid_table[i]);
2549 if (qid->sock_table != NULL)
2550 ISC_LIST_INIT(qid->sock_table[i]);
2553 qid->qid_nbuckets = buckets;
2554 qid->qid_increment = increment;
2555 qid->magic = QID_MAGIC;
2557 return (ISC_R_SUCCESS);
/*
 * qid_destroy: free a QID table created by qid_allocate().
 *
 * Requires qidp to be non-NULL and the table to be valid. The entry
 * table and, when present, the socket table are freed with sizes derived
 * from qid_nbuckets; the lock is destroyed and the structure is freed.
 *
 * NOTE(review): the lines loading qid from *qidp and resetting *qidp and
 * the magic are not visible in this view.
 */
2561 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2564 REQUIRE(qidp != NULL);
2567 REQUIRE(VALID_QID(qid));
2571 isc_mem_put(mctx, qid->qid_table,
2572 qid->qid_nbuckets * sizeof(dns_displist_t));
2573 if (qid->sock_table != NULL) {
2574 isc_mem_put(mctx, qid->sock_table,
2575 qid->qid_nbuckets * sizeof(dispsocketlist_t));
2577 DESTROYLOCK(&qid->lock);
2578 isc_mem_put(mctx, qid, sizeof(*qid));
2582 * Allocate and set important limits.
/*
 * dispatch_allocate: take a dispatch from the manager dpool and give its
 * fields their initial values.
 *
 * Requires a valid manager and dispp pointing at a NULL pointer. The
 * request limit is set from maxrequests; attributes, counters and state
 * flags start at zero, the socket lists start empty, shutdown_why starts
 * as ISC_R_UNEXPECTED, and the random state is initialised from the
 * manager entropy source. A lock and a failsafe event are then created.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY when the dispatch or the
 * failsafe event cannot be allocated. The tail of the function destroys
 * the lock and returns the dispatch to the pool on failure.
 *
 * NOTE(review): socket-type specific fields are left to the caller, per
 * the existing comment. The store into *dispp and the failure labels are
 * not visible in this view.
 */
2585 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2586 dns_dispatch_t **dispp)
2588 dns_dispatch_t *disp;
2589 isc_result_t result;
2591 REQUIRE(VALID_DISPATCHMGR(mgr));
2592 REQUIRE(dispp != NULL && *dispp == NULL);
2595 * Set up the dispatcher, mostly. Don't bother setting some of
2596 * the options that are controlled by tcp vs. udp, etc.
2599 disp = isc_mempool_get(mgr->dpool);
2601 return (ISC_R_NOMEMORY);
2605 disp->maxrequests = maxrequests;
2606 disp->attributes = 0;
2607 ISC_LINK_INIT(disp, link);
2609 disp->recv_pending = 0;
2610 memset(&disp->local, 0, sizeof(disp->local));
2611 disp->localport = 0;
2612 disp->shutting_down = 0;
2613 disp->shutdown_out = 0;
2614 disp->connected = 0;
2615 disp->tcpmsg_valid = 0;
2616 disp->shutdown_why = ISC_R_UNEXPECTED;
2618 disp->tcpbuffers = 0;
2620 ISC_LIST_INIT(disp->activesockets);
2621 ISC_LIST_INIT(disp->inactivesockets);
2623 dispatch_initrandom(&disp->arc4ctx, mgr->entropy, NULL);
2624 disp->port_table = NULL;
2625 disp->portpool = NULL;
2627 result = isc_mutex_init(&disp->lock);
2628 if (result != ISC_R_SUCCESS)
2631 disp->failsafe_ev = allocate_devent(disp);
2632 if (disp->failsafe_ev == NULL) {
2633 result = ISC_R_NOMEMORY;
2637 disp->magic = DISPATCH_MAGIC;
2640 return (ISC_R_SUCCESS);
2646 DESTROYLOCK(&disp->lock);
2648 isc_mempool_put(mgr->dpool, disp);
2655 * MUST be unlocked, and not used by anything.
/*
 * dispatch_free: release a dispatch that is no longer in use.
 *
 * Requires *dispp to be a valid dispatch whose manager is valid. The
 * tcpmsg is invalidated when it was set up. The dispatch must have no
 * TCP buffers, no requests, no pending receive and empty active and
 * inactive socket lists (all enforced with INSIST). The failsafe event
 * goes back to the manager depool, the QID table, port table and port
 * pool are released when present, the lock is destroyed and the dispatch
 * is returned to the manager dpool.
 *
 * NOTE(review): the lines loading disp and mgr and clearing *dispp and
 * the magic are not visible in this view.
 */
2658 dispatch_free(dns_dispatch_t **dispp) {
2659 dns_dispatch_t *disp;
2660 dns_dispatchmgr_t *mgr;
2663 REQUIRE(VALID_DISPATCH(*dispp));
2668 REQUIRE(VALID_DISPATCHMGR(mgr));
2670 if (disp->tcpmsg_valid) {
2671 dns_tcpmsg_invalidate(&disp->tcpmsg);
2672 disp->tcpmsg_valid = 0;
/* Nothing may still reference this dispatch at this point. */
2675 INSIST(disp->tcpbuffers == 0);
2676 INSIST(disp->requests == 0);
2677 INSIST(disp->recv_pending == 0);
2678 INSIST(ISC_LIST_EMPTY(disp->activesockets));
2679 INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2681 isc_mempool_put(mgr->depool, disp->failsafe_ev);
2682 disp->failsafe_ev = NULL;
2684 if (disp->qid != NULL)
2685 qid_destroy(mgr->mctx, &disp->qid);
/* Every port-table bucket must be empty before the table is freed. */
2687 if (disp->port_table != NULL) {
2688 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2689 INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2690 isc_mem_put(mgr->mctx, disp->port_table,
2691 sizeof(disp->port_table[0]) *
2692 DNS_DISPATCH_PORTTABLESIZE);
2695 if (disp->portpool != NULL)
2696 isc_mempool_destroy(&disp->portpool);
2699 DESTROYLOCK(&disp->lock);
2701 isc_mempool_put(mgr->dpool, disp);
/*
 * dns_dispatch_createtcp: create a TCP dispatch around an existing
 * socket.
 *
 * Requires a valid manager, a socket of type isc_sockettype_tcp, and
 * attributes with DNS_DISPATCHATTR_TCP set and DNS_DISPATCHATTR_UDP
 * clear. DNS_DISPATCHATTR_PRIVATE is always added to the attributes.
 *
 * The dispatch gets its own QID table (without a socket table), a
 * reference on sock, one task named "tcpdispatch", a control event and
 * an initialised tcpmsg, and is appended to the manager dispatch list.
 *
 * Returns ISC_R_SUCCESS on success and ISC_R_NOMEMORY when the control
 * event cannot be allocated. The tail of the function is the failure
 * unwind: detach the task, detach the socket, free the dispatch.
 *
 * NOTE(review): buffersize and maxbuffers are not used in any visible
 * line. The manager locking, some error returns and the store into
 * *dispp are not visible in this view either.
 */
2705 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2706 isc_taskmgr_t *taskmgr, unsigned int buffersize,
2707 unsigned int maxbuffers, unsigned int maxrequests,
2708 unsigned int buckets, unsigned int increment,
2709 unsigned int attributes, dns_dispatch_t **dispp)
2711 isc_result_t result;
2712 dns_dispatch_t *disp;
2717 REQUIRE(VALID_DISPATCHMGR(mgr));
2718 REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2719 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2720 REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2722 attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */
2727 * dispatch_allocate() checks mgr for us.
2728 * qid_allocate() checks buckets and increment for us.
2731 result = dispatch_allocate(mgr, maxrequests, &disp);
2732 if (result != ISC_R_SUCCESS) {
/* ISC_FALSE: a TCP dispatch does not get a socket table. */
2737 result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2738 if (result != ISC_R_SUCCESS)
2739 goto deallocate_dispatch;
2741 disp->socktype = isc_sockettype_tcp;
2742 disp->socket = NULL;
2743 isc_socket_attach(sock, &disp->socket);
2745 disp->sepool = NULL;
2748 disp->task[0] = NULL;
2749 result = isc_task_create(taskmgr, 0, &disp->task[0]);
2750 if (result != ISC_R_SUCCESS)
2753 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2754 DNS_EVENT_DISPATCHCONTROL,
2756 sizeof(isc_event_t));
2757 if (disp->ctlevent == NULL) {
2758 result = ISC_R_NOMEMORY;
2762 isc_task_setname(disp->task[0], "tcpdispatch", disp);
/* tcpmsg_valid tells dispatch_free() that the tcpmsg needs invalidating. */
2764 dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2765 disp->tcpmsg_valid = 1;
2767 disp->attributes = attributes;
2770 * Append it to the dispatcher list.
2772 ISC_LIST_APPEND(mgr->list, disp, link);
2775 mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2776 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2780 return (ISC_R_SUCCESS);
/* Failure unwind, in reverse order of acquisition. */
2786 isc_task_detach(&disp->task[0]);
2788 isc_socket_detach(&disp->socket);
2789 deallocate_dispatch:
2790 dispatch_free(&disp);
/*
 * Obtain a UDP dispatch for localaddr, reusing a matching one when possible.
 *
 * The UDP settings of mgr are first updated through
 * dns_dispatchmgr_setudp().  When dup_dispatch is NULL, dispatch_find() is
 * asked for an existing dispatch matching localaddr, attributes and mask;
 * a hit has its maxrequests raised to the requested value if that is
 * larger and, if the caller asks for DNS_DISPATCHATTR_NOLISTEN and the
 * dispatch did not already have it, the flag is set and any pending
 * receive is cancelled.  The code after that block builds a new dispatch
 * with dispatch_createudp(), which is handed dup_dispatch->socket when
 * dup_dispatch is non-NULL.
 *
 * Asserted preconditions: valid mgr; non-NULL sockmgr, localaddr and
 * taskmgr; 512 <= buffersize < 64K; maxbuffers > 0; buckets < 2097169;
 * increment > buckets; dispp non-NULL and pointing at NULL; attributes
 * without DNS_DISPATCHATTR_TCP.  With DNS_DISPATCHATTR_EXCLUSIVE the port
 * in localaddr must be 0.
 */
2798 dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2799 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2800 unsigned int buffersize,
2801 unsigned int maxbuffers, unsigned int maxrequests,
2802 unsigned int buckets, unsigned int increment,
2803 unsigned int attributes, unsigned int mask,
2804 dns_dispatch_t **dispp, dns_dispatch_t *dup_dispatch)
2806 isc_result_t result;
2807 dns_dispatch_t *disp = NULL;
2809 REQUIRE(VALID_DISPATCHMGR(mgr));
2810 REQUIRE(sockmgr != NULL);
2811 REQUIRE(localaddr != NULL);
2812 REQUIRE(taskmgr != NULL);
2813 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2814 REQUIRE(maxbuffers > 0);
2815 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2816 REQUIRE(increment > buckets);
2817 REQUIRE(dispp != NULL && *dispp == NULL);
2818 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2820 result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2821 maxrequests, buckets, increment);
2822 if (result != ISC_R_SUCCESS)
/* Exclusive dispatches may not name a fixed local port. */
2827 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2828 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2833 * See if we have a dispatcher that matches.
2835 if (dup_dispatch == NULL) {
2836 result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2837 if (result == ISC_R_SUCCESS) {
/* The request limit of a shared dispatch only ever grows. */
2840 if (disp->maxrequests < maxrequests)
2841 disp->maxrequests = maxrequests;
/* Switch the dispatch to no-listen mode and stop an outstanding receive. */
2843 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0
2844 && (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2846 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2847 if (disp->recv_pending != 0)
2848 isc_socket_cancel(disp->socket,
2850 ISC_SOCKCANCEL_RECV);
2853 UNLOCK(&disp->lock);
2858 return (ISC_R_SUCCESS);
/* No reusable dispatch: create a fresh one. */
2866 result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2867 maxrequests, attributes, &disp,
2868 dup_dispatch == NULL
2870 : dup_dispatch->socket);
2872 if (result != ISC_R_SUCCESS) {
2880 return (ISC_R_SUCCESS);
/*
 * Convenience form of dns_dispatch_getudp_dup() for callers that have no
 * dispatch to duplicate: every argument is forwarded unchanged and NULL is
 * supplied as dup_dispatch.  The result of that call is returned as is.
 */
2884 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2885 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2886 unsigned int buffersize,
2887 unsigned int maxbuffers, unsigned int maxrequests,
2888 unsigned int buckets, unsigned int increment,
2889 unsigned int attributes, unsigned int mask,
2890 dns_dispatch_t **dispp)
2892 return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr,
2893 buffersize, maxbuffers, maxrequests,
2894 buckets, increment, attributes,
2895 mask, dispp, NULL));
2899 * mgr should be locked.
/*
 * Number of entries in the held[] socket array declared by get_udpsocket().
 * The value below is only a default; a definition supplied earlier takes
 * precedence.
 */
2902 #ifndef DNS_DISPATCH_HELD
2903 #define DNS_DISPATCH_HELD 20U
/*
 * Open a UDP socket for disp on localaddr.
 *
 * sockp must be non-NULL and point at NULL on entry (INSISTed below).
 * anyport is true when localaddr carries port 0.
 *
 * What the code below shows:
 *  - a random-port phase that selects the v4ports or v6ports array of the
 *    manager by address family, draws candidate ports from it with
 *    dispatch_uniformrandom() for up to 1024 attempts, skips candidates
 *    that fail with ISC_R_NOPERM or ISC_R_ADDRINUSE, and records the port
 *    in disp->localport; ISC_R_ADDRNOTAVAIL is one of its results;
 *  - a fixed-port open that passes ISC_SOCKET_REUSEADDRESS;
 *  - a fallback loop of up to 0xffff iterations that opens sockets on
 *    localaddr unchanged and tests each with portavailable(); when it is
 *    exhausted an error is logged and result becomes ISC_R_FAILURE;
 *  - every non-NULL entry of held[] is detached before leaving.
 *
 * NOTE(review): presumably held[] keeps rejected sockets open for a while
 * so that the kernel does not offer the same port again, and the chosen
 * socket is stored through sockp; the lines that would show this are not
 * visible here -- confirm.
 */
2907 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2908 isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2909 isc_socket_t **sockp, isc_socket_t *dup_socket)
2912 isc_socket_t *held[DNS_DISPATCH_HELD];
2913 isc_sockaddr_t localaddr_bound;
2914 isc_socket_t *sock = NULL;
2915 isc_result_t result = ISC_R_SUCCESS;
2916 isc_boolean_t anyport;
2918 INSIST(sockp != NULL && *sockp == NULL);
/* Work on a private copy so the port can be changed per attempt. */
2920 localaddr_bound = *localaddr;
2921 anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
2924 unsigned int nports;
2928 * If no port is specified, we first try to pick up a random
2929 * port by ourselves.
2931 if (isc_sockaddr_pf(localaddr) == AF_INET) {
2932 nports = disp->mgr->nv4ports;
2933 ports = disp->mgr->v4ports;
2935 nports = disp->mgr->nv6ports;
2936 ports = disp->mgr->v6ports;
2939 return (ISC_R_ADDRNOTAVAIL);
2941 for (i = 0; i < 1024; i++) {
2944 prt = ports[dispatch_uniformrandom(
2947 isc_sockaddr_setport(&localaddr_bound, prt);
2948 result = open_socket(sockmgr, &localaddr_bound,
2951 * Continue if the port choosen is already in use
2952 * or the OS has reserved it.
2954 if (result == ISC_R_NOPERM ||
2955 result == ISC_R_ADDRINUSE)
2957 disp->localport = prt;
2963 * If this fails 1024 times, we then ask the kernel for
2967 /* Allow to reuse address for non-random ports. */
2968 result = open_socket(sockmgr, localaddr,
2969 ISC_SOCKET_REUSEADDRESS, &sock,
2972 if (result == ISC_R_SUCCESS)
/* Start the fallback search with an empty held[] table. */
2978 memset(held, 0, sizeof(held));
2981 for (j = 0; j < 0xffffU; j++) {
2982 result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
2983 if (result != ISC_R_SUCCESS)
2985 else if (portavailable(mgr, sock, NULL))
2987 if (held[i] != NULL)
2988 isc_socket_detach(&held[i]);
2991 if (i == DNS_DISPATCH_HELD)
2995 mgr_log(mgr, ISC_LOG_ERROR,
2996 "avoid-v%s-udp-ports: unable to allocate "
2997 "an available port",
2998 isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
2999 result = ISC_R_FAILURE;
/* Release whatever is still parked in held[]. */
3005 for (i = 0; i < DNS_DISPATCH_HELD; i++) {
3006 if (held[i] != NULL)
3007 isc_socket_detach(&held[i]);
/*
 * Allocate and initialise a UDP dispatch and append it to mgr->list.
 *
 * Socket handling depends on DNS_DISPATCHATTR_EXCLUSIVE:
 *  - without it, get_udpsocket() provides the socket and, at log level 90,
 *    the address and file descriptor are logged;
 *  - with it, and only when localaddr is not the wildcard address of its
 *    family, a socket is opened on localaddr and detached again at once as
 *    an availability probe; a port table of DNS_DISPATCH_PORTTABLESIZE
 *    lists and a memory pool named disp_portpool (freemax 128) are then
 *    set up.
 *
 * After that the dispatch is marked as UDP, remembers sock and a copy of
 * localaddr, gets its tasks (MAX_INTERNAL_TASKS of them when exclusive),
 * a control event, and a socket-event pool named disp_sepool that is tied
 * to disp->sepool_lock.  DNS_DISPATCHATTR_TCP is cleared from and
 * DNS_DISPATCHATTR_UDP added to the stored attributes.
 *
 * The trailing statements are the unwind path: destroy sepool, free the
 * control event, detach the tasks, detach the socket if there is one, and
 * release the dispatch with dispatch_free().
 */
3014 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
3015 isc_taskmgr_t *taskmgr,
3016 isc_sockaddr_t *localaddr,
3017 unsigned int maxrequests,
3018 unsigned int attributes,
3019 dns_dispatch_t **dispp,
3020 isc_socket_t *dup_socket)
3022 isc_result_t result;
3023 dns_dispatch_t *disp;
3024 isc_socket_t *sock = NULL;
3028 * dispatch_allocate() checks mgr for us.
3031 result = dispatch_allocate(mgr, maxrequests, &disp);
3032 if (result != ISC_R_SUCCESS)
3035 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
3036 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock,
3038 if (result != ISC_R_SUCCESS)
3039 goto deallocate_dispatch;
/* Only format the address when the message would actually be logged. */
3041 if (isc_log_wouldlog(dns_lctx, 90)) {
3042 char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3044 isc_sockaddr_format(localaddr, addrbuf,
3045 ISC_SOCKADDR_FORMATSIZE);
3046 mgr_log(mgr, LVL(90), "dns_dispatch_createudp: Created"
3047 " UDP dispatch for %s with socket fd %d\n",
3048 addrbuf, isc_socket_getfd(sock));
3052 isc_sockaddr_t sa_any;
3055 * For dispatches using exclusive sockets with a specific
3056 * source address, we only check if the specified address is
3057 * available on the system. Query sockets will be created later
3060 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
3061 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
3062 result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
3064 isc_socket_detach(&sock);
3065 if (result != ISC_R_SUCCESS)
3066 goto deallocate_dispatch;
3069 disp->port_table = isc_mem_get(mgr->mctx,
3070 sizeof(disp->port_table[0]) *
3071 DNS_DISPATCH_PORTTABLESIZE);
/*
 * NOTE(review): this failure branch jumps to cleanup without assigning
 * result, whereas the allocation failures further down set
 * ISC_R_NOMEMORY first -- confirm which value the caller receives here.
 */
3072 if (disp->port_table == NULL)
3073 goto deallocate_dispatch;
3074 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
3075 ISC_LIST_INIT(disp->port_table[i]);
3077 result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
3079 if (result != ISC_R_SUCCESS)
3080 goto deallocate_dispatch;
3081 isc_mempool_setname(disp->portpool, "disp_portpool");
3082 isc_mempool_setfreemax(disp->portpool, 128);
3084 disp->socktype = isc_sockettype_udp;
3085 disp->socket = sock;
3086 disp->local = *localaddr;
3088 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3089 disp->ntasks = MAX_INTERNAL_TASKS;
3092 for (i = 0; i < disp->ntasks; i++) {
3093 disp->task[i] = NULL;
3094 result = isc_task_create(taskmgr, 0, &disp->task[i]);
3095 if (result != ISC_R_SUCCESS) {
3097 isc_task_shutdown(disp->task[i]);
3098 isc_task_detach(&disp->task[i]);
3102 isc_task_setname(disp->task[i], "udpdispatch", disp);
3105 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
3106 DNS_EVENT_DISPATCHCONTROL,
3108 sizeof(isc_event_t));
3109 if (disp->ctlevent == NULL) {
3110 result = ISC_R_NOMEMORY;
3114 disp->sepool = NULL;
3115 if (isc_mempool_create(mgr->mctx, sizeof(isc_socketevent_t),
3116 &disp->sepool) != ISC_R_SUCCESS)
3118 result = ISC_R_NOMEMORY;
3122 result = isc_mutex_init(&disp->sepool_lock);
3123 if (result != ISC_R_SUCCESS)
3126 isc_mempool_setname(disp->sepool, "disp_sepool");
3127 isc_mempool_setmaxalloc(disp->sepool, 32768);
3128 isc_mempool_setfreemax(disp->sepool, 32768);
3129 isc_mempool_associatelock(disp->sepool, &disp->sepool_lock);
3130 isc_mempool_setfillcount(disp->sepool, 16);
/* Whatever the caller passed, the stored attributes say UDP and not TCP. */
3132 attributes &= ~DNS_DISPATCHATTR_TCP;
3133 attributes |= DNS_DISPATCHATTR_UDP;
3134 disp->attributes = attributes;
3137 * Append it to the dispatcher list.
3139 ISC_LIST_APPEND(mgr->list, disp, link);
3141 mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
3142 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
3143 if (disp->socket != NULL)
3144 dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
/* Unwind path: resources are released in reverse order of acquisition. */
3154 isc_mempool_destroy(&disp->sepool);
3156 isc_event_free(&disp->ctlevent);
3158 for (i = 0; i < disp->ntasks; i++)
3159 isc_task_detach(&disp->task[i]);
3161 if (disp->socket != NULL)
3162 isc_socket_detach(&disp->socket);
3163 deallocate_dispatch:
3164 dispatch_free(&disp);
/*
 * Attach to disp through dispp.
 *
 * Requires a valid disp and a dispp that is non-NULL and points at NULL.
 * disp->lock is released before returning.
 *
 * NOTE(review): the statements between the REQUIREs and the UNLOCK are not
 * visible here; presumably they take disp->lock and bump the reference
 * count -- confirm.
 */
3170 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
3171 REQUIRE(VALID_DISPATCH(disp));
3172 REQUIRE(dispp != NULL && *dispp == NULL);
3176 UNLOCK(&disp->lock);
3182 * It is important to lock the manager while we are deleting the dispatch,
3183 * since dns_dispatch_getudp will call dispatch_find, which returns to
3184 * the caller a dispatch but does not attach to it until later. _getudp
3185 * locks the manager, however, so locking it here will keep us from attaching
3186 * to a dispatcher that is in the process of going away.
/*
 * Release the reference held through *dispp.
 *
 * Requires dispp to be non-NULL and to point at a valid dispatch.  When
 * the reference count has reached zero, a pending receive on disp->socket
 * is cancelled, receives on every socket in disp->activesockets are
 * cancelled, and the dispatch is flagged as shutting down.
 * destroy_disp_ok() is then consulted while disp->lock is still held, and
 * the control event is posted to task[0] after the lock is dropped.
 *
 * NOTE(review): the condition guarding the final isc_task_send() is not
 * visible here; presumably it depends on killit -- confirm.
 */
3189 dns_dispatch_detach(dns_dispatch_t **dispp) {
3190 dns_dispatch_t *disp;
3191 dispsocket_t *dispsock;
3192 isc_boolean_t killit;
3194 REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
3201 INSIST(disp->refcount > 0);
/* Last reference gone: stop all outstanding receives. */
3203 if (disp->refcount == 0) {
3204 if (disp->recv_pending > 0)
3205 isc_socket_cancel(disp->socket, disp->task[0],
3206 ISC_SOCKCANCEL_RECV);
3207 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3209 dispsock = ISC_LIST_NEXT(dispsock, link)) {
3210 isc_socket_cancel(dispsock->socket, dispsock->task,
3211 ISC_SOCKCANCEL_RECV);
3213 disp->shutting_down = 1;
3216 dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
3218 killit = destroy_disp_ok(disp);
3219 UNLOCK(&disp->lock);
3221 isc_task_send(disp->task[0], &disp->ctlevent);
/*
 * Register a response handler on disp for replies coming from dest and
 * choose the message ID the query should use.
 *
 * Asserted preconditions: valid disp; non-NULL task, dest and idp; resp
 * non-NULL and pointing at NULL; sockmgr non-NULL when disp has
 * DNS_DISPATCHATTR_EXCLUSIVE.
 *
 * Outline of the code below:
 *  - fail with ISC_R_SHUTTINGDOWN if disp is shutting down, or with
 *    ISC_R_QUOTA if disp->requests has reached disp->maxrequests;
 *  - on an exclusive dispatch holding more than DNS_DISPATCH_SOCKSQUOTA
 *    sockets, post an ISC_R_CANCELED event to the owner of the oldest
 *    active socket (unless an event is already out for it), count it in
 *    dns_resstatscounter_dispabort, and rotate that socket to the tail of
 *    disp->activesockets;
 *  - exclusive dispatches obtain a per-query socket from get_dispsocket()
 *    and count a failure in dns_resstatscounter_dispsockfail; the other
 *    visible assignment takes localport from disp->localport;
 *  - starting from a random ID, probe the qid table with entry_search(),
 *    stepping by qid->qid_increment, and give up with ISC_R_NOMORE;
 *  - take an entry from mgr->rpool (ISC_R_NOMEMORY on failure, after
 *    destroying the per-query socket), fill it in and append it to
 *    qid->qid_table[bucket];
 *  - for UDP or connected dispatches call startrecv(); if that fails the
 *    entry is unlinked and returned to the pool and the per-query socket
 *    is destroyed;
 *  - a per-query socket, if any, is appended to disp->activesockets.
 *
 * Returns ISC_R_SUCCESS when the entry has been registered; on an
 * exclusive dispatch the entry is then guaranteed (INSIST) to carry a
 * dispsocket.
 */
3225 dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3226 isc_task_t *task, isc_taskaction_t action, void *arg,
3227 dns_messageid_t *idp, dns_dispentry_t **resp,
3228 isc_socketmgr_t *sockmgr)
3230 dns_dispentry_t *res;
3231 unsigned int bucket;
3232 in_port_t localport = 0;
3237 dispsocket_t *dispsocket = NULL;
3238 isc_result_t result;
3240 REQUIRE(VALID_DISPATCH(disp));
3241 REQUIRE(task != NULL);
3242 REQUIRE(dest != NULL);
3243 REQUIRE(resp != NULL && *resp == NULL);
3244 REQUIRE(idp != NULL);
3245 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3246 REQUIRE(sockmgr != NULL);
3250 if (disp->shutting_down == 1) {
3251 UNLOCK(&disp->lock);
3252 return (ISC_R_SHUTTINGDOWN);
3255 if (disp->requests >= disp->maxrequests) {
3256 UNLOCK(&disp->lock);
3257 return (ISC_R_QUOTA);
3260 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
3261 disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
3262 dispsocket_t *oldestsocket;
3263 dns_dispentry_t *oldestresp;
3264 dns_dispatchevent_t *rev;
3267 * Kill oldest outstanding query if the number of sockets
3268 * exceeds the quota to keep the room for new queries.
3270 oldestsocket = ISC_LIST_HEAD(disp->activesockets);
3271 oldestresp = oldestsocket->resp;
3272 if (oldestresp != NULL && !oldestresp->item_out) {
3273 rev = allocate_devent(oldestresp->disp);
3275 rev->buffer.base = NULL;
3276 rev->result = ISC_R_CANCELED;
3277 rev->id = oldestresp->id;
3278 ISC_EVENT_INIT(rev, sizeof(*rev), 0,
3279 NULL, DNS_EVENT_DISPATCH,
3281 oldestresp->arg, oldestresp,
3283 oldestresp->item_out = ISC_TRUE;
3284 isc_task_send(oldestresp->task,
3285 ISC_EVENT_PTR(&rev));
3286 inc_stats(disp->mgr,
3287 dns_resstatscounter_dispabort);
3292 * Move this entry to the tail so that it won't (easily) be
3293 * examined before actually being canceled.
3295 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
3296 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
3299 qid = DNS_QID(disp);
3301 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3303 * Get a separate UDP socket with a random port number.
3305 result = get_dispsocket(disp, dest, sockmgr, &dispsocket,
3307 if (result != ISC_R_SUCCESS) {
3308 UNLOCK(&disp->lock);
3309 inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
3313 localport = disp->localport;
3317 * Try somewhat hard to find an unique ID.
3320 id = (dns_messageid_t)dispatch_random(DISP_ARC4CTX(disp));
3324 bucket = dns_hash(qid, dest, id, localport);
3325 if (entry_search(qid, dest, id, localport, bucket) == NULL) {
3329 id += qid->qid_increment;
3335 UNLOCK(&disp->lock);
3336 return (ISC_R_NOMORE);
3339 res = isc_mempool_get(disp->mgr->rpool);
3341 if (dispsocket != NULL)
3342 destroy_dispsocket(disp, &dispsocket);
3343 UNLOCK(&disp->lock);
3344 return (ISC_R_NOMEMORY);
3350 isc_task_attach(task, &res->task);
3353 res->port = localport;
3354 res->bucket = bucket;
3356 res->action = action;
3358 res->dispsocket = dispsocket;
3359 if (dispsocket != NULL)
3360 dispsocket->resp = res;
3361 res->item_out = ISC_FALSE;
3362 ISC_LIST_INIT(res->items);
3363 ISC_LINK_INIT(res, link);
3364 res->magic = RESPONSE_MAGIC;
3367 ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3370 request_log(disp, res, LVL(90),
3371 "attached to task %p", res->task);
3373 if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3374 ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
3375 result = startrecv(disp, dispsocket);
3376 if (result != ISC_R_SUCCESS) {
3378 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3381 if (dispsocket != NULL)
3382 destroy_dispsocket(disp, &dispsocket);
3387 UNLOCK(&disp->lock);
3388 isc_task_detach(&res->task);
3389 isc_mempool_put(disp->mgr->rpool, res);
3394 if (dispsocket != NULL)
3395 ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3397 UNLOCK(&disp->lock);
3402 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3403 INSIST(res->dispsocket != NULL);
3405 return (ISC_R_SUCCESS);
/*
 * Form of dns_dispatch_addresponse2() for dispatches that are not
 * exclusive.
 *
 * Requires a valid disp without DNS_DISPATCHATTR_EXCLUSIVE; disp, dest,
 * task, action and arg are forwarded to dns_dispatch_addresponse2() and its
 * result is returned.
 */
3409 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3410 isc_task_t *task, isc_taskaction_t action, void *arg,
3411 dns_messageid_t *idp, dns_dispentry_t **resp)
3413 REQUIRE(VALID_DISPATCH(disp));
3414 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3416 return (dns_dispatch_addresponse2(disp, dest, task, action, arg,
/*
 * Mark a dispatch as connected and start receiving on it.
 *
 * Requires a valid disp.  DNS_DISPATCHATTR_CONNECTED is set and
 * startrecv() is called with no per-query socket; its result is
 * deliberately discarded.  disp->lock is released before returning.
 */
3421 dns_dispatch_starttcp(dns_dispatch_t *disp) {
3423 REQUIRE(VALID_DISPATCH(disp));
3425 dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3428 disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
3429 (void)startrecv(disp, NULL);
3430 UNLOCK(&disp->lock);
/*
 * Unregister the response entry *resp and release what it holds.
 *
 * Requires resp non-NULL and *resp a valid response whose dispatch and
 * manager are valid; if sockevent is given, *sockevent must be non-NULL.
 *
 * Outline of the code below:
 *  - when the reference count of the dispatch has reached zero, cancel the
 *    pending receive and the receives on all active sockets and flag the
 *    dispatch as shutting down;
 *  - unlink the entry from qid->qid_table[res->bucket];
 *  - if no event was handed in but one is marked as out, pull it back from
 *    the task queue with isc_task_unsend();
 *  - an event obtained either way has its buffer (if any) and itself
 *    freed, and item_out is reset;
 *  - detach from the task; cancel the receive on the per-query socket and
 *    clear its back pointer;
 *  - free every event still queued on res->items, then return the entry to
 *    mgr->rpool;
 *  - destroy_disp_ok() is consulted under disp->lock and the control event
 *    is posted to task[0] after the lock is dropped.
 *
 * NOTE(review): the guards around the startrecv() call and the final
 * isc_task_send() are only partly visible here -- confirm.
 */
3434 dns_dispatch_removeresponse(dns_dispentry_t **resp,
3435 dns_dispatchevent_t **sockevent)
3437 dns_dispatchmgr_t *mgr;
3438 dns_dispatch_t *disp;
3439 dns_dispentry_t *res;
3440 dispsocket_t *dispsock;
3441 dns_dispatchevent_t *ev;
3442 unsigned int bucket;
3443 isc_boolean_t killit;
3445 isc_eventlist_t events;
3448 REQUIRE(resp != NULL);
3449 REQUIRE(VALID_RESPONSE(*resp));
3455 REQUIRE(VALID_DISPATCH(disp));
3457 REQUIRE(VALID_DISPATCHMGR(mgr));
3459 qid = DNS_QID(disp);
3461 if (sockevent != NULL) {
3462 REQUIRE(*sockevent != NULL);
3471 INSIST(disp->requests > 0);
3473 INSIST(disp->refcount > 0);
3475 if (disp->refcount == 0) {
3476 if (disp->recv_pending > 0)
3477 isc_socket_cancel(disp->socket, disp->task[0],
3478 ISC_SOCKCANCEL_RECV);
3479 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3481 dispsock = ISC_LIST_NEXT(dispsock, link)) {
3482 isc_socket_cancel(dispsock->socket, dispsock->task,
3483 ISC_SOCKCANCEL_RECV);
3485 disp->shutting_down = 1;
3488 bucket = res->bucket;
3491 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3494 if (ev == NULL && res->item_out) {
3496 * We've posted our event, but the caller hasn't gotten it
3497 * yet. Take it back.
3499 ISC_LIST_INIT(events);
3500 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3503 * We had better have gotten it back.
3506 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
3510 REQUIRE(res->item_out == ISC_TRUE);
3511 res->item_out = ISC_FALSE;
3512 if (ev->buffer.base != NULL)
3513 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3514 free_devent(disp, ev);
3517 request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3518 isc_task_detach(&res->task);
3520 if (res->dispsocket != NULL) {
3521 isc_socket_cancel(res->dispsocket->socket,
3522 res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3523 res->dispsocket->resp = NULL;
3527 * Free any buffered requests as well
3529 ev = ISC_LIST_HEAD(res->items);
3530 while (ev != NULL) {
3531 ISC_LIST_UNLINK(res->items, ev, ev_link);
3532 if (ev->buffer.base != NULL)
3533 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3534 free_devent(disp, ev);
3535 ev = ISC_LIST_HEAD(res->items);
3538 isc_mempool_put(disp->mgr->rpool, res);
3539 if (disp->shutting_down == 1)
3542 (void)startrecv(disp, NULL);
3544 killit = destroy_disp_ok(disp);
3545 UNLOCK(&disp->lock);
3547 isc_task_send(disp->task[0], &disp->ctlevent);
/*
 * Deliver the shutdown failsafe event of disp to one response handler.
 *
 * The qid table is walked with linear_first() and linear_next() until an
 * entry without an event outstanding (item_out false) is found.  The
 * preallocated disp->failsafe_ev is then initialised as a
 * DNS_EVENT_DISPATCH carrying disp->shutdown_why and an empty buffer,
 * disp->shutdown_out is set so that this happens only once, the entry is
 * marked item_out, and the event is sent to the task of that entry.
 */
3551 do_cancel(dns_dispatch_t *disp) {
3552 dns_dispatchevent_t *ev;
3553 dns_dispentry_t *resp;
/* The failsafe event has already been sent once. */
3556 if (disp->shutdown_out == 1)
3559 qid = DNS_QID(disp);
3562 * Search for the first response handler without packets outstanding
3563 * unless a specific hander is given.
3566 for (resp = linear_first(qid);
3567 resp != NULL && resp->item_out;
3569 resp = linear_next(qid, resp);
3572 * No one to send the cancel event to, so nothing to do.
3578 * Send the shutdown failsafe event to this resp.
3580 ev = disp->failsafe_ev;
3581 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3582 resp->action, resp->arg, resp, NULL, NULL);
3583 ev->result = disp->shutdown_why;
3584 ev->buffer.base = NULL;
3585 ev->buffer.length = 0;
3586 disp->shutdown_out = 1;
3587 request_log(disp, resp, LVL(10),
3588 "cancel: failsafe event %p -> task %p",
3590 resp->item_out = ISC_TRUE;
3591 isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
/*
 * Return the socket stored in disp (disp->socket).  Requires a valid disp.
 * No reference is taken and no lock is acquired in the code shown.
 */
3597 dns_dispatch_getsocket(dns_dispatch_t *disp) {
3598 REQUIRE(VALID_DISPATCH(disp));
3600 return (disp->socket);
/*
 * Return the per-query socket attached to a response entry.
 *
 * Requires a valid resp.  When resp->dispsocket is set, its socket is
 * returned.
 *
 * NOTE(review): the value returned when resp has no dispsocket is not
 * visible here; presumably NULL -- confirm.
 */
3604 dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3605 REQUIRE(VALID_RESPONSE(resp));
3607 if (resp->dispsocket != NULL)
3608 return (resp->dispsocket->socket);
/*
 * Copy the local address of a UDP dispatch into *addrp.
 *
 * Requires a valid disp and a non-NULL addrp.
 *
 * Returns ISC_R_SUCCESS after copying disp->local when the dispatch is of
 * socket type UDP, and ISC_R_NOTIMPLEMENTED for any other socket type, in
 * which case *addrp is left untouched.
 */
3614 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3616 REQUIRE(VALID_DISPATCH(disp));
3617 REQUIRE(addrp != NULL);
3619 if (disp->socktype == isc_sockettype_udp) {
3620 *addrp = disp->local;
3621 return (ISC_R_SUCCESS);
3623 return (ISC_R_NOTIMPLEMENTED);
/*
 * Put a dispatch into the shutting-down state with reason ISC_R_CANCELED.
 *
 * Requires a valid disp.  A dispatch that is already shutting down is left
 * alone apart from releasing disp->lock.  Otherwise shutdown_why and
 * shutting_down are set before the lock is released.
 *
 * NOTE(review): whatever happens between setting shutting_down and the
 * final UNLOCK is not visible here -- confirm.
 */
3627 dns_dispatch_cancel(dns_dispatch_t *disp) {
3628 REQUIRE(VALID_DISPATCH(disp));
3632 if (disp->shutting_down == 1) {
3633 UNLOCK(&disp->lock);
3637 disp->shutdown_why = ISC_R_CANCELED;
3638 disp->shutting_down = 1;
3641 UNLOCK(&disp->lock);
/*
 * Return the current attribute flags of disp (disp->attributes).
 * Requires a valid disp.  The value is read without taking disp->lock.
 */
3647 dns_dispatch_getattributes(dns_dispatch_t *disp) {
3648 REQUIRE(VALID_DISPATCH(disp));
3651 * We don't bother locking disp here; it's the caller's responsibility
3652 * to use only non volatile flags.
3654 return (disp->attributes);
/*
 * Replace the attribute bits selected by mask with the matching bits of
 * attributes.
 *
 * Asserted preconditions: valid disp; attributes must not contain
 * DNS_DISPATCHATTR_EXCLUSIVE; and a dispatch that is already exclusive
 * must not be given DNS_DISPATCHATTR_NOLISTEN.
 *
 * When mask covers DNS_DISPATCHATTR_NOLISTEN the change takes effect at
 * once: clearing the flag on a dispatch that has it calls startrecv(),
 * while the branch that sets the flag cancels a pending receive on
 * disp->socket.  Finally the masked bits are cleared in disp->attributes
 * and replaced by (attributes & mask), and disp->lock is released.
 */
3658 dns_dispatch_changeattributes(dns_dispatch_t *disp,
3659 unsigned int attributes, unsigned int mask)
3661 REQUIRE(VALID_DISPATCH(disp));
3662 /* Exclusive attribute can only be set on creation */
3663 REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3664 /* Also, a dispatch with randomport specified cannot start listening */
3665 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3666 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3669 * Should check for valid attributes here!
3674 if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3675 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3676 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3677 disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3678 (void)startrecv(disp, NULL);
3679 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3681 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3682 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3683 if (disp->recv_pending != 0)
3684 isc_socket_cancel(disp->socket, disp->task[0],
3685 ISC_SOCKCANCEL_RECV);
/* Generic update: only the bits named in mask change. */
3689 disp->attributes &= ~mask;
3690 disp->attributes |= (attributes & mask);
3691 UNLOCK(&disp->lock);
/*
 * Feed a copy of a received socket event into disp.
 *
 * Asserted preconditions: valid disp that has DNS_DISPATCHATTR_NOLISTEN
 * set, and a non-NULL event.  The event is treated as an
 * isc_socketevent_t whose payload length n must not exceed the buffer size
 * of the manager (INSIST).
 *
 * A new socket event of type DNS_EVENT_IMPORTRECVDONE is allocated with
 * udp_shrecv as its action and disp as its argument, together with a
 * buffer from allocate_udp_buffer().  The first n payload bytes and the
 * result, address, timestamp, pktinfo and attributes fields are copied
 * across, region.length is set to the manager buffer size, and the new
 * event is sent to task[0].  The incoming event itself is not modified or
 * freed in the code shown.
 */
3695 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3697 isc_socketevent_t *sevent, *newsevent;
3699 REQUIRE(VALID_DISPATCH(disp));
3700 REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3701 REQUIRE(event != NULL);
3703 sevent = (isc_socketevent_t *)event;
3705 INSIST(sevent->n <= disp->mgr->buffersize);
3706 newsevent = (isc_socketevent_t *)
3707 isc_event_allocate(disp->mgr->mctx, NULL,
3708 DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3709 disp, sizeof(isc_socketevent_t));
3710 if (newsevent == NULL)
3713 buf = allocate_udp_buffer(disp);
/* Give back the freshly allocated event on the failure path. */
3715 isc_event_free(ISC_EVENT_PTR(&newsevent));
3718 memmove(buf, sevent->region.base, sevent->n);
3719 newsevent->region.base = buf;
3720 newsevent->region.length = disp->mgr->buffersize;
3721 newsevent->n = sevent->n;
3722 newsevent->result = sevent->result;
3723 newsevent->address = sevent->address;
3724 newsevent->timestamp = sevent->timestamp;
3725 newsevent->pktinfo = sevent->pktinfo;
3726 newsevent->attributes = sevent->attributes;
3728 isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
/*
 * Pick a dispatch from a dispatch set.
 *
 * A NULL set or a set with ndisp equal to 0 is treated as unconfigured.
 * Otherwise the dispatch at index dset->cur is selected, and dset->lock is
 * released before returning.
 *
 * NOTE(review): the lines that advance dset->cur and reset it once it
 * equals ndisp are not visible here; presumably this is a round-robin
 * selection and the unconfigured case yields NULL -- confirm.
 */
3732 dns_dispatchset_get(dns_dispatchset_t *dset) {
3733 dns_dispatch_t *disp;
3735 /* check that dispatch set is configured */
3736 if (dset == NULL || dset->ndisp == 0)
3740 disp = dset->dispatches[dset->cur];
3742 if (dset->cur == dset->ndisp)
3744 UNLOCK(&dset->lock);
/*
 * Create a set of n UDP dispatches modelled on source.
 *
 * Asserted preconditions: source is a valid dispatch with
 * DNS_DISPATCHATTR_UDP set; dsetp is non-NULL and points at NULL.
 *
 * The set structure is allocated from mctx and zeroed, its lock is
 * initialised, and an array of n dispatch pointers is allocated.  Slot 0
 * is an attached reference to source; slots 1 to n-1 are filled with
 * dispatches newly created by dispatch_createudp() using
 * source->maxrequests.  ISC_R_SUCCESS is returned when all slots are
 * filled; ISC_R_NOMEMORY is used for allocation failures.
 *
 * The trailing statements are the unwind path: detach the dispatches
 * created so far, free the array, detach from mctx if attached, destroy
 * the lock and free the set.
 *
 * NOTE(review): no check that n is positive is visible before n is used to
 * size the array -- confirm that callers guarantee it.
 */
3750 dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr,
3751 isc_taskmgr_t *taskmgr, dns_dispatch_t *source,
3752 dns_dispatchset_t **dsetp, int n)
3754 isc_result_t result;
3755 dns_dispatchset_t *dset;
3756 dns_dispatchmgr_t *mgr;
3759 REQUIRE(VALID_DISPATCH(source));
3760 REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0);
3761 REQUIRE(dsetp != NULL && *dsetp == NULL);
3765 dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t));
3767 return (ISC_R_NOMEMORY);
3768 memset(dset, 0, sizeof(*dset));
3770 result = isc_mutex_init(&dset->lock);
3771 if (result != ISC_R_SUCCESS)
3774 dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n);
3776 result = ISC_R_NOMEMORY;
3780 isc_mem_attach(mctx, &dset->mctx);
/* Slot 0 shares the source dispatch instead of creating a new one. */
3784 dset->dispatches[0] = NULL;
3785 dns_dispatch_attach(source, &dset->dispatches[0]);
3788 for (i = 1; i < n; i++) {
3789 dset->dispatches[i] = NULL;
3790 result = dispatch_createudp(mgr, sockmgr, taskmgr,
3792 source->maxrequests,
3794 &dset->dispatches[i],
3796 if (result != ISC_R_SUCCESS)
3803 return (ISC_R_SUCCESS);
/* Unwind path: only the first i slots hold dispatches at this point. */
3808 for (j = 0; j < i; j++)
3809 dns_dispatch_detach(&(dset->dispatches[j]));
3810 isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n);
3811 if (dset->mctx == mctx)
3812 isc_mem_detach(&dset->mctx);
3815 DESTROYLOCK(&dset->lock);
3818 isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t));
/*
 * Cancel all socket operations that task has outstanding on the sockets of
 * the dispatches in dset.
 *
 * Requires a non-NULL dset.  For each of the dset->ndisp dispatches the
 * socket is fetched with dns_dispatch_getsocket() and passed to
 * isc_socket_cancel() with ISC_SOCKCANCEL_ALL.
 */
3823 dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) {
3826 REQUIRE(dset != NULL);
3828 for (i = 0; i < dset->ndisp; i++) {
3830 sock = dns_dispatch_getsocket(dset->dispatches[i]);
3831 isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL);
/*
 * Tear down a dispatch set.
 *
 * Requires dsetp to be non-NULL and to point at a non-NULL set.  Every
 * dispatch in the set is detached, the pointer array (ndisp entries) is
 * returned to dset->mctx, the lock is destroyed, and the set structure is
 * freed while the reference on its memory context is dropped by
 * isc_mem_putanddetach().
 */
3836 dns_dispatchset_destroy(dns_dispatchset_t **dsetp) {
3837 dns_dispatchset_t *dset;
3840 REQUIRE(dsetp != NULL && *dsetp != NULL);
3843 for (i = 0; i < dset->ndisp; i++)
3844 dns_dispatch_detach(&(dset->dispatches[i]));
3845 isc_mem_put(dset->mctx, dset->dispatches,
3846 sizeof(dns_dispatch_t *) * dset->ndisp);
3847 DESTROYLOCK(&dset->lock);
3848 isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t));
3855 dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
3856 dns_dispatch_t *disp;
3859 disp = ISC_LIST_HEAD(mgr->list);
3860 while (disp != NULL) {
3861 isc_sockaddr_format(&disp->local, foo, sizeof(foo));
3862 printf("\tdispatch %p, addr %s\n", disp, foo);
3863 disp = ISC_LIST_NEXT(disp, link);