2 * Copyright (C) 2004-2009, 2011 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: dispatch.c,v 1.168.248.4 2011-04-06 10:30:08 marka Exp $ */
25 #include <sys/types.h>
29 #include <isc/entropy.h>
31 #include <isc/mutex.h>
32 #include <isc/portset.h>
33 #include <isc/print.h>
34 #include <isc/random.h>
35 #include <isc/stats.h>
36 #include <isc/string.h>
42 #include <dns/dispatch.h>
43 #include <dns/events.h>
45 #include <dns/message.h>
46 #include <dns/portlist.h>
47 #include <dns/stats.h>
48 #include <dns/tcpmsg.h>
49 #include <dns/types.h>
/*
 * List and forward typedefs for the bookkeeping structures used in this file:
 * response entries, dispatch sockets and port-table entries.
 */
51 typedef ISC_LIST(dns_dispentry_t) dns_displist_t;
53 typedef struct dispsocket dispsocket_t;
54 typedef ISC_LIST(dispsocket_t) dispsocketlist_t;
56 typedef struct dispportentry dispportentry_t;
57 typedef ISC_LIST(dispportentry_t) dispportlist_t;
59 /* ARC4 Random generator state */
/*
 * Other members used by the generator routines in this file (i, j, s[],
 * count, lock) are declared on lines not shown here.
 */
60 typedef struct arc4ctx {
65 isc_entropy_t *entropy; /*%< entropy source for ARC4 */
/*
 * Query-ID state: two hash tables that share the same bucket count
 * (qid_nbuckets).  qid_table holds outstanding response entries and
 * sock_table holds dedicated dispatch sockets; both are indexed by the
 * value returned from dns_hash().  A 'lock' member is referenced elsewhere
 * in this file (see PORTBUFLOCK) but its declaration is not shown here.
 */
69 typedef struct dns_qid {
71 unsigned int qid_nbuckets; /*%< hash table size */
72 unsigned int qid_increment; /*%< id increment on collision */
74 dns_displist_t *qid_table; /*%< the table itself */
75 dispsocketlist_t *sock_table; /*%< socket table */
/*
 * Dispatch manager: tracks every dispatch it owns, the shared ARC4 context,
 * UDP buffer accounting, the memory pools used by dispatches, and the
 * per-address-family arrays of usable source ports.  Members such as mctx,
 * qid, stats, blackhole and state are referenced elsewhere in this file but
 * fall on lines not shown here.
 */
78 struct dns_dispatchmgr {
83 dns_portlist_t *portlist;
85 isc_entropy_t *entropy; /*%< entropy source */
87 /* Locked by "lock". */
90 ISC_LIST(dns_dispatch_t) list;
92 /* Locked by arc4_lock. */
93 isc_mutex_t arc4_lock;
94 arc4ctx_t arc4ctx; /*%< ARC4 context for QID */
96 /* locked by buffer lock */
98 isc_mutex_t buffer_lock;
99 unsigned int buffers; /*%< allocated buffers */
100 unsigned int buffersize; /*%< size of each buffer */
101 unsigned int maxbuffers; /*%< max buffers */
103 /* Locked internally. */
104 isc_mutex_t pool_lock;
105 isc_mempool_t *epool; /*%< memory pool for events */
106 isc_mempool_t *rpool; /*%< memory pool for replies */
107 isc_mempool_t *dpool; /*%< dispatch allocations */
108 isc_mempool_t *bpool; /*%< memory pool for buffers */
109 isc_mempool_t *spool; /*%< memory pool for dispsocs */
112 * Locked by qid->lock if qid exists; otherwise, can be used without
114 * Memory footprint considerations: this is a simple implementation of
115 * available ports, i.e., an ordered array of the actual port numbers.
116 * This will require about 256KB of memory in the worst case (128KB for
117 * each of IPv4 and IPv6). We could reduce it by representing it as a
118 * more sophisticated way such as a list (or array) of ranges that are
119 * searched to identify a specific port. Our decision here is the saved
120 * memory isn't worth the implementation complexity, considering the
121 * fact that the whole BIND9 process (which is mainly named) already
122 * requires a pretty large memory footprint. We may, however, have to
123 * revisit the decision when we want to use it as a separate module for
124 * an environment where memory requirement is severer.
126 in_port_t *v4ports; /*%< available ports for IPv4 */
127 unsigned int nv4ports; /*%< # of available ports for IPv4 */
128 in_port_t *v6ports; /*%< available ports for IPv6 */
129 unsigned int nv6ports; /*%< # of available ports for IPv6 */
/*
 * MGR_SHUTTINGDOWN is a bit in the manager's 'state' word;
 * MGR_IS_SHUTTINGDOWN(l) tests it.  IS_PRIVATE(d) tests the
 * DNS_DISPATCHATTR_PRIVATE bit in a dispatch's 'attributes' word.
 */
132 #define MGR_SHUTTINGDOWN 0x00000001U
133 #define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0)
135 #define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
/*
 * One outstanding request awaiting a response.  It is chained into a
 * qid_table bucket via 'link' and queues delivered events on 'items'.
 * Members such as id, host, port and bucket are used by entry_search() and
 * linear_next() but are declared on lines not shown here.
 */
137 struct dns_dispentry {
139 dns_dispatch_t *disp;
145 isc_taskaction_t action;
147 isc_boolean_t item_out;
148 dispsocket_t *dispsocket;
149 ISC_LIST(dns_dispatchevent_t) items;
150 ISC_LINK(dns_dispentry_t) link;
/*
 * Both limits below are wrapped in #ifndef guards, so a definition supplied
 * earlier (e.g. on the compiler command line) takes precedence over the
 * defaults given here.
 */
154 * Maximum number of dispatch sockets that can be pooled for reuse. The
155 * appropriate value may vary, but experiments have shown a busy caching server
156 * may need more than 1000 sockets concurrently opened. The maximum allowable
157 * number of dispatch sockets (per manager) will be set to the double of this
160 #ifndef DNS_DISPATCH_POOLSOCKS
161 #define DNS_DISPATCH_POOLSOCKS 2048
165 * Quota to control the number of dispatch sockets. If a dispatch has more
166 * than the quota of sockets, new queries will purge oldest ones, so that
167 * a massive number of outstanding queries won't prevent subsequent queries
168 * (especially if the older ones take longer time and result in timeout).
170 #ifndef DNS_DISPATCH_SOCKSQUOTA
171 #define DNS_DISPATCH_SOCKSQUOTA 3072
/*
 * Members of a dedicated dispatch socket (the struct header line is not
 * shown here).  'link' chains the socket into a dispatch's active or
 * inactive list; 'blink' chains it into a qid sock_table bucket.  Other
 * members used in this file (magic, task, host, bucket) are declared on
 * lines not shown.
 */
176 isc_socket_t *socket;
177 dns_dispatch_t *disp;
179 in_port_t localport; /* XXX: should be removed later */
180 dispportentry_t *portentry;
181 dns_dispentry_t *resp;
183 ISC_LINK(dispsocket_t) link;
185 ISC_LINK(dispsocket_t) blink;
189 * A port table entry. We remember every port we first open in a table with a
190 * reference counter so that we can 'reuse' the same port (with different
191 * destination addresses) using the SO_REUSEADDR socket option.
/*
 * The 'port' and 'refs' members used by port_search(), new_portentry() and
 * deref_portentry() are declared on lines not shown here.
 */
193 struct dispportentry {
196 ISC_LINK(struct dispportentry) link;
/*
 * Number of buckets in a dispatch's port table; ports are mapped to a bucket
 * with "port % DNS_DISPATCH_PORTTABLESIZE".  Overridable at build time.
 */
199 #ifndef DNS_DISPATCH_PORTTABLESIZE
200 #define DNS_DISPATCH_PORTTABLESIZE 1024
/* Sentinel bucket value; NOTE(review): its users are not visible here. */
203 #define INVALID_BUCKET (0xffffdead)
206 * Number of tasks for each dispatch that use separate sockets for different
207 * transactions. This must be a power of 2 as it will divide 32 bit numbers
208 * to get a uniformly random task selection. See get_dispsocket().
/* Capacity of the task[] array in struct dns_dispatch. */
210 #define MAX_INTERNAL_TASKS 64
/*
 * A single dispatch: one local address (and, for shared use, one socket)
 * through which requests are sent and responses are demultiplexed.  Members
 * such as ntasks, qid and shutdown_out are used elsewhere in this file but
 * are declared on lines not shown here.
 */
212 struct dns_dispatch {
214 unsigned int magic; /*%< magic */
215 dns_dispatchmgr_t *mgr; /*%< dispatch manager */
218 * internal task buckets. We use multiple tasks to distribute various
219 * socket events well when using separate dispatch sockets. We use the
220 * 1st task (task[0]) for internal control events.
222 isc_task_t *task[MAX_INTERNAL_TASKS];
223 isc_socket_t *socket; /*%< isc socket attached to */
224 isc_sockaddr_t local; /*%< local address */
225 in_port_t localport; /*%< local UDP port */
226 unsigned int maxrequests; /*%< max requests */
227 isc_event_t *ctlevent;
229 /*% Locked by mgr->lock. */
230 ISC_LINK(dns_dispatch_t) link;
232 /* Locked by "lock". */
233 isc_mutex_t lock; /*%< locks all below */
234 isc_sockettype_t socktype;
235 unsigned int attributes;
236 unsigned int refcount; /*%< number of users */
237 dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */
238 unsigned int shutting_down : 1,
242 recv_pending : 1; /*%< is a recv() pending? */
243 isc_result_t shutdown_why;
244 ISC_LIST(dispsocket_t) activesockets;
245 ISC_LIST(dispsocket_t) inactivesockets;
246 unsigned int nsockets;
247 unsigned int requests; /*%< how many requests we have */
248 unsigned int tcpbuffers; /*%< allocated buffers */
249 dns_tcpmsg_t tcpmsg; /*%< for tcp streams */
251 arc4ctx_t arc4ctx; /*%< for QID/UDP port num */
252 dispportlist_t *port_table; /*%< hold ports 'owned' by us */
253 isc_mempool_t *portpool; /*%< port table entries */
/*
 * Magic numbers and validity checks for each structure type in this file.
 * Each VALID_*() macro compares the object's magic via ISC_MAGIC_VALID().
 */
256 #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ')
257 #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC)
259 #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p')
260 #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
262 #define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c')
263 #define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
265 #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p')
266 #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
268 #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
269 #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
/*
 * DNS_QID(disp): the QID table serving 'disp' -- the dispatch's own table
 * for TCP dispatches, the manager-wide table otherwise.
 *
 * DISP_ARC4CTX(disp): pointer to the ARC4 context serving 'disp' -- the
 * dispatch's own context for UDP dispatches, the manager's otherwise.
 *
 * Each expansion is wrapped in an outer pair of parentheses so the
 * conditional expression stays intact when the macro is used as an operand
 * of a larger expression (e.g. "!DNS_QID(d)" or "x + DNS_QID(d)").
 */
#define DNS_QID(disp) \
	(((disp)->socktype == isc_sockettype_tcp) ? \
	 (disp)->qid : (disp)->mgr->qid)
#define DISP_ARC4CTX(disp) \
	(((disp)->socktype == isc_sockettype_udp) ? \
	 (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
277 * Locking a query port buffer is a bit tricky. We access the buffer without
278 * locking until qid is created. Technically, there is a possibility of race
279 * between the creation of qid and access to the port buffer; in practice,
280 * however, this should be safe because qid isn't created until the first
281 * dispatch is created and there should be no contending situation until then.
/*
 * Take / release the lock protecting the manager's port buffers.  The lock
 * lives in mgr->qid, so nothing is done while qid is still NULL.
 *
 * The bodies are wrapped in do { ... } while (0) so that each macro behaves
 * as a single statement: "if (c) PORTBUFLOCK(m); else ..." then pairs the
 * 'else' with the caller's 'if' rather than with the macro's internal one.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)
/*
 * Forward declarations for the file-local (static) helpers defined later in
 * this file, so they can be referenced before their definitions.
 */
289 static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
290 dns_messageid_t, in_port_t, unsigned int);
291 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
292 static void destroy_disp(isc_task_t *task, isc_event_t *event);
293 static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
294 static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
295 static void udp_exrecv(isc_task_t *, isc_event_t *);
296 static void udp_shrecv(isc_task_t *, isc_event_t *);
297 static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
298 static void tcp_recv(isc_task_t *, isc_event_t *);
299 static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
300 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
302 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
303 static void *allocate_udp_buffer(dns_dispatch_t *disp);
304 static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
305 static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
306 static void do_cancel(dns_dispatch_t *disp);
307 static dns_dispentry_t *linear_first(dns_qid_t *disp);
308 static dns_dispentry_t *linear_next(dns_qid_t *disp,
309 dns_dispentry_t *resp);
310 static void dispatch_free(dns_dispatch_t **dispp);
311 static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
312 dns_dispatch_t *disp,
313 isc_socketmgr_t *sockmgr,
314 isc_sockaddr_t *localaddr,
315 isc_socket_t **sockp);
316 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
317 isc_socketmgr_t *sockmgr,
318 isc_taskmgr_t *taskmgr,
319 isc_sockaddr_t *localaddr,
320 unsigned int maxrequests,
321 unsigned int attributes,
322 dns_dispatch_t **dispp);
323 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
324 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
325 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
326 unsigned int increment, dns_qid_t **qidp,
327 isc_boolean_t needaddrtable);
328 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
329 static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
330 unsigned int options, isc_socket_t **sockp);
331 static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
332 isc_sockaddr_t *sockaddrp);
/* Shorthand for a debug log level: LVL(x) expands to ISC_LOG_DEBUG(x). */
334 #define LVL(x) ISC_LOG_DEBUG(x)
/*
 * Log a printf-style message about a dispatch manager.
 *
 * The message is formatted into a local buffer with vsnprintf() and written
 * to dns_lctx under the DISPATCH category/module, prefixed with the
 * manager's address.  The isc_log_wouldlog() test guards the formatting
 * work for levels that would not be logged.
 */
337 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
338 ISC_FORMAT_PRINTF(3, 4);
341 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
345 if (! isc_log_wouldlog(dns_lctx, level))
349 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
352 isc_log_write(dns_lctx,
353 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
354 level, "dispatchmgr %p: %s", mgr, msgbuf);
/*
 * Increment statistics counter 'counter' on the manager's stats object.
 * Does nothing when the manager has no stats object (mgr->stats == NULL).
 */
358 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
359 if (mgr->stats != NULL)
360 isc_stats_increment(mgr->stats, counter);
/*
 * Log a printf-style message about a single dispatch.
 *
 * Same shape as mgr_log(): the message is formatted with vsnprintf() and
 * written to dns_lctx under the DISPATCH category/module, prefixed with the
 * dispatch's address.  The isc_log_wouldlog() test guards the formatting.
 */
364 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
365 ISC_FORMAT_PRINTF(3, 4);
368 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
372 if (! isc_log_wouldlog(dns_lctx, level))
376 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
379 isc_log_write(dns_lctx,
380 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
381 level, "dispatch %p: %s", disp, msgbuf);
/*
 * Log a printf-style message about a request/response entry.
 *
 * When 'resp' passes VALID_RESPONSE(), the peer address (resp->host) is
 * formatted and included in the log line; otherwise only the dispatch and
 * entry pointers are logged.  The isc_log_wouldlog() test guards the
 * formatting work.
 */
385 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
386 int level, const char *fmt, ...)
387 ISC_FORMAT_PRINTF(4, 5);
390 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
391 int level, const char *fmt, ...)
397 if (! isc_log_wouldlog(dns_lctx, level))
401 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
404 if (VALID_RESPONSE(resp)) {
405 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
406 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
407 DNS_LOGMODULE_DISPATCH, level,
408 "dispatch %p response %p %s: %s", disp, resp,
411 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
412 DNS_LOGMODULE_DISPATCH, level,
413 "dispatch %p req/resp %p: %s", disp, resp,
419 * ARC4 random number generator derived from OpenBSD.
420 * Only dispatch_random() and dispatch_uniformrandom() are expected
421 * to be called from general dispatch routines; the rest of them are subroutines
424 * The original copyright follows:
425 * Copyright (c) 1996, David Mazieres <dm@uun.org>
426 * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
428 * Permission to use, copy, modify, and distribute this software for any
429 * purpose with or without fee is hereby granted, provided that the above
430 * copyright notice and this permission notice appear in all copies.
432 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
433 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
434 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
435 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
436 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
437 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
438 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
/*
 * Initialise an ARC4 context.  The loop visits all 256 state slots (the
 * loop body is not shown here), and the entropy source pointer is stored
 * without taking a reference.  The remaining parameters are on a line not
 * shown here.
 */
442 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
446 for (n = 0; n < 256; n++)
451 actx->entropy = entropy; /* don't have to attach */
/*
 * Mix 'datlen' bytes of key material from 'dat' into the ARC4 state.
 *
 * Runs 256 rounds; each round advances i, folds s[i] and one input byte
 * into j, and swaps s[i] with s[j].  The input is consumed cyclically
 * (dat[n % datlen]), so 'datlen' must be non-zero.
 */
456 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
461 for (n = 0; n < 256; n++) {
462 actx->i = (actx->i + 1);
463 si = actx->s[actx->i];
464 actx->j = (actx->j + si + dat[n % datlen]);
465 actx->s[actx->i] = actx->s[actx->j];
466 actx->s[actx->j] = si;
/*
 * Produce the next 8 bits of ARC4 keystream: advance i, add s[i] to j,
 * swap s[i] and s[j], and return the state byte indexed by their sum
 * (masked to 8 bits).
 */
471 static inline isc_uint8_t
472 dispatch_arc4get8(arc4ctx_t *actx) {
475 actx->i = (actx->i + 1);
476 si = actx->s[actx->i];
477 actx->j = (actx->j + si);
478 sj = actx->s[actx->j];
479 actx->s[actx->i] = sj;
480 actx->s[actx->j] = si;
482 return (actx->s[(si + sj) & 0xff]);
/*
 * Produce 16 bits of keystream from two dispatch_arc4get8() calls; the
 * first byte drawn becomes the high-order byte.
 */
485 static inline isc_uint16_t
486 dispatch_arc4get16(arc4ctx_t *actx) {
489 val = dispatch_arc4get8(actx) << 8;
490 val |= dispatch_arc4get8(actx);
/*
 * Re-key the ARC4 state.
 *
 * 128 bytes of seed material are drawn from actx->entropy when it is set;
 * the isc_random_get() loop fills the same buffer as 32 32-bit words
 * (presumably the fallback branch when no entropy source exists -- the
 * 'else' line is not shown here).  The seed is mixed in with
 * dispatch_arc4addrandom(), the first 256 output bytes are discarded, and
 * the reseed countdown (actx->count) is reset.
 */
496 dispatch_arc4stir(arc4ctx_t *actx) {
499 unsigned char rnd[128];
500 isc_uint32_t rnd32[32];
504 if (actx->entropy != NULL) {
506 * We accept any quality of random data to avoid blocking.
508 result = isc_entropy_getdata(actx->entropy, rnd.rnd,
509 sizeof(rnd), NULL, 0);
510 RUNTIME_CHECK(result == ISC_R_SUCCESS);
512 for (i = 0; i < 32; i++)
513 isc_random_get(&rnd.rnd32[i]);
515 dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
518 * Discard early keystream, as per recommendations in:
519 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
521 for (i = 0; i < 256; i++)
522 (void)dispatch_arc4get8(actx);
525 * Derived from OpenBSD's implementation. The rationale is not clear,
526 * but should be conservative enough in safety, and reasonably large
529 actx->count = 1600000;
/*
 * Return 16 random bits from the ARC4 context.
 *
 * The reseed countdown is decreased by the number of bytes consumed, and
 * the state is re-stirred once it reaches zero or below.  When actx->lock
 * is set the operation is bracketed by lock/unlock calls (those lines are
 * not shown here).
 */
533 dispatch_random(arc4ctx_t *actx) {
536 if (actx->lock != NULL)
539 actx->count -= sizeof(isc_uint16_t);
540 if (actx->count <= 0)
541 dispatch_arc4stir(actx);
542 result = dispatch_arc4get16(actx);
544 if (actx->lock != NULL)
551 * For general purpose library, we don't have to be too strict about the
552 * quality of random values. Performance doesn't matter much, either.
553 * So we simply use the isc_random module to keep the library as small as
558 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
569 dispatch_random(arc4ctx_t *actx) {
/*
 * Return a random value reduced modulo 'upper_bound', avoiding modulo bias
 * by rejection sampling: 'min' is chosen so that the accepted range of
 * 16-bit values is an exact multiple of upper_bound, and draws from
 * dispatch_random() are retried (loop control lines are not shown here)
 * until one is accepted.
 */
580 dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
587 * Ensure the range of random numbers [min, 0xffff] be a multiple of
588 * upper_bound and contain at least a half of the 16 bit range.
591 if (upper_bound > 0x8000)
592 min = 1 + ~upper_bound; /* 0x10000 - upper_bound (mod 2^16) */
594 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
597 * This could theoretically loop forever but each retry has
598 * p > 0.5 (worst case, usually far better) of selecting a
599 * number inside the range we need, so it should rarely need
603 r = dispatch_random(actx);
608 return (r % upper_bound);
612 * Return a hash of the destination and message id.
/*
 * Compute the bucket index for (dest, id, port): the socket-address hash is
 * XORed with the id (shifted into the upper 16 bits) combined with the
 * port, then reduced modulo the table size.
 *
 * NOTE(review): if dns_messageid_t is a 16-bit type, 'id' is promoted to
 * int before "id << 16"; confirm that ids >= 0x8000 cannot overflow a
 * signed int here (an explicit cast to isc_uint32_t would remove doubt).
 */
615 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
620 ret = isc_sockaddr_hash(dest, ISC_TRUE);
621 ret ^= (id << 16) | port;
622 ret %= qid->qid_nbuckets;
624 INSIST(ret < qid->qid_nbuckets);
630 * Find the first entry in 'qid'. Returns NULL if there are no entries.
/*
 * Scan the qid_table buckets in index order and look at the head of each
 * bucket's list; used to begin a linear walk over all entries.
 */
632 static dns_dispentry_t *
633 linear_first(dns_qid_t *qid) {
634 dns_dispentry_t *ret;
639 while (bucket < qid->qid_nbuckets) {
640 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
650 * Find the next entry after 'resp' in 'qid'. Return NULL if there are
/*
 * Continue a linear walk: first try the successor of 'resp' within its own
 * bucket list, then scan bucket heads starting from resp->bucket (the
 * bucket-advance lines are not shown here).
 */
653 static dns_dispentry_t *
654 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
655 dns_dispentry_t *ret;
658 ret = ISC_LIST_NEXT(resp, link);
662 bucket = resp->bucket;
664 while (bucket < qid->qid_nbuckets) {
665 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
675 * The dispatch must be locked.
/*
 * Decide whether 'disp' may be destroyed.  The visible tests cover
 * outstanding references, a pending receive, active dispatch sockets and
 * whether shutdown has begun.  The return statements are not shown here;
 * presumably any of these conditions vetoes destruction -- confirm.
 */
678 destroy_disp_ok(dns_dispatch_t *disp)
680 if (disp->refcount != 0)
683 if (disp->recv_pending != 0)
686 if (!ISC_LIST_EMPTY(disp->activesockets))
689 if (disp->shutting_down == 0)
696 * Called when refcount reaches 0 (and safe to destroy).
698 * The dispatcher must not be locked.
699 * The manager must be locked.
/*
 * Task action that tears down a dispatch.  The dispatch is taken from
 * event->ev_arg and the event must be of type DNS_EVENT_DISPATCHCONTROL.
 *
 * Teardown order as visible here: unlink from the manager's list, detach
 * the shared socket (if any), destroy every pooled inactive dispatch
 * socket, detach all internal tasks, free the control event, free the
 * dispatch itself, then ask destroy_mgr_ok() whether the manager can go
 * too.  Locking and the use of 'killmgr' are on lines not shown here.
 */
702 destroy_disp(isc_task_t *task, isc_event_t *event) {
703 dns_dispatch_t *disp;
704 dns_dispatchmgr_t *mgr;
705 isc_boolean_t killmgr;
706 dispsocket_t *dispsocket;
709 INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
713 disp = event->ev_arg;
717 ISC_LIST_UNLINK(mgr->list, disp, link);
719 dispatch_log(disp, LVL(90),
720 "shutting down; detaching from sock %p, task %p",
721 disp->socket, disp->task[0]); /* XXXX */
723 if (disp->socket != NULL)
724 isc_socket_detach(&disp->socket);
725 while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
726 ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
727 destroy_dispsocket(disp, &dispsocket);
729 for (i = 0; i < disp->ntasks; i++)
730 isc_task_detach(&disp->task[i]);
731 isc_event_free(&event);
733 dispatch_free(&disp);
735 killmgr = destroy_mgr_ok(mgr);
742 * Manipulate port table per dispatch: find an entry for a given port number,
743 * create a new entry, and decrement a given entry with possible clean-up.
/*
 * Look for the port-table entry for 'port' in 'disp'.  The table must
 * exist; the bucket is chosen as port % DNS_DISPATCH_PORTTABLESIZE and its
 * list is walked until an entry with a matching port is seen.
 */
745 static dispportentry_t *
746 port_search(dns_dispatch_t *disp, in_port_t port) {
747 dispportentry_t *portentry;
749 REQUIRE(disp->port_table != NULL);
751 portentry = ISC_LIST_HEAD(disp->port_table[port %
752 DNS_DISPATCH_PORTTABLESIZE]);
753 while (portentry != NULL) {
754 if (portentry->port == port)
756 portentry = ISC_LIST_NEXT(portentry, link);
/*
 * Allocate a port-table entry for 'port' from the dispatch's portpool and
 * append it to the matching bucket.  Allocation failure is checked
 * (the failure return and the reference-count initialisation are on lines
 * not shown here).
 */
762 static dispportentry_t *
763 new_portentry(dns_dispatch_t *disp, in_port_t port) {
764 dispportentry_t *portentry;
766 REQUIRE(disp->port_table != NULL);
768 portentry = isc_mempool_get(disp->portpool);
769 if (portentry == NULL)
772 portentry->port = port;
774 ISC_LINK_INIT(portentry, link);
775 ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
782 * The caller must not hold the qid->lock.
/*
 * Drop one reference on *portentryp.  The entry must be non-NULL with a
 * positive reference count.  Once the count is zero the entry is unlinked
 * from its bucket and returned to the portpool.  The decrement itself and
 * any locking are on lines not shown here.
 */
785 deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
786 dispportentry_t *portentry = *portentryp;
789 REQUIRE(disp->port_table != NULL);
790 REQUIRE(portentry != NULL && portentry->refs > 0);
795 if (portentry->refs == 0) {
796 ISC_LIST_UNLINK(disp->port_table[portentry->port %
797 DNS_DISPATCH_PORTTABLESIZE],
799 isc_mempool_put(disp->portpool, portentry);
807 * Find a dispsocket for socket address 'dest', and port number 'port'.
808 * Return NULL if no such entry exists.
/*
 * Walk sock_table[bucket] (chained through 'blink') for a dispatch socket
 * that has a port entry for 'port' and whose peer address equals 'dest'.
 * 'bucket' must be a valid index into the table.
 */
810 static dispsocket_t *
811 socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
814 dispsocket_t *dispsock;
816 REQUIRE(bucket < qid->qid_nbuckets);
818 dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
820 while (dispsock != NULL) {
821 if (dispsock->portentry != NULL &&
822 dispsock->portentry->port == port &&
823 isc_sockaddr_equal(dest, &dispsock->host))
825 dispsock = ISC_LIST_NEXT(dispsock, blink);
832 * Make a new socket for a single dispatch with a random port number.
833 * The caller must hold the disp->lock and qid->lock.
/*
 * Obtain a dedicated dispatch socket for destination 'dest', bound to a
 * randomly chosen local port.
 *
 * Outline of the visible logic:
 *  - pick the manager's IPv4 or IPv6 port array according to the family of
 *    disp->local; ISC_R_ADDRNOTAVAIL is returned on a path whose condition
 *    is not shown here (presumably an empty port array -- confirm);
 *  - reuse a dispsocket from the inactive list if one exists, otherwise
 *    allocate one from the manager's spool (ISC_R_NOMEMORY on failure) and
 *    initialise it, attaching one of the dispatch's internal tasks;
 *  - make up to 64 attempts: choose a port uniformly at random, skip it if
 *    a socket for the same destination and port is already in the socket
 *    table, and otherwise try open_socket(), requesting address reuse when
 *    this dispatch already has a port entry for that port;
 *  - on success record the socket, peer, port entry and bucket, link the
 *    dispsocket into sock_table and hand it back via *dispsockp;
 *    otherwise release the socket and destroy the dispsocket.
 */
836 get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
837 isc_socketmgr_t *sockmgr, dns_qid_t *qid,
838 dispsocket_t **dispsockp, in_port_t *portp)
842 dns_dispatchmgr_t *mgr = disp->mgr;
843 isc_socket_t *sock = NULL;
844 isc_result_t result = ISC_R_FAILURE;
846 isc_sockaddr_t localaddr;
847 unsigned int bucket = 0;
848 dispsocket_t *dispsock;
851 unsigned int bindoptions;
852 dispportentry_t *portentry = NULL;
854 if (isc_sockaddr_pf(&disp->local) == AF_INET) {
855 nports = disp->mgr->nv4ports;
856 ports = disp->mgr->v4ports;
858 nports = disp->mgr->nv6ports;
859 ports = disp->mgr->v6ports;
862 return (ISC_R_ADDRNOTAVAIL);
864 dispsock = ISC_LIST_HEAD(disp->inactivesockets);
865 if (dispsock != NULL) {
866 ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
867 sock = dispsock->socket;
868 dispsock->socket = NULL;
870 dispsock = isc_mempool_get(mgr->spool);
871 if (dispsock == NULL)
872 return (ISC_R_NOMEMORY);
875 dispsock->socket = NULL;
876 dispsock->disp = disp;
877 dispsock->resp = NULL;
878 dispsock->portentry = NULL;
880 dispsock->task = NULL;
881 isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
882 ISC_LINK_INIT(dispsock, link);
883 ISC_LINK_INIT(dispsock, blink);
884 dispsock->magic = DISPSOCK_MAGIC;
888 * Pick up a random UDP port and open a new socket with it. Avoid
889 * choosing ports that share the same destination because it will be
890 * very likely to fail in bind(2) or connect(2).
892 localaddr = disp->local;
893 for (i = 0; i < 64; i++) {
894 port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
896 isc_sockaddr_setport(&localaddr, port);
898 bucket = dns_hash(qid, dest, 0, port);
899 if (socket_search(qid, dest, port, bucket) != NULL)
902 portentry = port_search(disp, port);
903 if (portentry != NULL)
904 bindoptions |= ISC_SOCKET_REUSEADDRESS;
905 result = open_socket(sockmgr, &localaddr, bindoptions, &sock);
906 if (result == ISC_R_SUCCESS) {
907 if (portentry == NULL) {
908 portentry = new_portentry(disp, port);
909 if (portentry == NULL) {
910 result = ISC_R_NOMEMORY;
916 } else if (result == ISC_R_NOPERM) {
917 char buf[ISC_SOCKADDR_FORMATSIZE];
918 isc_sockaddr_format(&localaddr, buf, sizeof(buf));
919 dispatch_log(disp, ISC_LOG_WARNING,
920 "open_socket(%s) -> %s: continuing",
921 buf, isc_result_totext(result));
922 } else if (result != ISC_R_ADDRINUSE)
926 if (result == ISC_R_SUCCESS) {
927 dispsock->socket = sock;
928 dispsock->host = *dest;
929 dispsock->portentry = portentry;
930 dispsock->bucket = bucket;
931 ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
932 *dispsockp = dispsock;
936 * We could keep it in the inactive list, but since this should
937 * be an exceptional case and might be resource shortage, we'd
941 isc_socket_detach(&sock);
942 destroy_dispsocket(disp, &dispsock);
949 * Destroy a dedicated dispatch socket.
/*
 * Release every resource held by *dispsockp and return the structure to
 * the manager's spool.  The dispsocket must already be off the
 * active/inactive list ('link' unlinked).  In order: drop the port-entry
 * reference, detach the socket, remove it from the sock_table bucket if it
 * is still chained via 'blink', and detach its task.
 */
952 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
953 dispsocket_t *dispsock;
957 * The dispatch must be locked.
960 REQUIRE(dispsockp != NULL && *dispsockp != NULL);
961 dispsock = *dispsockp;
962 REQUIRE(!ISC_LINK_LINKED(dispsock, link));
966 if (dispsock->portentry != NULL)
967 deref_portentry(disp, &dispsock->portentry);
968 if (dispsock->socket != NULL)
969 isc_socket_detach(&dispsock->socket);
970 if (ISC_LINK_LINKED(dispsock, blink)) {
973 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
977 if (dispsock->task != NULL)
978 isc_task_detach(&dispsock->task);
979 isc_mempool_put(disp->mgr->spool, dispsock);
985 * Deactivate a dedicated dispatch socket. Move it to the inactive list for
986 * future reuse unless the total number of sockets exceeds the maximum.
/*
 * Take 'dispsock' out of service: unlink it from the active list, break
 * the association with its response entry, and drop its port-entry
 * reference.
 *
 * If the dispatch holds more than DNS_DISPATCH_POOLSOCKS sockets the
 * structure is destroyed.  Otherwise the underlying socket is closed and
 * the structure is removed from the socket table; when the close succeeds
 * it is kept on the inactive list for reuse, and when the close reports
 * ISC_R_NOTIMPLEMENTED it is destroyed instead.  The final
 * destroy_dispsocket() call appears to belong to an alternative build
 * branch whose preprocessor lines are not shown here -- confirm.
 */
989 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
994 * The dispatch must be locked.
996 ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
997 if (dispsock->resp != NULL) {
998 INSIST(dispsock->resp->dispsocket == dispsock);
999 dispsock->resp->dispsocket = NULL;
1002 INSIST(dispsock->portentry != NULL);
1003 deref_portentry(disp, &dispsock->portentry);
1006 if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
1007 destroy_dispsocket(disp, &dispsock);
1009 result = isc_socket_close(dispsock->socket);
1011 qid = DNS_QID(disp);
1013 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1017 if (result == ISC_R_SUCCESS)
1018 ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
1021 * If the underlying system does not allow this
1022 * optimization, destroy this temporary structure (and
1023 * create a new one for a new transaction).
1025 INSIST(result == ISC_R_NOTIMPLEMENTED);
1026 destroy_dispsocket(disp, &dispsock);
1030 /* This kind of optimization isn't necessary for normal use */
1034 destroy_dispsocket(disp, &dispsock);
1039 * Find an entry for query ID 'id', socket address 'dest', and port number
1041 * Return NULL if no such entry exists.
/*
 * Walk qid_table[bucket] for the response entry whose id, peer address and
 * port all match the arguments.  'bucket' must be a valid index into the
 * table.
 */
1043 static dns_dispentry_t *
1044 entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
1045 in_port_t port, unsigned int bucket)
1047 dns_dispentry_t *res;
1049 REQUIRE(bucket < qid->qid_nbuckets);
1051 res = ISC_LIST_HEAD(qid->qid_table[bucket]);
1053 while (res != NULL) {
1054 if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
1055 res->port == port) {
1058 res = ISC_LIST_NEXT(res, link);
/*
 * Return a receive buffer to where it came from, by socket type.
 *
 * TCP buffers go back to the manager's memory context with their exact
 * length.  UDP buffers go back to the manager's buffer pool under
 * buffer_lock, decrementing the manager-wide buffer count; 'len' must equal
 * the manager's configured buffer size.  'buf' must be non-NULL and 'len'
 * non-zero.
 */
1065 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
1066 INSIST(buf != NULL && len != 0);
1069 switch (disp->socktype) {
1070 case isc_sockettype_tcp:
1071 INSIST(disp->tcpbuffers > 0);
1073 isc_mem_put(disp->mgr->mctx, buf, len);
1075 case isc_sockettype_udp:
1076 LOCK(&disp->mgr->buffer_lock);
1077 INSIST(disp->mgr->buffers > 0);
1078 INSIST(len == disp->mgr->buffersize);
1079 disp->mgr->buffers--;
1080 isc_mempool_put(disp->mgr->bpool, buf);
1081 UNLOCK(&disp->mgr->buffer_lock);
/*
 * Get a UDP receive buffer from the manager's buffer pool while holding
 * buffer_lock, and bump the manager-wide buffer count.  NOTE(review): the
 * lines between the allocation and the increment are not shown here, so
 * whether the count is bumped on allocation failure cannot be confirmed
 * from this view.
 */
1090 allocate_udp_buffer(dns_dispatch_t *disp) {
1093 LOCK(&disp->mgr->buffer_lock);
1094 temp = isc_mempool_get(disp->mgr->bpool);
1097 disp->mgr->buffers++;
1098 UNLOCK(&disp->mgr->buffer_lock);
/*
 * Release a dispatch event.  The failsafe event is special-cased: it must
 * currently be marked as handed out (shutdown_out == 1) and that mark is
 * cleared.  Other events are returned to the manager's event pool.
 * Presumably the failsafe branch returns before the pool put -- the
 * relevant line is not shown here.
 */
1104 free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
1105 if (disp->failsafe_ev == ev) {
1106 INSIST(disp->shutdown_out == 1);
1107 disp->shutdown_out = 0;
1112 isc_mempool_put(disp->mgr->epool, ev);
/*
 * Get a dispatch event from the manager's event pool and initialise its
 * generic event header with zero/NULL fields via ISC_EVENT_INIT().  The
 * allocation-failure check is on a line not shown here.
 */
1115 static inline dns_dispatchevent_t *
1116 allocate_event(dns_dispatch_t *disp) {
1117 dns_dispatchevent_t *ev;
1119 ev = isc_mempool_get(disp->mgr->epool);
1122 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
1123 NULL, NULL, NULL, NULL, NULL);
/*
 * Receive callback for a dedicated (per-transaction) dispatch socket: the
 * event argument is the dispsocket, which is validated and passed, with
 * its owning dispatch, to udp_recv().
 */
1129 udp_exrecv(isc_task_t *task, isc_event_t *ev) {
1130 dispsocket_t *dispsock = ev->ev_arg;
1134 REQUIRE(VALID_DISPSOCK(dispsock));
1135 udp_recv(ev, dispsock->disp, dispsock);
/*
 * Receive callback for a dispatch's shared socket: the event argument is
 * the dispatch itself, which is validated and passed to udp_recv() with no
 * dispsocket.
 */
1139 udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1140 dns_dispatch_t *disp = ev->ev_arg;
1144 REQUIRE(VALID_DISPATCH(disp));
1145 udp_recv(ev, disp, NULL);
1151 * If I/O result == CANCELED or error, free the buffer.
1153 * If query, free the buffer, restart.
1156 * Allocate event, fill in details.
1157 * If cannot allocate, free buffer, restart.
1158 * find target. If not found, free buffer, restart.
1159 * if event queue is not empty, queue. else, send.
1163 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1164 isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1167 isc_buffer_t source;
1169 dns_dispentry_t *resp = NULL;
1170 dns_dispatchevent_t *rev;
1171 unsigned int bucket;
1172 isc_boolean_t killit;
1173 isc_boolean_t queue_response;
1174 dns_dispatchmgr_t *mgr;
1176 isc_netaddr_t netaddr;
1179 isc_boolean_t qidlocked = ISC_FALSE;
1186 dispatch_log(disp, LVL(90),
1187 "got packet: requests %d, buffers %d, recvs %d",
1188 disp->requests, disp->mgr->buffers, disp->recv_pending);
1190 if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1192 * Unless the receive event was imported from a listening
1193 * interface, in which case the event type is
1194 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1196 INSIST(disp->recv_pending != 0);
1197 disp->recv_pending = 0;
1200 if (dispsock != NULL &&
1201 (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1203 * dispsock->resp can be NULL if this transaction was canceled
1204 * just after receiving a response. Since this socket is
1205 * exclusively used and there should be at most one receive
1206 * event the canceled event should have been no effect. So
1207 * we can (and should) deactivate the socket right now.
1209 deactivate_dispsocket(disp, dispsock);
1213 if (disp->shutting_down) {
1215 * This dispatcher is shutting down.
1217 free_buffer(disp, ev->region.base, ev->region.length);
1219 isc_event_free(&ev_in);
1222 killit = destroy_disp_ok(disp);
1223 UNLOCK(&disp->lock);
1225 isc_task_send(disp->task[0], &disp->ctlevent);
1230 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1231 if (dispsock != NULL) {
1232 resp = dispsock->resp;
1234 if (ev->result != ISC_R_SUCCESS) {
1236 * This is most likely a network error on a
1237 * connected socket. It makes no sense to
1238 * check the address or parse the packet, but it
1239 * will help to return the error to the caller.
1244 free_buffer(disp, ev->region.base, ev->region.length);
1246 UNLOCK(&disp->lock);
1247 isc_event_free(&ev_in);
1250 } else if (ev->result != ISC_R_SUCCESS) {
1251 free_buffer(disp, ev->region.base, ev->region.length);
1253 if (ev->result != ISC_R_CANCELED)
1254 dispatch_log(disp, ISC_LOG_ERROR,
1255 "odd socket result in udp_recv(): %s",
1256 isc_result_totext(ev->result));
1258 UNLOCK(&disp->lock);
1259 isc_event_free(&ev_in);
1264 * If this is from a blackholed address, drop it.
1266 isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1267 if (disp->mgr->blackhole != NULL &&
1268 dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1269 NULL, &match, NULL) == ISC_R_SUCCESS &&
1272 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1273 char netaddrstr[ISC_NETADDR_FORMATSIZE];
1274 isc_netaddr_format(&netaddr, netaddrstr,
1275 sizeof(netaddrstr));
1276 dispatch_log(disp, LVL(10),
1277 "blackholed packet from %s",
1280 free_buffer(disp, ev->region.base, ev->region.length);
1285 * Peek into the buffer to see what we can see.
1287 isc_buffer_init(&source, ev->region.base, ev->region.length);
1288 isc_buffer_add(&source, ev->n);
1289 dres = dns_message_peekheader(&source, &id, &flags);
1290 if (dres != ISC_R_SUCCESS) {
1291 free_buffer(disp, ev->region.base, ev->region.length);
1292 dispatch_log(disp, LVL(10), "got garbage packet");
1296 dispatch_log(disp, LVL(92),
1297 "got valid DNS message header, /QR %c, id %u",
1298 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1301 * Look at flags. If query, drop it. If response,
1302 * look to see where it goes.
1304 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1306 free_buffer(disp, ev->region.base, ev->region.length);
1311 * Search for the corresponding response. If we are using an exclusive
1312 * socket, we've already identified it and we can skip the search; but
1313 * the ID and the address must match the expected ones.
1316 bucket = dns_hash(qid, &ev->address, id, disp->localport);
1318 qidlocked = ISC_TRUE;
1319 resp = entry_search(qid, &ev->address, id, disp->localport,
1321 dispatch_log(disp, LVL(90),
1322 "search for response in bucket %d: %s",
1323 bucket, (resp == NULL ? "not found" : "found"));
1326 inc_stats(mgr, dns_resstatscounter_mismatch);
1327 free_buffer(disp, ev->region.base, ev->region.length);
1330 } else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1332 dispatch_log(disp, LVL(90),
1333 "response to an exclusive socket doesn't match");
1334 inc_stats(mgr, dns_resstatscounter_mismatch);
1335 free_buffer(disp, ev->region.base, ev->region.length);
1340 * Now that we have the original dispatch the query was sent
1341 * from check that the address and port the response was
1342 * sent to make sense.
1344 if (disp != resp->disp) {
1349 * Check that the socket types and ports match.
1351 if (disp->socktype != resp->disp->socktype ||
1352 isc_sockaddr_getport(&disp->local) !=
1353 isc_sockaddr_getport(&resp->disp->local)) {
1354 free_buffer(disp, ev->region.base, ev->region.length);
1359 * If both dispatches are bound to an address then fail as
1360 * the addresses can't be equal (enforced by the IP stack).
1362 * Note under Linux a packet can be sent out via IPv4 socket
1363 * and the response be received via an IPv6 socket.
1365 * Requests sent out via IPv6 should always come back in
1368 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1369 isc_sockaddr_pf(&disp->local) != PF_INET6) {
1370 free_buffer(disp, ev->region.base, ev->region.length);
1373 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1374 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1375 if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1376 !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1377 free_buffer(disp, ev->region.base, ev->region.length);
1383 queue_response = resp->item_out;
1384 rev = allocate_event(resp->disp);
1386 free_buffer(disp, ev->region.base, ev->region.length);
1391 * At this point, rev contains the event we want to fill in, and
1392 * resp contains the information on the place to send it to.
1393 * Send the event off.
1395 isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1396 isc_buffer_add(&rev->buffer, ev->n);
1397 rev->result = ev->result;
1399 rev->addr = ev->address;
1400 rev->pktinfo = ev->pktinfo;
1401 rev->attributes = ev->attributes;
1402 if (queue_response) {
1403 ISC_LIST_APPEND(resp->items, rev, ev_link);
1405 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1407 resp->action, resp->arg, resp, NULL, NULL);
1408 request_log(disp, resp, LVL(90),
1409 "[a] Sent event %p buffer %p len %d to task %p",
1410 rev, rev->buffer.base, rev->buffer.length,
1412 resp->item_out = ISC_TRUE;
1413 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1420 * Restart recv() to get the next packet.
1423 result = startrecv(disp, dispsock);
1424 if (result != ISC_R_SUCCESS && dispsock != NULL) {
1426 * XXX: weird. There seems to be no recovery process other than
1427 * deactivate this socket anyway (since we cannot start
1428 * receiving, we won't be able to receive a cancel event
1431 deactivate_dispsocket(disp, dispsock);
1433 UNLOCK(&disp->lock);
1435 isc_event_free(&ev_in);
1441 * If I/O result == CANCELED, EOF, or error, notify everyone as the
1442 * various queues drain.
1444 * If query, restart.
1447 * Allocate event, fill in details.
1448 * If cannot allocate, restart.
1449 * find target. If not found, restart.
1450 * if event queue is not empty, queue. else, send.
/*
 * tcp_recv(): completion handler for a read started on a TCP dispatch
 * (startrecv() passes it to dns_tcpmsg_readmessage()).
 *
 * 'ev_in->ev_arg' is the dispatch; the received message lives in
 * disp->tcpmsg.  On a failed read the dispatch is marked as shutting
 * down and its control event is posted.  On success the DNS header is
 * peeked, the matching request entry is looked up by (address, id,
 * local port), and a dispatch event carrying the buffer is either queued
 * on the entry or sent to the entry's task.
 *
 * NOTE(review): this listing omits short source lines (braces, labels,
 * LOCK/goto statements), so the exact branch structure must be confirmed
 * against the full file.
 */
1454 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1455 dns_dispatch_t *disp = ev_in->ev_arg;
1456 dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1460 dns_dispentry_t *resp;
1461 dns_dispatchevent_t *rev;
1462 unsigned int bucket;
1463 isc_boolean_t killit;
1464 isc_boolean_t queue_response;
1467 char buf[ISC_SOCKADDR_FORMATSIZE];
1471 REQUIRE(VALID_DISPATCH(disp));
1475 dispatch_log(disp, LVL(90),
1476 "got TCP packet: requests %d, buffers %d, recvs %d",
1477 disp->requests, disp->tcpbuffers, disp->recv_pending);
/* This handler is the completion of the one outstanding read. */
1481 INSIST(disp->recv_pending != 0);
1482 disp->recv_pending = 0;
1484 if (disp->refcount == 0) {
1486 * This dispatcher is shutting down. Force cancelation.
1488 tcpmsg->result = ISC_R_CANCELED;
/* Failure path: pick a log level by result, then shut the dispatch down. */
1491 if (tcpmsg->result != ISC_R_SUCCESS) {
1492 switch (tcpmsg->result) {
1493 case ISC_R_CANCELED:
1497 dispatch_log(disp, LVL(90), "shutting down on EOF");
1501 case ISC_R_CONNECTIONRESET:
1502 level = ISC_LOG_INFO;
1506 level = ISC_LOG_ERROR;
1508 isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1509 dispatch_log(disp, level, "shutting down due to TCP "
1510 "receive error: %s: %s", buf,
1511 isc_result_totext(tcpmsg->result));
1517 * The event is statically allocated in the tcpmsg
1518 * structure, and destroy_disp() frees the tcpmsg, so we must
1519 * free the event *before* calling destroy_disp().
1521 isc_event_free(&ev_in);
1523 disp->shutting_down = 1;
1524 disp->shutdown_why = tcpmsg->result;
1527 * If the recv() was canceled pass the word on.
1529 killit = destroy_disp_ok(disp);
1530 UNLOCK(&disp->lock);
/* NOTE(review): presumably guarded by 'killit' on a line not shown here. */
1532 isc_task_send(disp->task[0], &disp->ctlevent);
1536 dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1538 tcpmsg->buffer.length, tcpmsg->buffer.base);
1541 * Peek into the buffer to see what we can see.
1543 dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1544 if (dres != ISC_R_SUCCESS) {
1545 dispatch_log(disp, LVL(10), "got garbage packet");
1549 dispatch_log(disp, LVL(92),
1550 "got valid DNS message header, /QR %c, id %u",
1551 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1554 * Allocate an event to send to the query or response client, and
1555 * allocate a new buffer for our use.
1559 * Look at flags. If query, drop it. If response,
1560 * look to see where it goes.
1562 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
/* Locate the outstanding request this response belongs to. */
1572 bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1574 resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1575 dispatch_log(disp, LVL(90),
1576 "search for response in bucket %d: %s",
1577 bucket, (resp == NULL ? "not found" : "found"));
/* If an event is already out for this entry, the new one is queued instead. */
1581 queue_response = resp->item_out;
1582 rev = allocate_event(disp);
1587 * At this point, rev contains the event we want to fill in, and
1588 * resp contains the information on the place to send it to.
1589 * Send the event off.
1591 dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1593 rev->result = ISC_R_SUCCESS;
1595 rev->addr = tcpmsg->address;
1596 if (queue_response) {
1597 ISC_LIST_APPEND(resp->items, rev, ev_link);
1599 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1600 resp->action, resp->arg, resp, NULL, NULL);
1601 request_log(disp, resp, LVL(90),
1602 "[b] Sent event %p buffer %p len %d to task %p",
1603 rev, rev->buffer.base, rev->buffer.length,
1605 resp->item_out = ISC_TRUE;
1606 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1612 * Restart recv() to get the next packet.
/* The result is deliberately discarded via the (void) cast. */
1615 (void)startrecv(disp, NULL);
1617 UNLOCK(&disp->lock);
1619 isc_event_free(&ev_in);
1623 * disp must be locked.
/*
 * startrecv(): arm the next receive on a dispatch.
 *
 * 'dispsock', when non-NULL, selects that socket and its task (handler
 * udp_exrecv); otherwise the dispatch's own socket and task[0] are used
 * (handler udp_shrecv for UDP).
 *
 * Returns ISC_R_SUCCESS without doing anything when the dispatch is
 * shutting down, is marked NOLISTEN, or already has a receive pending on
 * the shared socket.  Returns ISC_R_NOMEMORY when the manager's buffer
 * count has reached maxbuffers or no UDP buffer could be allocated.
 * A failed read start on the shared UDP socket or on TCP is recorded in
 * shutdown_why/shutting_down and still reported as ISC_R_SUCCESS
 * ("recover by cancel").
 */
1626 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1628 isc_region_t region;
1629 isc_socket_t *socket;
1631 if (disp->shutting_down == 1)
1632 return (ISC_R_SUCCESS);
1634 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1635 return (ISC_R_SUCCESS);
1637 if (disp->recv_pending != 0 && dispsock == NULL)
1638 return (ISC_R_SUCCESS);
1640 if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1641 return (ISC_R_NOMEMORY);
/* NOTE(review): second half of this condition is on a line not shown here. */
1643 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1645 return (ISC_R_SUCCESS);
1647 if (dispsock != NULL)
1648 socket = dispsock->socket;
1650 socket = disp->socket;
1651 INSIST(socket != NULL);
1653 switch (disp->socktype) {
1655 * UDP reads are always maximal.
1657 case isc_sockettype_udp:
1658 region.length = disp->mgr->buffersize;
1659 region.base = allocate_udp_buffer(disp);
1660 if (region.base == NULL)
1661 return (ISC_R_NOMEMORY);
1662 if (dispsock != NULL) {
1663 res = isc_socket_recv(socket, ®ion, 1,
1664 dispsock->task, udp_exrecv,
1666 if (res != ISC_R_SUCCESS) {
1667 free_buffer(disp, region.base, region.length);
1671 res = isc_socket_recv(socket, ®ion, 1,
1672 disp->task[0], udp_shrecv, disp);
1673 if (res != ISC_R_SUCCESS) {
1674 free_buffer(disp, region.base, region.length);
1675 disp->shutdown_why = res;
1676 disp->shutting_down = 1;
1678 return (ISC_R_SUCCESS); /* recover by cancel */
/* Only the shared-socket path tracks a single pending receive. */
1680 INSIST(disp->recv_pending == 0);
1681 disp->recv_pending = 1;
1685 case isc_sockettype_tcp:
1686 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1688 if (res != ISC_R_SUCCESS) {
1689 disp->shutdown_why = res;
1690 disp->shutting_down = 1;
1692 return (ISC_R_SUCCESS); /* recover by cancel */
1694 INSIST(disp->recv_pending == 0);
1695 disp->recv_pending = 1;
1702 return (ISC_R_SUCCESS);
1706 * Mgr must be locked when calling this function.
/*
 * destroy_mgr_ok(): report whether the manager can be torn down now.
 *
 * Logs the inputs and then tests them one by one: the manager must be
 * shutting down, its dispatch list must be empty, and the event,
 * request and dispatch mempools must have nothing allocated.
 *
 * NOTE(review): the return statements are on lines not shown in this
 * listing; presumably each failed test returns ISC_FALSE and falling
 * through returns ISC_TRUE -- confirm against the full file.
 */
1708 static isc_boolean_t
1709 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1710 mgr_log(mgr, LVL(90),
1711 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1712 "epool=%d, rpool=%d, dpool=%d",
1713 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1714 isc_mempool_getallocated(mgr->epool),
1715 isc_mempool_getallocated(mgr->rpool),
1716 isc_mempool_getallocated(mgr->dpool));
1717 if (!MGR_IS_SHUTTINGDOWN(mgr))
1719 if (!ISC_LIST_EMPTY(mgr->list))
1721 if (isc_mempool_getallocated(mgr->epool) != 0)
1723 if (isc_mempool_getallocated(mgr->rpool) != 0)
1725 if (isc_mempool_getallocated(mgr->dpool) != 0)
1732 * Mgr must be unlocked when calling this function.
/*
 * destroy_mgr(): release everything owned by a dispatch manager.
 *
 * Destroys the manager's mutexes and mempools (bpool and spool only if
 * they were created), detaches the optional entropy source, blackhole
 * ACL and stats, destroys the qid table, frees the v4/v6 port arrays,
 * and finally frees the manager structure and detaches the memory
 * context.
 *
 * NOTE(review): the lines that load 'mgr' and 'mctx' from '*mgrp' are
 * not shown in this listing.
 */
1735 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1737 dns_dispatchmgr_t *mgr;
1746 DESTROYLOCK(&mgr->lock);
1749 DESTROYLOCK(&mgr->arc4_lock);
1751 isc_mempool_destroy(&mgr->epool);
1752 isc_mempool_destroy(&mgr->rpool);
1753 isc_mempool_destroy(&mgr->dpool);
/* bpool and spool exist only after dns_dispatchmgr_setudp() created them. */
1754 if (mgr->bpool != NULL)
1755 isc_mempool_destroy(&mgr->bpool);
1756 if (mgr->spool != NULL)
1757 isc_mempool_destroy(&mgr->spool);
/* The pools were associated with pool_lock, so it goes after them. */
1759 DESTROYLOCK(&mgr->pool_lock);
1762 if (mgr->entropy != NULL)
1763 isc_entropy_detach(&mgr->entropy);
1765 if (mgr->qid != NULL)
1766 qid_destroy(mctx, &mgr->qid);
1768 DESTROYLOCK(&mgr->buffer_lock);
1770 if (mgr->blackhole != NULL)
1771 dns_acl_detach(&mgr->blackhole);
1773 if (mgr->stats != NULL)
1774 isc_stats_detach(&mgr->stats);
/* Port arrays are sized by their element counts, matching their allocation. */
1776 if (mgr->v4ports != NULL) {
1777 isc_mem_put(mctx, mgr->v4ports,
1778 mgr->nv4ports * sizeof(in_port_t));
1780 if (mgr->v6ports != NULL) {
1781 isc_mem_put(mctx, mgr->v6ports,
1782 mgr->nv6ports * sizeof(in_port_t));
1784 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1785 isc_mem_detach(&mctx);
/*
 * open_socket(): obtain a UDP socket bound to 'local'.
 *
 * 'options' is passed through to isc_socket_bind().  The socket is
 * named "dispatcher" and, unless ISC_ALLOW_MAPPED is defined, set to
 * IPv6-only before binding.
 *
 * NOTE(review): both isc_socket_create() and isc_socket_open(), and both
 * isc_socket_detach() and isc_socket_close(), appear here; the
 * conditions selecting between them (presumably whether '*sockp' already
 * holds a socket to reuse) and the store to '*sockp' are on lines not
 * shown in this listing.
 */
1789 open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1790 unsigned int options, isc_socket_t **sockp)
1793 isc_result_t result;
1797 result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1798 isc_sockettype_udp, &sock);
1799 if (result != ISC_R_SUCCESS)
1801 isc_socket_setname(sock, "dispatcher", NULL);
1804 result = isc_socket_open(sock);
1805 if (result != ISC_R_SUCCESS)
1812 #ifndef ISC_ALLOW_MAPPED
1813 isc_socket_ipv6only(sock, ISC_TRUE);
1815 result = isc_socket_bind(sock, local, options);
/* A failed bind releases the socket before returning. */
1816 if (result != ISC_R_SUCCESS) {
1818 isc_socket_detach(&sock);
1821 isc_socket_close(sock);
1830 return (ISC_R_SUCCESS);
1834 * Create a temporary port list to set the initial default set of dispatch
1835 * ports: [1024, 65535]. This is almost meaningless as the application will
1836 * normally set the ports explicitly, but is provided to fill some minor corner
/*
 * create_default_portset(): create a port set in 'mctx', store it in
 * '*portsetp', and mark ports 1024 through 65535 as members.
 * Returns ISC_R_SUCCESS once the range has been added; a failure from
 * isc_portset_create() is handled on a line not shown in this listing
 * (presumably by returning that result).
 */
1840 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1841 isc_result_t result;
1843 result = isc_portset_create(mctx, portsetp);
1844 if (result != ISC_R_SUCCESS)
1846 isc_portset_addrange(*portsetp, 1024, 65535);
1848 return (ISC_R_SUCCESS);
/*
 * dns_dispatchmgr_create(): allocate and initialize a dispatch manager.
 *
 * 'mctx' must be non-NULL; 'mgrp' must point to a NULL pointer.
 * 'entropy' may be NULL; when given it is attached and used to seed the
 * manager's ARC4 context.
 *
 * Steps, in order: allocate the structure, attach the memory context,
 * initialize four mutexes (lock, arc4_lock, buffer_lock, pool_lock),
 * create the event/request/dispatch mempools, install the default port
 * sets (1024-65535 for both families) through
 * dns_dispatchmgr_setavailports(), then attach entropy and initialize
 * the random state.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY / the failing call's result.
 * The statements after the success return undo the steps in reverse
 * order; their goto labels are on lines not shown in this listing.
 */
1856 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1857 dns_dispatchmgr_t **mgrp)
1859 dns_dispatchmgr_t *mgr;
1860 isc_result_t result;
1861 isc_portset_t *v4portset = NULL;
1862 isc_portset_t *v6portset = NULL;
1864 REQUIRE(mctx != NULL);
1865 REQUIRE(mgrp != NULL && *mgrp == NULL);
1867 mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1869 return (ISC_R_NOMEMORY);
1872 isc_mem_attach(mctx, &mgr->mctx);
1874 mgr->blackhole = NULL;
1877 result = isc_mutex_init(&mgr->lock);
1878 if (result != ISC_R_SUCCESS)
1881 result = isc_mutex_init(&mgr->arc4_lock);
1882 if (result != ISC_R_SUCCESS)
1885 result = isc_mutex_init(&mgr->buffer_lock);
1886 if (result != ISC_R_SUCCESS)
1887 goto kill_arc4_lock;
1889 result = isc_mutex_init(&mgr->pool_lock);
1890 if (result != ISC_R_SUCCESS)
1891 goto kill_buffer_lock;
/* Any mempool creation failure is reported as ISC_R_NOMEMORY. */
1894 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1895 &mgr->epool) != ISC_R_SUCCESS) {
1896 result = ISC_R_NOMEMORY;
1897 goto kill_pool_lock;
1901 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1902 &mgr->rpool) != ISC_R_SUCCESS) {
1903 result = ISC_R_NOMEMORY;
1908 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
1909 &mgr->dpool) != ISC_R_SUCCESS) {
1910 result = ISC_R_NOMEMORY;
/* All three pools share pool_lock and keep up to 1024 free items. */
1914 isc_mempool_setname(mgr->epool, "dispmgr_epool");
1915 isc_mempool_setfreemax(mgr->epool, 1024);
1916 isc_mempool_associatelock(mgr->epool, &mgr->pool_lock);
1918 isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
1919 isc_mempool_setfreemax(mgr->rpool, 1024);
1920 isc_mempool_associatelock(mgr->rpool, &mgr->pool_lock);
1922 isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
1923 isc_mempool_setfreemax(mgr->dpool, 1024);
1924 isc_mempool_associatelock(mgr->dpool, &mgr->pool_lock);
1927 mgr->buffersize = 0;
1928 mgr->maxbuffers = 0;
1931 mgr->entropy = NULL;
1934 ISC_LIST_INIT(mgr->list);
1935 mgr->v4ports = NULL;
1936 mgr->v6ports = NULL;
/* Magic is set before setavailports(), which REQUIREs a valid manager. */
1939 mgr->magic = DNS_DISPATCHMGR_MAGIC;
1941 result = create_default_portset(mctx, &v4portset);
1942 if (result == ISC_R_SUCCESS) {
1943 result = create_default_portset(mctx, &v6portset);
1944 if (result == ISC_R_SUCCESS) {
1945 result = dns_dispatchmgr_setavailports(mgr,
/* The temporary port sets are destroyed whether or not the calls above succeeded. */
1950 if (v4portset != NULL)
1951 isc_portset_destroy(mctx, &v4portset);
1952 if (v6portset != NULL)
1953 isc_portset_destroy(mctx, &v6portset);
1954 if (result != ISC_R_SUCCESS)
1958 if (entropy != NULL)
1959 isc_entropy_attach(entropy, &mgr->entropy);
1964 dispatch_initrandom(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
1967 return (ISC_R_SUCCESS);
/* Error unwinding: resources are released in reverse order of creation. */
1970 isc_mempool_destroy(&mgr->dpool);
1972 isc_mempool_destroy(&mgr->rpool);
1974 isc_mempool_destroy(&mgr->epool);
1976 DESTROYLOCK(&mgr->pool_lock);
1978 DESTROYLOCK(&mgr->buffer_lock);
1980 DESTROYLOCK(&mgr->arc4_lock);
1982 DESTROYLOCK(&mgr->lock);
1984 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1985 isc_mem_detach(&mctx);
/*
 * dns_dispatchmgr_setblackhole(): replace the manager's blackhole ACL.
 * Any ACL currently held is detached first, then 'blackhole' is attached.
 * 'mgr' must be a valid dispatch manager.
 */
1991 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1992 REQUIRE(VALID_DISPATCHMGR(mgr));
1993 if (mgr->blackhole != NULL)
1994 dns_acl_detach(&mgr->blackhole);
1995 dns_acl_attach(blackhole, &mgr->blackhole);
/*
 * dns_dispatchmgr_getblackhole(): return the manager's blackhole ACL
 * pointer as stored (NULL when none is set).  No reference is taken
 * here.
 */
1999 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
2000 REQUIRE(VALID_DISPATCHMGR(mgr));
2001 return (mgr->blackhole);
/*
 * dns_dispatchmgr_setblackportlist(): deprecated entry point kept for
 * callers; it validates 'mgr' and, per the comment in its body, has been
 * superseded by dns_dispatchmgr_setavailports().
 */
2005 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
2006 dns_portlist_t *portlist)
2008 REQUIRE(VALID_DISPATCHMGR(mgr));
2011 /* This function is deprecated: use dns_dispatchmgr_setavailports(). */
/*
 * dns_dispatchmgr_getblackportlist(): deprecated; validates 'mgr' and
 * always returns NULL.
 */
2016 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
2017 REQUIRE(VALID_DISPATCHMGR(mgr));
2018 return (NULL); /* this function is deprecated */
/*
 * dns_dispatchmgr_setavailports(): install the sets of UDP ports the
 * manager may use for IPv4 and IPv6.
 *
 * Each port set is flattened into a newly allocated in_port_t array with
 * one element per member port (no array is allocated for an empty set).
 * The arrays are filled by scanning port numbers upward, so they come out
 * in ascending order, which the bsearch() in portavailable() relies on.
 * Any arrays previously held by the manager are freed and replaced.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY if an array cannot be
 * allocated (a v4 array already allocated is released first).
 */
2022 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
2023 isc_portset_t *v6portset)
2025 in_port_t *v4ports, *v6ports, p;
2026 unsigned int nv4ports, nv6ports, i4, i6;
2028 REQUIRE(VALID_DISPATCHMGR(mgr));
2030 nv4ports = isc_portset_nports(v4portset);
2031 nv6ports = isc_portset_nports(v6portset);
2034 if (nv4ports != 0) {
2035 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
2036 if (v4ports == NULL)
2037 return (ISC_R_NOMEMORY);
2040 if (nv6ports != 0) {
2041 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
2042 if (v6ports == NULL) {
2043 if (v4ports != NULL) {
/* NOTE(review): one argument line of this call is not shown; confirm the size equals the allocation size. */
2044 isc_mem_put(mgr->mctx, v4ports,
2046 isc_portset_nports(v4portset));
2048 return (ISC_R_NOMEMORY);
/* Loop body: append each member port to the array for its family. */
2056 if (isc_portset_isset(v4portset, p)) {
2057 INSIST(i4 < nv4ports);
2060 if (isc_portset_isset(v6portset, p)) {
2061 INSIST(i6 < nv6ports);
/* Post-increment in the test lets port 65535 itself be examined before the loop ends. */
2064 } while (p++ < 65535);
2065 INSIST(i4 == nv4ports && i6 == nv6ports);
/* Swap in the new arrays, releasing the old ones with their old counts. */
2068 if (mgr->v4ports != NULL) {
2069 isc_mem_put(mgr->mctx, mgr->v4ports,
2070 mgr->nv4ports * sizeof(in_port_t));
2072 mgr->v4ports = v4ports;
2073 mgr->nv4ports = nv4ports;
2075 if (mgr->v6ports != NULL) {
2076 isc_mem_put(mgr->mctx, mgr->v6ports,
2077 mgr->nv6ports * sizeof(in_port_t));
2079 mgr->v6ports = v6ports;
2080 mgr->nv6ports = nv6ports;
2083 return (ISC_R_SUCCESS);
/*
 * dns_dispatchmgr_setudp(): create or adjust the manager-wide UDP
 * resources: the receive-buffer pool (bpool), the dispatch-socket pool
 * (spool) and the qid table.
 *
 * Requirements: 512 <= buffersize < 64K, maxbuffers > 0,
 * buckets < 2097169, increment > buckets.
 *
 * When bpool already exists its maximum is only ever raised.  When spool
 * already exists its maximum is reset and the function returns
 * ISC_R_SUCCESS without touching the qid table or buffersize.
 * Otherwise the pools and the qid table are created and
 * buffersize/maxbuffers are recorded.  All of this runs under
 * mgr->buffer_lock.
 */
2087 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
2088 unsigned int buffersize, unsigned int maxbuffers,
2089 unsigned int maxrequests, unsigned int buckets,
2090 unsigned int increment)
2092 isc_result_t result;
2094 REQUIRE(VALID_DISPATCHMGR(mgr));
2095 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2096 REQUIRE(maxbuffers > 0);
2097 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2098 REQUIRE(increment > buckets);
2101 * Keep some number of items around. This should be a config
2102 * option. For now, keep 8, but later keep at least two even
2103 * if the caller wants less. This allows us to ensure certain
2104 * things, like an event can be "freed" and the next allocation
2105 * will always succeed.
2107 * Note that if limits are placed on anything here, we use one
2108 * event internally, so the actual limit should be "wanted + 1."
2116 LOCK(&mgr->buffer_lock);
2118 /* Create or adjust buffer pool */
2119 if (mgr->bpool != NULL) {
2121 * We only increase the maxbuffers to avoid accidental buffer
2122 * shortage. Ideally we'd separate the manager-wide maximum
2123 * from per-dispatch limits and respect the latter within the
2124 * global limit. But at this moment that's deemed to be
2125 * overkilling and isn't worth additional implementation
2128 if (maxbuffers > mgr->maxbuffers) {
2129 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2130 mgr->maxbuffers = maxbuffers;
2133 result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
2134 if (result != ISC_R_SUCCESS) {
2135 UNLOCK(&mgr->buffer_lock);
2138 isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
2139 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2140 isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock);
2143 /* Create or adjust socket pool */
/* NOTE(review): an existing spool is capped at DNS_DISPATCH_POOLSOCKS * 2, a new one at maxrequests -- confirm this asymmetry is intended. */
2144 if (mgr->spool != NULL) {
2145 isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
2146 UNLOCK(&mgr->buffer_lock);
2147 return (ISC_R_SUCCESS);
2149 result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
2151 if (result != ISC_R_SUCCESS) {
/* NOTE(review): the statement following this UNLOCK is not shown; if it jumps to the cleanup code at the end, buffer_lock would be unlocked twice -- confirm. */
2152 UNLOCK(&mgr->buffer_lock);
2155 isc_mempool_setname(mgr->spool, "dispmgr_spool");
2156 isc_mempool_setmaxalloc(mgr->spool, maxrequests);
2157 isc_mempool_associatelock(mgr->spool, &mgr->pool_lock);
2159 result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
2160 if (result != ISC_R_SUCCESS)
2163 mgr->buffersize = buffersize;
2164 mgr->maxbuffers = maxbuffers;
2165 UNLOCK(&mgr->buffer_lock);
2166 return (ISC_R_SUCCESS);
/* Cleanup path: drop both pools, then release the lock. */
2169 isc_mempool_destroy(&mgr->bpool);
2170 if (mgr->spool != NULL)
2171 isc_mempool_destroy(&mgr->spool);
2172 UNLOCK(&mgr->buffer_lock);
/*
 * dns_dispatchmgr_destroy(): begin shutting down a dispatch manager.
 *
 * '*mgrp' must be a valid manager.  The manager is flagged
 * MGR_SHUTTINGDOWN and destroy_mgr_ok() decides whether it can be freed
 * right away; the outcome is logged as "killit".
 *
 * NOTE(review): the lines that load 'mgr' from '*mgrp', take/release the
 * manager lock and act on 'killit' are not shown in this listing.
 */
2177 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
2178 dns_dispatchmgr_t *mgr;
2179 isc_boolean_t killit;
2181 REQUIRE(mgrp != NULL);
2182 REQUIRE(VALID_DISPATCHMGR(*mgrp));
2188 mgr->state |= MGR_SHUTTINGDOWN;
2190 killit = destroy_mgr_ok(mgr);
2193 mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
/*
 * dns_dispatchmgr_setstats(): attach a statistics object to the manager.
 * May only be called on a valid manager that has no dispatches yet and
 * no statistics object already attached.
 */
2200 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2201 REQUIRE(VALID_DISPATCHMGR(mgr));
2202 REQUIRE(ISC_LIST_EMPTY(mgr->list));
2203 REQUIRE(mgr->stats == NULL);
2205 isc_stats_attach(stats, &mgr->stats);
/*
 * port_cmp(): comparison callback handed to bsearch() by
 * portavailable().  Both arguments point to in_port_t values: 'key' is
 * the port being searched for and 'ent' is an array element.
 * NOTE(review): the comparison and return statements are on lines not
 * shown in this listing.
 */
2209 port_cmp(const void *key, const void *ent) {
2210 in_port_t p1 = *(const in_port_t *)key;
2211 in_port_t p2 = *(const in_port_t *)ent;
/*
 * portavailable(): tell whether a port is in the manager's configured
 * port list for its address family.
 *
 * At least one of 'sock' and 'sockaddrp' must be non-NULL.  When the
 * address has to come from the socket it is fetched with
 * isc_socket_getsockname() into a local variable (the guarding condition
 * is on a line not shown in this listing).  The v4 list is used for
 * AF_INET addresses and the v6 list otherwise, and the port is looked up
 * by binary search.
 *
 * 'available' starts as ISC_FALSE and becomes ISC_TRUE only when the
 * port is found.
 */
2221 static isc_boolean_t
2222 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2223 isc_sockaddr_t *sockaddrp)
2225 isc_sockaddr_t sockaddr;
2226 isc_result_t result;
2227 in_port_t *ports, port;
2228 unsigned int nports;
2229 isc_boolean_t available = ISC_FALSE;
2231 REQUIRE(sock != NULL || sockaddrp != NULL);
2235 sockaddrp = &sockaddr;
2236 result = isc_socket_getsockname(sock, sockaddrp);
2237 if (result != ISC_R_SUCCESS)
2241 if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2242 ports = mgr->v4ports;
2243 nports = mgr->nv4ports;
2245 ports = mgr->v6ports;
2246 nports = mgr->nv6ports;
2251 port = isc_sockaddr_getport(sockaddrp);
2252 if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2253 available = ISC_TRUE;
/* True when _a1 and _a2 agree on every bit selected by _mask. */
2260 #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
/*
 * local_addr_match(): decide whether dispatch 'disp' can serve a request
 * for local address 'addr'.  The dispatch must have a socket.
 *
 * Order of checks, as visible here: a wildcard-port request against a
 * wildcard-port dispatch is tested against the currently configured port
 * list; then the configured binding is compared directly; then, for a
 * non-wildcard request port on the same address, the socket's actual
 * bound address (isc_socket_getsockname()) is compared with 'addr'.
 *
 * NOTE(review): the return statements of the early branches are on lines
 * not shown in this listing.
 */
2262 static isc_boolean_t
2263 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2264 isc_sockaddr_t sockaddr;
2265 isc_result_t result;
2267 REQUIRE(disp->socket != NULL);
2273 * Don't match wildcard ports unless the port is available in the
2274 * current configuration.
2276 if (isc_sockaddr_getport(addr) == 0 &&
2277 isc_sockaddr_getport(&disp->local) == 0 &&
2278 !portavailable(disp->mgr, disp->socket, NULL)) {
2283 * Check if we match the binding <address,port>.
2284 * Wildcard ports match/fail here.
2286 if (isc_sockaddr_equal(&disp->local, addr))
2288 if (isc_sockaddr_getport(addr) == 0)
2292 * Check if we match a bound wildcard port <address,port>.
2294 if (!isc_sockaddr_eqaddr(&disp->local, addr))
2296 result = isc_socket_getsockname(disp->socket, &sockaddr);
2297 if (result != ISC_R_SUCCESS)
2300 return (isc_sockaddr_equal(&sockaddr, addr));
2304 * Requires mgr be locked.
2306 * No dispatcher can be locked by this thread when calling this function.
2310 * If a matching dispatcher is found, it is locked after this function
2311 * returns, and must be unlocked by the caller.
/*
 * dispatch_find(): search the manager's dispatch list for one that is
 * not shutting down, whose attributes match 'attributes' under 'mask',
 * and whose local address matches 'local'.
 *
 * PRIVATE and EXCLUSIVE are cleared from 'attributes' and forced into
 * 'mask', so dispatches carrying either flag never match.
 *
 * Result is ISC_R_NOTFOUND or ISC_R_SUCCESS.  Dispatches that do not
 * match are unlocked before moving on.
 * NOTE(review): the LOCK of each candidate, the loop exit and the store
 * to '*dispp' are on lines not shown in this listing.
 */
2314 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2315 unsigned int attributes, unsigned int mask,
2316 dns_dispatch_t **dispp)
2318 dns_dispatch_t *disp;
2319 isc_result_t result;
2322 * Make certain that we will not match a private or exclusive dispatch.
2324 attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2325 mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2327 disp = ISC_LIST_HEAD(mgr->list);
2328 while (disp != NULL) {
2330 if ((disp->shutting_down == 0)
2331 && ATTRMATCH(disp->attributes, attributes, mask)
2332 && local_addr_match(disp, local))
2334 UNLOCK(&disp->lock);
2335 disp = ISC_LIST_NEXT(disp, link);
2339 result = ISC_R_NOTFOUND;
2344 result = ISC_R_SUCCESS;
/*
 * qid_allocate(): allocate and initialize a query-id table.
 *
 * 'buckets' is the number of hash buckets (must be < 2097169) and
 * 'increment' the collision step (must be > buckets).  '*qidp' must be
 * NULL on entry.  When 'needsocktable' is true a parallel socket table
 * with the same number of buckets is allocated as well; otherwise
 * sock_table stays NULL.
 *
 * Returns ISC_R_SUCCESS, ISC_R_NOMEMORY on allocation failure, or (on a
 * line not shown here, presumably) the mutex-init result.  Every failure
 * path frees whatever was already allocated.
 */
2351 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2352 unsigned int increment, dns_qid_t **qidp,
2353 isc_boolean_t needsocktable)
2357 isc_result_t result;
2359 REQUIRE(VALID_DISPATCHMGR(mgr));
2360 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2361 REQUIRE(increment > buckets);
2362 REQUIRE(qidp != NULL && *qidp == NULL);
2364 qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2366 return (ISC_R_NOMEMORY);
2368 qid->qid_table = isc_mem_get(mgr->mctx,
2369 buckets * sizeof(dns_displist_t));
2370 if (qid->qid_table == NULL) {
2371 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2372 return (ISC_R_NOMEMORY);
2375 qid->sock_table = NULL;
2376 if (needsocktable) {
2377 qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2378 sizeof(dispsocketlist_t));
2379 if (qid->sock_table == NULL) {
2380 isc_mem_put(mgr->mctx, qid->qid_table,
2381 buckets * sizeof(dns_displist_t));
2382 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2383 return (ISC_R_NOMEMORY);
2387 result = isc_mutex_init(&qid->lock);
2388 if (result != ISC_R_SUCCESS) {
2389 if (qid->sock_table != NULL) {
2390 isc_mem_put(mgr->mctx, qid->sock_table,
2391 buckets * sizeof(dispsocketlist_t));
2393 isc_mem_put(mgr->mctx, qid->qid_table,
2394 buckets * sizeof(dns_displist_t));
2395 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
/* Every bucket starts out as an empty list. */
2399 for (i = 0; i < buckets; i++) {
2400 ISC_LIST_INIT(qid->qid_table[i]);
2401 if (qid->sock_table != NULL)
2402 ISC_LIST_INIT(qid->sock_table[i]);
2405 qid->qid_nbuckets = buckets;
2406 qid->qid_increment = increment;
2407 qid->magic = QID_MAGIC;
2409 return (ISC_R_SUCCESS);
/*
 * qid_destroy(): free a query-id table created by qid_allocate().
 *
 * Releases the bucket array, the socket table if one was allocated
 * (both sized by qid_nbuckets), the mutex, and the structure itself,
 * all against 'mctx'.
 * NOTE(review): the lines that load 'qid' from '*qidp' and clear the
 * caller's pointer are not shown in this listing.
 */
2413 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2416 REQUIRE(qidp != NULL);
2419 REQUIRE(VALID_QID(qid));
2423 isc_mem_put(mctx, qid->qid_table,
2424 qid->qid_nbuckets * sizeof(dns_displist_t));
2425 if (qid->sock_table != NULL) {
2426 isc_mem_put(mctx, qid->sock_table,
2427 qid->qid_nbuckets * sizeof(dispsocketlist_t));
2429 DESTROYLOCK(&qid->lock);
2430 isc_mem_put(mctx, qid, sizeof(*qid));
2434 * Allocate and set important limits.
/*
 * dispatch_allocate(): take a dispatch structure from the manager's
 * dpool and set its transport-independent fields to initial values.
 *
 * 'mgr' must be valid and '*dispp' NULL on entry.  Besides zeroing
 * counters and flags, this initializes the per-dispatch random state
 * from the manager's entropy source, the dispatch mutex, and a
 * pre-allocated "failsafe" event.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY / the mutex-init result; on
 * failure the mutex (if created) is destroyed and the structure goes
 * back to dpool.
 */
2437 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2438 dns_dispatch_t **dispp)
2440 dns_dispatch_t *disp;
2441 isc_result_t result;
2443 REQUIRE(VALID_DISPATCHMGR(mgr));
2444 REQUIRE(dispp != NULL && *dispp == NULL);
2447 * Set up the dispatcher, mostly. Don't bother setting some of
2448 * the options that are controlled by tcp vs. udp, etc.
2451 disp = isc_mempool_get(mgr->dpool);
2453 return (ISC_R_NOMEMORY);
2457 disp->maxrequests = maxrequests;
2458 disp->attributes = 0;
2459 ISC_LINK_INIT(disp, link);
2461 disp->recv_pending = 0;
2462 memset(&disp->local, 0, sizeof(disp->local));
2463 disp->localport = 0;
2464 disp->shutting_down = 0;
2465 disp->shutdown_out = 0;
2466 disp->connected = 0;
2467 disp->tcpmsg_valid = 0;
2468 disp->shutdown_why = ISC_R_UNEXPECTED;
2470 disp->tcpbuffers = 0;
2472 ISC_LIST_INIT(disp->activesockets);
2473 ISC_LIST_INIT(disp->inactivesockets);
/* No lock is passed for the per-dispatch random state (third argument NULL). */
2475 dispatch_initrandom(&disp->arc4ctx, mgr->entropy, NULL);
2476 disp->port_table = NULL;
2477 disp->portpool = NULL;
2479 result = isc_mutex_init(&disp->lock);
2480 if (result != ISC_R_SUCCESS)
2483 disp->failsafe_ev = allocate_event(disp);
2484 if (disp->failsafe_ev == NULL) {
2485 result = ISC_R_NOMEMORY;
2489 disp->magic = DISPATCH_MAGIC;
2492 return (ISC_R_SUCCESS);
/* Error unwinding. */
2498 DESTROYLOCK(&disp->lock);
2500 isc_mempool_put(mgr->dpool, disp);
2507 * MUST be unlocked, and not used by anything.
/*
 * dispatch_free(): release a dispatch structure and what it owns.
 *
 * The dispatch must be idle: no TCP buffers, no outstanding requests, no
 * pending receive, and empty active/inactive socket lists (all INSISTed).
 * Frees the tcpmsg state if valid, the failsafe event, the qid table,
 * the port table (every bucket must be empty), the port pool and the
 * mutex, then returns the structure to the manager's dpool.
 *
 * NOTE(review): the lines that load 'disp' and 'mgr' and clear '*dispp'
 * are not shown in this listing.
 */
2510 dispatch_free(dns_dispatch_t **dispp)
2512 dns_dispatch_t *disp;
2513 dns_dispatchmgr_t *mgr;
2516 REQUIRE(VALID_DISPATCH(*dispp));
2521 REQUIRE(VALID_DISPATCHMGR(mgr));
2523 if (disp->tcpmsg_valid) {
2524 dns_tcpmsg_invalidate(&disp->tcpmsg);
2525 disp->tcpmsg_valid = 0;
2528 INSIST(disp->tcpbuffers == 0);
2529 INSIST(disp->requests == 0);
2530 INSIST(disp->recv_pending == 0);
2531 INSIST(ISC_LIST_EMPTY(disp->activesockets));
2532 INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
/* The failsafe event goes straight back to the event pool. */
2534 isc_mempool_put(mgr->epool, disp->failsafe_ev);
2535 disp->failsafe_ev = NULL;
2537 if (disp->qid != NULL)
2538 qid_destroy(mgr->mctx, &disp->qid);
2540 if (disp->port_table != NULL) {
2541 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2542 INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2543 isc_mem_put(mgr->mctx, disp->port_table,
2544 sizeof(disp->port_table[0]) *
2545 DNS_DISPATCH_PORTTABLESIZE);
2548 if (disp->portpool != NULL)
2549 isc_mempool_destroy(&disp->portpool);
2552 DESTROYLOCK(&disp->lock);
2554 isc_mempool_put(mgr->dpool, disp);
/*
 * dns_dispatch_createtcp(): create a dispatch on top of an existing TCP
 * socket.
 *
 * 'sock' must be a TCP socket; 'attributes' must include
 * DNS_DISPATCHATTR_TCP and must not include DNS_DISPATCHATTR_UDP.
 * DNS_DISPATCHATTR_PRIVATE is always added.  The new dispatch gets its
 * own qid table (without a socket table), a reference to 'sock', one
 * task, a control event and an initialized tcpmsg, and is appended to
 * the manager's dispatch list.
 *
 * Returns ISC_R_SUCCESS or the failing step's result; failure paths
 * detach the task and socket and free the dispatch.
 * NOTE(review): 'buffersize' and 'maxbuffers' are not referenced on any
 * line visible in this listing.
 */
2558 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2559 isc_taskmgr_t *taskmgr, unsigned int buffersize,
2560 unsigned int maxbuffers, unsigned int maxrequests,
2561 unsigned int buckets, unsigned int increment,
2562 unsigned int attributes, dns_dispatch_t **dispp)
2564 isc_result_t result;
2565 dns_dispatch_t *disp;
2570 REQUIRE(VALID_DISPATCHMGR(mgr));
2571 REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2572 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2573 REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2575 attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */
2580 * dispatch_allocate() checks mgr for us.
2581 * qid_allocate() checks buckets and increment for us.
2584 result = dispatch_allocate(mgr, maxrequests, &disp);
2585 if (result != ISC_R_SUCCESS) {
2590 result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2591 if (result != ISC_R_SUCCESS)
2592 goto deallocate_dispatch;
2594 disp->socktype = isc_sockettype_tcp;
2595 disp->socket = NULL;
2596 isc_socket_attach(sock, &disp->socket);
2599 disp->task[0] = NULL;
2600 result = isc_task_create(taskmgr, 0, &disp->task[0]);
2601 if (result != ISC_R_SUCCESS)
/* Control event allocated up front; tcp_recv() posts it to task[0] on shutdown. */
2604 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2605 DNS_EVENT_DISPATCHCONTROL,
2607 sizeof(isc_event_t));
2608 if (disp->ctlevent == NULL) {
2609 result = ISC_R_NOMEMORY;
2613 isc_task_setname(disp->task[0], "tcpdispatch", disp);
2615 dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2616 disp->tcpmsg_valid = 1;
2618 disp->attributes = attributes;
2621 * Append it to the dispatcher list.
2623 ISC_LIST_APPEND(mgr->list, disp, link);
2626 mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2627 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2631 return (ISC_R_SUCCESS);
/* Error unwinding, in reverse order of acquisition. */
2637 isc_task_detach(&disp->task[0]);
2639 isc_socket_detach(&disp->socket);
2640 deallocate_dispatch:
2641 dispatch_free(&disp);
/*
 * dns_dispatch_getudp(): obtain a UDP dispatch for 'localaddr', reusing
 * a matching existing one when possible and creating one otherwise.
 *
 * Argument requirements are spelled out by the REQUIRE()s below; in
 * addition an EXCLUSIVE dispatch must be requested with port 0.  The
 * manager's UDP pools are created or adjusted first through
 * dns_dispatchmgr_setudp().
 *
 * When an existing dispatch is reused, its maxrequests is raised to the
 * requested value if that is larger, and if the caller asks for NOLISTEN
 * on a dispatch that was listening, the flag is set and any pending
 * receive is cancelled.
 *
 * NOTE(review): locking of the manager, reference counting and the store
 * to '*dispp' are on lines not shown in this listing.
 */
2649 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2650 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2651 unsigned int buffersize,
2652 unsigned int maxbuffers, unsigned int maxrequests,
2653 unsigned int buckets, unsigned int increment,
2654 unsigned int attributes, unsigned int mask,
2655 dns_dispatch_t **dispp)
2657 isc_result_t result;
2658 dns_dispatch_t *disp = NULL;
2660 REQUIRE(VALID_DISPATCHMGR(mgr));
2661 REQUIRE(sockmgr != NULL);
2662 REQUIRE(localaddr != NULL);
2663 REQUIRE(taskmgr != NULL);
2664 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2665 REQUIRE(maxbuffers > 0);
2666 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2667 REQUIRE(increment > buckets);
2668 REQUIRE(dispp != NULL && *dispp == NULL);
2669 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2671 result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2672 maxrequests, buckets, increment);
2673 if (result != ISC_R_SUCCESS)
2678 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2679 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2684 * See if we have a dispatcher that matches.
2686 result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2687 if (result == ISC_R_SUCCESS) {
2690 if (disp->maxrequests < maxrequests)
2691 disp->maxrequests = maxrequests;
/* Switching a listening dispatch to NOLISTEN also cancels its pending receive. */
2693 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 &&
2694 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2696 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2697 if (disp->recv_pending != 0)
2698 isc_socket_cancel(disp->socket, disp->task[0],
2699 ISC_SOCKCANCEL_RECV);
/* dispatch_find() hands back the match locked; release it here. */
2702 UNLOCK(&disp->lock);
2707 return (ISC_R_SUCCESS);
/* No reusable dispatch: create a new one. */
2714 result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2715 maxrequests, attributes, &disp);
2716 if (result != ISC_R_SUCCESS) {
2723 return (ISC_R_SUCCESS);
2727 * mgr should be locked.
/*
 * Size of the 'held' socket array in get_udpsocket(); may be overridden
 * at build time by defining DNS_DISPATCH_HELD beforehand.
 */
2730 #ifndef DNS_DISPATCH_HELD
2731 #define DNS_DISPATCH_HELD 20U
/*
 * get_udpsocket(): open a UDP socket for dispatch 'disp' bound to
 * 'localaddr'; '*sockp' must be NULL on entry.
 *
 * Strategy, as visible in this listing:
 *  - When 'localaddr' has port 0, up to 1024 attempts are made to bind a
 *    randomly chosen port from the manager's configured list for the
 *    address family; ISC_R_ADDRNOTAVAIL is returned on a line guarded by
 *    a condition not shown here (presumably an empty port list).
 *  - When a specific port was given, the socket is opened with
 *    ISC_SOCKET_REUSEADDRESS.
 *  - As a fallback, the kernel is asked for a port repeatedly (up to
 *    0xffff tries).  Sockets whose port fails portavailable() are parked
 *    in the 'held' ring of DNS_DISPATCH_HELD entries, each slot's
 *    previous occupant being detached on reuse.  If no acceptable port
 *    turns up an error is logged and ISC_R_FAILURE results.  All held
 *    sockets are detached before returning.
 */
2735 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2736 isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2737 isc_socket_t **sockp)
2740 isc_socket_t *held[DNS_DISPATCH_HELD];
2741 isc_sockaddr_t localaddr_bound;
2742 isc_socket_t *sock = NULL;
2743 isc_result_t result = ISC_R_SUCCESS;
2744 isc_boolean_t anyport;
2746 INSIST(sockp != NULL && *sockp == NULL);
2748 localaddr_bound = *localaddr;
2749 anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
2752 unsigned int nports;
2756 * If no port is specified, we first try to pick up a random
2757 * port by ourselves.
2759 if (isc_sockaddr_pf(&disp->local) == AF_INET) {
2760 nports = disp->mgr->nv4ports;
2761 ports = disp->mgr->v4ports;
2763 nports = disp->mgr->nv6ports;
2764 ports = disp->mgr->v6ports;
2767 return (ISC_R_ADDRNOTAVAIL);
2769 for (i = 0; i < 1024; i++) {
2772 prt = ports[dispatch_uniformrandom(
2775 isc_sockaddr_setport(&localaddr_bound, prt);
2776 result = open_socket(sockmgr, &localaddr_bound,
/* NOTE(review): this test reduces to result != ISC_R_ADDRINUSE, so any other failure also ends the search here. */
2778 if (result == ISC_R_SUCCESS ||
2779 result != ISC_R_ADDRINUSE) {
2780 disp->localport = prt;
2787 * If this fails 1024 times, we then ask the kernel for
2791 /* Allow to reuse address for non-random ports. */
2792 result = open_socket(sockmgr, localaddr,
2793 ISC_SOCKET_REUSEADDRESS, &sock);
2795 if (result == ISC_R_SUCCESS)
2801 memset(held, 0, sizeof(held));
2804 for (j = 0; j < 0xffffU; j++) {
2805 result = open_socket(sockmgr, localaddr, 0, &sock);
2806 if (result != ISC_R_SUCCESS)
2810 else if (portavailable(mgr, sock, NULL))
/* Rejected sockets are kept open in 'held', evicting the slot's previous entry. */
2812 if (held[i] != NULL)
2813 isc_socket_detach(&held[i]);
2816 if (i == DNS_DISPATCH_HELD)
2820 mgr_log(mgr, ISC_LOG_ERROR,
2821 "avoid-v%s-udp-ports: unable to allocate "
2822 "an available port",
2823 isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
2824 result = ISC_R_FAILURE;
/* Release every socket that was parked during the search. */
2830 for (i = 0; i < DNS_DISPATCH_HELD; i++) {
2831 if (held[i] != NULL)
2832 isc_socket_detach(&held[i]);
/*
 * Build a new UDP dispatch for 'localaddr' under 'mgr'.
 *
 * Steps visible below: allocate the dispatch, obtain a socket (or merely
 * probe the address) depending on DNS_DISPATCHATTR_EXCLUSIVE, set up the
 * port table and its memory pool, create the tasks and the control event,
 * record the attributes, and append the dispatch to mgr->list.  Failures
 * unwind through the cleanup code at the bottom of the function.
 */
2839 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2840 isc_taskmgr_t *taskmgr,
2841 isc_sockaddr_t *localaddr,
2842 unsigned int maxrequests,
2843 unsigned int attributes,
2844 dns_dispatch_t **dispp)
2846 isc_result_t result;
2847 dns_dispatch_t *disp;
2848 isc_socket_t *sock = NULL;
2852 * dispatch_allocate() checks mgr for us.
2855 result = dispatch_allocate(mgr, maxrequests, &disp);
2856 if (result != ISC_R_SUCCESS)
/* Without DNS_DISPATCHATTR_EXCLUSIVE the dispatch takes its socket from get_udpsocket(). */
2859 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
2860 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock);
2861 if (result != ISC_R_SUCCESS)
2862 goto deallocate_dispatch;
2864 isc_sockaddr_t sa_any;
2867 * For dispatches using exclusive sockets with a specific
2868 * source address, we only check if the specified address is
2869 * available on the system. Query sockets will be created later
/* Build the wildcard address of the same family; only a non-wildcard 'localaddr' is probed. */
2872 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
2873 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
2874 result = open_socket(sockmgr, localaddr, 0, &sock);
/* The probe socket is released before the result of the probe is examined. */
2876 isc_socket_detach(&sock);
2877 if (result != ISC_R_SUCCESS)
2878 goto deallocate_dispatch;
/*
 * Port table with DNS_DISPATCH_PORTTABLESIZE list heads.
 * NOTE(review): on allocation failure the code jumps to deallocate_dispatch
 * without a visible assignment to 'result'; if 'result' still holds the
 * success value from the preceding step, the failure may be reported as
 * success.  Confirm and set ISC_R_NOMEMORY if so.
 */
2881 disp->port_table = isc_mem_get(mgr->mctx,
2882 sizeof(disp->port_table[0]) *
2883 DNS_DISPATCH_PORTTABLESIZE);
2884 if (disp->port_table == NULL)
2885 goto deallocate_dispatch;
2886 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2887 ISC_LIST_INIT(disp->port_table[i]);
/* Memory pool sized for dispportentry_t objects, named and capped at 128 free items below. */
2889 result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
2891 if (result != ISC_R_SUCCESS)
2892 goto deallocate_dispatch;
2893 isc_mempool_setname(disp->portpool, "disp_portpool");
2894 isc_mempool_setfreemax(disp->portpool, 128);
/* From here on the socket (possibly NULL) and the local address belong to the dispatch. */
2896 disp->socktype = isc_sockettype_udp;
2897 disp->socket = sock;
2898 disp->local = *localaddr;
/* An exclusive dispatch uses MAX_INTERNAL_TASKS tasks. */
2900 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
2901 disp->ntasks = MAX_INTERNAL_TASKS;
2904 for (i = 0; i < disp->ntasks; i++) {
2905 disp->task[i] = NULL;
2906 result = isc_task_create(taskmgr, 0, &disp->task[i]);
2907 if (result != ISC_R_SUCCESS) {
/* Failure path: tasks are shut down and detached before bailing out. */
2909 isc_task_shutdown(disp->task[i]);
2910 isc_task_detach(&disp->task[i]);
2914 isc_task_setname(disp->task[i], "udpdispatch", disp);
/* Control event of type DNS_EVENT_DISPATCHCONTROL, stored in disp->ctlevent. */
2917 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2918 DNS_EVENT_DISPATCHCONTROL,
2920 sizeof(isc_event_t));
2921 if (disp->ctlevent == NULL) {
2922 result = ISC_R_NOMEMORY;
/* Whatever the caller passed, the stored attributes say UDP and never TCP. */
2926 attributes &= ~DNS_DISPATCHATTR_TCP;
2927 attributes |= DNS_DISPATCHATTR_UDP;
2928 disp->attributes = attributes;
2931 * Append it to the dispatcher list.
2933 ISC_LIST_APPEND(mgr->list, disp, link);
2935 mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
2936 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
2937 if (disp->socket != NULL)
2938 dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
/* Error unwinding: detach the tasks, then the socket if any, then free the dispatch. */
2947 for (i = 0; i < disp->ntasks; i++)
2948 isc_task_detach(&disp->task[i]);
2950 if (disp->socket != NULL)
2951 isc_socket_detach(&disp->socket);
2952 deallocate_dispatch:
2953 dispatch_free(&disp);
/*
 * Attach to the dispatch 'disp'.
 *
 * Requires a valid dispatch and a non-NULL 'dispp' whose target is NULL.
 */
2959 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
2960 REQUIRE(VALID_DISPATCH(disp));
2961 REQUIRE(dispp != NULL && *dispp == NULL);
/* Release disp->lock once the dispatch has been updated. */
2965 UNLOCK(&disp->lock);
2971 * It is important to lock the manager while we are deleting the dispatch,
2972 * since dns_dispatch_getudp will call dispatch_find, which returns to
2973 * the caller a dispatch but does not attach to it until later. _getudp
2974 * locks the manager, however, so locking it here will keep us from attaching
2975 * to a dispatcher that is in the process of going away.
/*
 * Detach from the dispatch referenced by '*dispp'.
 *
 * Requires 'dispp' non-NULL and '*dispp' a valid dispatch.  Once
 * disp->refcount is zero, outstanding receives are cancelled and the
 * dispatch is flagged as shutting down.
 */
2978 dns_dispatch_detach(dns_dispatch_t **dispp) {
2979 dns_dispatch_t *disp;
2980 dispsocket_t *dispsock;
2981 isc_boolean_t killit;
2983 REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
2990 INSIST(disp->refcount > 0);
2992 if (disp->refcount == 0) {
/* Cancel a pending receive on the dispatch's own socket. */
2993 if (disp->recv_pending > 0)
2994 isc_socket_cancel(disp->socket, disp->task[0],
2995 ISC_SOCKCANCEL_RECV);
/* Cancel receives on each socket in the activesockets list. */
2996 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
2998 dispsock = ISC_LIST_NEXT(dispsock, link)) {
2999 isc_socket_cancel(dispsock->socket, dispsock->task,
3000 ISC_SOCKCANCEL_RECV);
3002 disp->shutting_down = 1;
3005 dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
/* destroy_disp_ok() is evaluated before disp->lock is released. */
3007 killit = destroy_disp_ok(disp);
3008 UNLOCK(&disp->lock);
/* Post the dispatch's control event to its first task. */
3010 isc_task_send(disp->task[0], &disp->ctlevent);
/*
 * Register a response entry on 'disp' for replies from 'dest'; events for
 * the entry are delivered to 'task' with 'action' and 'arg'.
 *
 * Requires: valid 'disp'; non-NULL 'task', 'dest' and 'idp'; 'resp' non-NULL
 * with '*resp' NULL; and a non-NULL 'sockmgr' when the dispatch has
 * DNS_DISPATCHATTR_EXCLUSIVE set.
 *
 * Result codes returned directly below: ISC_R_SHUTTINGDOWN, ISC_R_QUOTA,
 * ISC_R_NOMORE, ISC_R_NOMEMORY and ISC_R_SUCCESS.
 */
3014 dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3015 isc_task_t *task, isc_taskaction_t action, void *arg,
3016 dns_messageid_t *idp, dns_dispentry_t **resp,
3017 isc_socketmgr_t *sockmgr)
3019 dns_dispentry_t *res;
3020 unsigned int bucket;
3021 in_port_t localport = 0;
3026 dispsocket_t *dispsocket = NULL;
3027 isc_result_t result;
3029 REQUIRE(VALID_DISPATCH(disp));
3030 REQUIRE(task != NULL);
3031 REQUIRE(dest != NULL);
3032 REQUIRE(resp != NULL && *resp == NULL);
3033 REQUIRE(idp != NULL);
/* Exclusive dispatches open per-query sockets, so they need a socket manager. */
3034 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3035 REQUIRE(sockmgr != NULL);
/* Refuse new entries on a dispatch that is shutting down. */
3039 if (disp->shutting_down == 1) {
3040 UNLOCK(&disp->lock);
3041 return (ISC_R_SHUTTINGDOWN);
/* Enforce the per-dispatch request limit. */
3044 if (disp->requests >= disp->maxrequests) {
3045 UNLOCK(&disp->lock);
3046 return (ISC_R_QUOTA);
3049 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
3050 disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
3051 dispsocket_t *oldestsocket;
3052 dns_dispentry_t *oldestresp;
3053 dns_dispatchevent_t *rev;
3056 * Kill oldest outstanding query if the number of sockets
3057 * exceeds the quota to keep the room for new queries.
/* The head of activesockets is treated as the oldest socket. */
3059 oldestsocket = ISC_LIST_HEAD(disp->activesockets);
3060 oldestresp = oldestsocket->resp;
/* Only act when the socket has an entry with no event already outstanding. */
3061 if (oldestresp != NULL && !oldestresp->item_out) {
3062 rev = allocate_event(oldestresp->disp);
/* Build an ISC_R_CANCELED dispatch event carrying no buffer. */
3064 rev->buffer.base = NULL;
3065 rev->result = ISC_R_CANCELED;
3066 rev->id = oldestresp->id;
3067 ISC_EVENT_INIT(rev, sizeof(*rev), 0,
3068 NULL, DNS_EVENT_DISPATCH,
3070 oldestresp->arg, oldestresp,
/* Mark the event as outstanding before handing it to the entry's task. */
3072 oldestresp->item_out = ISC_TRUE;
3073 isc_task_send(oldestresp->task,
3074 ISC_EVENT_PTR(&rev));
3075 inc_stats(disp->mgr,
3076 dns_resstatscounter_dispabort);
3081 * Move this entry to the tail so that it won't (easily) be
3082 * examined before actually being canceled.
3084 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
3085 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
3088 qid = DNS_QID(disp);
3091 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3093 * Get a separate UDP socket with a random port number.
3095 result = get_dispsocket(disp, dest, sockmgr, qid, &dispsocket,
3097 if (result != ISC_R_SUCCESS) {
3099 UNLOCK(&disp->lock);
3100 inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
/* Use the dispatch's own local port (presumably the non-exclusive branch). */
3104 localport = disp->localport;
3108 * Try somewhat hard to find an unique ID.
/* Start from a random ID and hash it with the destination and local port. */
3110 id = (dns_messageid_t)dispatch_random(DISP_ARC4CTX(disp));
3111 bucket = dns_hash(qid, dest, id, localport);
/* Up to 64 candidates: on a collision, step the ID by qid_increment and rehash. */
3113 for (i = 0; i < 64; i++) {
3114 if (entry_search(qid, dest, id, localport, bucket) == NULL) {
3118 id += qid->qid_increment;
3120 bucket = dns_hash(qid, dest, id, localport);
3125 UNLOCK(&disp->lock);
3126 return (ISC_R_NOMORE);
/* Take the entry from the manager's response pool. */
3129 res = isc_mempool_get(disp->mgr->rpool);
/* Out-of-memory path: unlock and release the socket obtained above, if any. */
3132 UNLOCK(&disp->lock);
3133 if (dispsocket != NULL)
3134 destroy_dispsocket(disp, &dispsocket);
3135 return (ISC_R_NOMEMORY);
/* Fill in the entry; it holds its own reference to the caller's task. */
3141 isc_task_attach(task, &res->task);
3144 res->port = localport;
3145 res->bucket = bucket;
3147 res->action = action;
3149 res->dispsocket = dispsocket;
/* Link the dedicated socket back to its entry. */
3150 if (dispsocket != NULL)
3151 dispsocket->resp = res;
3152 res->item_out = ISC_FALSE;
3153 ISC_LIST_INIT(res->items);
3154 ISC_LINK_INIT(res, link);
3155 res->magic = RESPONSE_MAGIC;
/* Publish the entry in its hash bucket. */
3156 ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3159 request_log(disp, res, LVL(90),
3160 "attached to task %p", res->task);
/* UDP dispatches and connected dispatches start receiving right away. */
3162 if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3163 ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
3164 result = startrecv(disp, dispsocket);
3165 if (result != ISC_R_SUCCESS) {
/* Undo the registration: unlink the entry, drop its socket, task and memory. */
3167 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3170 if (dispsocket != NULL)
3171 destroy_dispsocket(disp, &dispsocket);
3176 UNLOCK(&disp->lock);
3177 isc_task_detach(&res->task);
3178 isc_mempool_put(disp->mgr->rpool, res);
/* A dedicated socket joins the tail of the active socket list. */
3183 if (dispsocket != NULL)
3184 ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3186 UNLOCK(&disp->lock);
/* Sanity check: an exclusive dispatch always yields an entry with its own socket. */
3191 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3192 INSIST(res->dispsocket != NULL);
3194 return (ISC_R_SUCCESS);
/*
 * Wrapper around dns_dispatch_addresponse2() for dispatches that do not have
 * DNS_DISPATCHATTR_EXCLUSIVE set; it takes no socket manager argument.
 */
3198 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3199 isc_task_t *task, isc_taskaction_t action, void *arg,
3200 dns_messageid_t *idp, dns_dispentry_t **resp)
3202 REQUIRE(VALID_DISPATCH(disp));
/* Exclusive dispatches must call dns_dispatch_addresponse2() directly. */
3203 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3205 return (dns_dispatch_addresponse2(disp, dest, task, action, arg,
/*
 * Mark 'disp' as connected and start receiving on it.
 */
3210 dns_dispatch_starttcp(dns_dispatch_t *disp) {
3212 REQUIRE(VALID_DISPATCH(disp));
3214 dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3217 disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
/* The result of startrecv() is deliberately ignored here. */
3218 (void)startrecv(disp, NULL);
3219 UNLOCK(&disp->lock);
/*
 * Remove the response entry '*resp' from its dispatch and release it.
 *
 * Requires 'resp' non-NULL and '*resp' a valid response entry.  When
 * 'sockevent' is non-NULL, '*sockevent' must be non-NULL as well.  Any event
 * already posted for the entry and any events queued on it are freed, and
 * the entry goes back to the manager's response pool.
 */
3223 dns_dispatch_removeresponse(dns_dispentry_t **resp,
3224 dns_dispatchevent_t **sockevent)
3226 dns_dispatchmgr_t *mgr;
3227 dns_dispatch_t *disp;
3228 dns_dispentry_t *res;
3229 dispsocket_t *dispsock;
3230 dns_dispatchevent_t *ev;
3231 unsigned int bucket;
3232 isc_boolean_t killit;
3234 isc_eventlist_t events;
3237 REQUIRE(resp != NULL);
3238 REQUIRE(VALID_RESPONSE(*resp));
3244 REQUIRE(VALID_DISPATCH(disp));
3246 REQUIRE(VALID_DISPATCHMGR(mgr));
3248 qid = DNS_QID(disp);
3250 if (sockevent != NULL) {
3251 REQUIRE(*sockevent != NULL);
3260 INSIST(disp->requests > 0);
3262 INSIST(disp->refcount > 0);
/* With refcount at zero: cancel outstanding receives and flag shutdown. */
3264 if (disp->refcount == 0) {
3265 if (disp->recv_pending > 0)
3266 isc_socket_cancel(disp->socket, disp->task[0],
3267 ISC_SOCKCANCEL_RECV);
3268 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3270 dispsock = ISC_LIST_NEXT(dispsock, link)) {
3271 isc_socket_cancel(dispsock->socket, dispsock->task,
3272 ISC_SOCKCANCEL_RECV);
3274 disp->shutting_down = 1;
/* Take the entry out of its hash bucket. */
3277 bucket = res->bucket;
3280 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3283 if (ev == NULL && res->item_out) {
3285 * We've posted our event, but the caller hasn't gotten it
3286 * yet. Take it back.
3288 ISC_LIST_INIT(events);
3289 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3292 * We had better have gotten it back.
3295 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
/* Free the event together with its buffer, if it has one. */
3299 REQUIRE(res->item_out == ISC_TRUE);
3300 res->item_out = ISC_FALSE;
3301 if (ev->buffer.base != NULL)
3302 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3303 free_event(disp, ev);
3306 request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3307 isc_task_detach(&res->task);
/* Cancel receives on the entry's dedicated socket and break its back-link. */
3309 if (res->dispsocket != NULL) {
3310 isc_socket_cancel(res->dispsocket->socket,
3311 res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3312 res->dispsocket->resp = NULL;
3316 * Free any buffered requests as well
3318 ev = ISC_LIST_HEAD(res->items);
3319 while (ev != NULL) {
3320 ISC_LIST_UNLINK(res->items, ev, ev_link);
3321 if (ev->buffer.base != NULL)
3322 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3323 free_event(disp, ev);
3324 ev = ISC_LIST_HEAD(res->items);
/* Return the entry to the manager's response pool. */
3327 isc_mempool_put(disp->mgr->rpool, res);
3328 if (disp->shutting_down == 1)
/* The result of startrecv() is deliberately ignored here. */
3331 (void)startrecv(disp, NULL);
/* destroy_disp_ok() is evaluated before disp->lock is released. */
3333 killit = destroy_disp_ok(disp);
3334 UNLOCK(&disp->lock);
/* Post the dispatch's control event to its first task. */
3336 isc_task_send(disp->task[0], &disp->ctlevent);
/*
 * Send the dispatch's failsafe event, carrying disp->shutdown_why as its
 * result, to the first response entry that has no event outstanding.
 */
3340 do_cancel(dns_dispatch_t *disp) {
3341 dns_dispatchevent_t *ev;
3342 dns_dispentry_t *resp;
/* shutdown_out is set to 1 further down, once the failsafe event has been sent. */
3345 if (disp->shutdown_out == 1)
3348 qid = DNS_QID(disp);
3351 * Search for the first response handler without packets outstanding
3352 * unless a specific hander is given.
/* Walk the entries via linear_first()/linear_next(), skipping those with item_out set. */
3355 for (resp = linear_first(qid);
3356 resp != NULL && resp->item_out;
3358 resp = linear_next(qid, resp);
3361 * No one to send the cancel event to, so nothing to do.
3367 * Send the shutdown failsafe event to this resp.
/* Reuse the preallocated failsafe event; it carries no buffer. */
3369 ev = disp->failsafe_ev;
3370 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3371 resp->action, resp->arg, resp, NULL, NULL);
3372 ev->result = disp->shutdown_why;
3373 ev->buffer.base = NULL;
3374 ev->buffer.length = 0;
3375 disp->shutdown_out = 1;
3376 request_log(disp, resp, LVL(10),
3377 "cancel: failsafe event %p -> task %p",
/* Mark the entry as having an event outstanding before sending it. */
3379 resp->item_out = ISC_TRUE;
3380 isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
/*
 * Return the dispatch's own socket (disp->socket).  Requires a valid 'disp'.
 */
3386 dns_dispatch_getsocket(dns_dispatch_t *disp) {
3387 REQUIRE(VALID_DISPATCH(disp));
3389 return (disp->socket);
/*
 * Return the socket of the dedicated dispsocket attached to 'resp', when the
 * entry has one.  Requires a valid response entry.
 */
3393 dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3394 REQUIRE(VALID_RESPONSE(resp));
3396 if (resp->dispsocket != NULL)
3397 return (resp->dispsocket->socket);
/*
 * Copy the dispatch's local address into '*addrp'.
 *
 * Returns ISC_R_SUCCESS for a UDP dispatch and ISC_R_NOTIMPLEMENTED for any
 * other socket type, in which case '*addrp' is left untouched.
 */
3403 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3405 REQUIRE(VALID_DISPATCH(disp));
3406 REQUIRE(addrp != NULL);
3408 if (disp->socktype == isc_sockettype_udp) {
3409 *addrp = disp->local;
3410 return (ISC_R_SUCCESS);
3412 return (ISC_R_NOTIMPLEMENTED);
/*
 * Put 'disp' into the shutting-down state with ISC_R_CANCELED as the reason.
 */
3416 dns_dispatch_cancel(dns_dispatch_t *disp) {
3417 REQUIRE(VALID_DISPATCH(disp));
/* A dispatch that is already shutting down is left alone. */
3421 if (disp->shutting_down == 1) {
3422 UNLOCK(&disp->lock);
/* Record why the dispatch is going away, then flag the shutdown. */
3426 disp->shutdown_why = ISC_R_CANCELED;
3427 disp->shutting_down = 1;
3430 UNLOCK(&disp->lock);
/*
 * Return the current attribute bits of 'disp'.  Requires a valid 'disp'.
 */
3436 dns_dispatch_getattributes(dns_dispatch_t *disp) {
3437 REQUIRE(VALID_DISPATCH(disp));
3440 * We don't bother locking disp here; it's the caller's responsibility
3441 * to use only non volatile flags.
3443 return (disp->attributes);
/*
 * Replace the attribute bits of 'disp' selected by 'mask' with the
 * corresponding bits of 'attributes'.
 *
 * A change of DNS_DISPATCHATTR_NOLISTEN also takes effect on the socket:
 * clearing it starts a receive, setting it cancels a pending receive.
 */
3447 dns_dispatch_changeattributes(dns_dispatch_t *disp,
3448 unsigned int attributes, unsigned int mask)
3450 REQUIRE(VALID_DISPATCH(disp));
3451 /* Exclusive attribute can only be set on creation */
3452 REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3453 /* Also, a dispatch with randomport specified cannot start listening */
3454 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3455 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3458 * Should check for valid attributes here!
/* NOLISTEN is handled only when the caller's mask selects it. */
3463 if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
/* NOLISTEN going from set to clear: start receiving. */
3464 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3465 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3466 disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3467 (void)startrecv(disp, NULL);
3468 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3470 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
/* NOLISTEN being set: cancel a receive that is still pending. */
3471 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3472 if (disp->recv_pending != 0)
3473 isc_socket_cancel(disp->socket, disp->task[0],
3474 ISC_SOCKCANCEL_RECV);
/* Apply the masked bits: clear everything in 'mask', then set the requested ones. */
3478 disp->attributes &= ~mask;
3479 disp->attributes |= (attributes & mask);
3480 UNLOCK(&disp->lock);
/*
 * Feed a copy of the socket receive event 'event' into 'disp'.
 *
 * A new socket event with action udp_shrecv is allocated, the received bytes
 * are copied into a buffer from allocate_udp_buffer(), and the new event is
 * sent to the dispatch's first task.  The caller's 'event' is only read.
 *
 * Requires a valid 'disp' with DNS_DISPATCHATTR_NOLISTEN set and a non-NULL
 * 'event'.
 */
3484 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3486 isc_socketevent_t *sevent, *newsevent;
3488 REQUIRE(VALID_DISPATCH(disp));
3489 REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3490 REQUIRE(event != NULL);
3492 sevent = (isc_socketevent_t *)event;
/* The payload is later copied with memcpy(), so it must not exceed the manager's buffer size. */
3494 INSIST(sevent->n <= disp->mgr->buffersize);
3495 newsevent = (isc_socketevent_t *)
3496 isc_event_allocate(disp->mgr->mctx, NULL,
3497 DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3498 disp, sizeof(isc_socketevent_t));
3499 if (newsevent == NULL)
3502 buf = allocate_udp_buffer(disp);
/* Without a buffer the freshly allocated event is released again. */
3504 isc_event_free(ISC_EVENT_PTR(&newsevent));
/* Copy the payload, then mirror the remaining fields of the source event. */
3507 memcpy(buf, sevent->region.base, sevent->n);
3508 newsevent->region.base = buf;
3509 newsevent->region.length = disp->mgr->buffersize;
3510 newsevent->n = sevent->n;
3511 newsevent->result = sevent->result;
3512 newsevent->address = sevent->address;
3513 newsevent->timestamp = sevent->timestamp;
3514 newsevent->pktinfo = sevent->pktinfo;
3515 newsevent->attributes = sevent->attributes;
3517 isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
3522 dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
3523 dns_dispatch_t *disp;
3526 disp = ISC_LIST_HEAD(mgr->list);
3527 while (disp != NULL) {
3528 isc_sockaddr_format(&disp->local, foo, sizeof(foo));
3529 printf("\tdispatch %p, addr %s\n", disp, foo);
3530 disp = ISC_LIST_NEXT(disp, link);