2 * Copyright (C) 2004-2009, 2011 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: dispatch.c,v 1.168.248.1.2.1 2011-06-02 23:47:34 tbox Exp $ */
25 #include <sys/types.h>
29 #include <isc/entropy.h>
31 #include <isc/mutex.h>
32 #include <isc/portset.h>
33 #include <isc/print.h>
34 #include <isc/random.h>
35 #include <isc/stats.h>
36 #include <isc/string.h>
42 #include <dns/dispatch.h>
43 #include <dns/events.h>
45 #include <dns/message.h>
46 #include <dns/portlist.h>
47 #include <dns/stats.h>
48 #include <dns/tcpmsg.h>
49 #include <dns/types.h>
51 typedef ISC_LIST(dns_dispentry_t) dns_displist_t;
53 typedef struct dispsocket dispsocket_t;
54 typedef ISC_LIST(dispsocket_t) dispsocketlist_t;
56 typedef struct dispportentry dispportentry_t;
57 typedef ISC_LIST(dispportentry_t) dispportlist_t;
59 /* ARC4 Random generator state */
60 typedef struct arc4ctx {
65 isc_entropy_t *entropy; /*%< entropy source for ARC4 */
69 typedef struct dns_qid {
71 unsigned int qid_nbuckets; /*%< hash table size */
72 unsigned int qid_increment; /*%< id increment on collision */
74 dns_displist_t *qid_table; /*%< the table itself */
75 dispsocketlist_t *sock_table; /*%< socket table */
/* Dispatch manager state: dispatch list, ARC4 context for QIDs, UDP buffer accounting, memory pools, and the per-family arrays of usable ports. */
78 struct dns_dispatchmgr {
83 dns_portlist_t *portlist;
85 isc_entropy_t *entropy; /*%< entropy source */
87 /* Locked by "lock". */
90 ISC_LIST(dns_dispatch_t) list;
92 /* Locked by arc4_lock. */
93 isc_mutex_t arc4_lock;
94 arc4ctx_t arc4ctx; /*%< ARC4 context for QID */
96 /* locked by buffer lock */
98 isc_mutex_t buffer_lock;
99 unsigned int buffers; /*%< allocated buffers */
100 unsigned int buffersize; /*%< size of each buffer */
101 unsigned int maxbuffers; /*%< max buffers */
103 /* Locked internally. */
104 isc_mutex_t pool_lock;
105 isc_mempool_t *epool; /*%< memory pool for events */
106 isc_mempool_t *rpool; /*%< memory pool for replies */
107 isc_mempool_t *dpool; /*%< dispatch allocations */
108 isc_mempool_t *bpool; /*%< memory pool for buffers */
109 isc_mempool_t *spool; /*%< memory pool for dispsockets */
112 * Locked by qid->lock if qid exists; otherwise, can be used without
114 * Memory footprint considerations: this is a simple implementation of
115 * available ports, i.e., an ordered array of the actual port numbers.
116 * This will require about 256KB of memory in the worst case (128KB for
117 * each of IPv4 and IPv6). We could reduce it by representing it as a
118 * more sophisticated way such as a list (or array) of ranges that are
119 * searched to identify a specific port. Our decision here is the saved
120 * memory isn't worth the implementation complexity, considering the
121 * fact that the whole BIND9 process (which is mainly named) already
122 * requires a pretty large memory footprint. We may, however, have to
123 * revisit the decision when we want to use it as a separate module for
124 * an environment where memory requirement is severer.
126 in_port_t *v4ports; /*%< available ports for IPv4 */
127 unsigned int nv4ports; /*%< # of available ports for IPv4 */
128 in_port_t *v6ports; /*%< available ports for IPv6 */
129 unsigned int nv6ports; /*%< # of available ports for IPv6 */
132 #define MGR_SHUTTINGDOWN 0x00000001U
133 #define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0)
135 #define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
137 struct dns_dispentry {
139 dns_dispatch_t *disp;
145 isc_taskaction_t action;
147 isc_boolean_t item_out;
148 dispsocket_t *dispsocket;
149 ISC_LIST(dns_dispatchevent_t) items;
150 ISC_LINK(dns_dispentry_t) link;
154 * Maximum number of dispatch sockets that can be pooled for reuse. The
155 * appropriate value may vary, but experiments have shown a busy caching server
156 * may need more than 1000 sockets concurrently opened. The maximum allowable
157 * number of dispatch sockets (per manager) will be set to double this
160 #ifndef DNS_DISPATCH_POOLSOCKS
161 #define DNS_DISPATCH_POOLSOCKS 2048
165 * Quota to control the number of dispatch sockets. If a dispatch has more
166 * than the quota of sockets, new queries will purge the oldest ones, so that
167 * a massive number of outstanding queries won't prevent subsequent queries
168 * (especially if the older ones take a long time and result in a timeout).
170 #ifndef DNS_DISPATCH_SOCKSQUOTA
171 #define DNS_DISPATCH_SOCKSQUOTA 3072
176 isc_socket_t *socket;
177 dns_dispatch_t *disp;
179 in_port_t localport; /* XXX: should be removed later */
180 dispportentry_t *portentry;
181 dns_dispentry_t *resp;
183 ISC_LINK(dispsocket_t) link;
185 ISC_LINK(dispsocket_t) blink;
189 * A port table entry. We remember every port we first open in a table with a
190 * reference counter so that we can 'reuse' the same port (with different
191 * destination addresses) using the SO_REUSEADDR socket option.
193 struct dispportentry {
196 ISC_LINK(struct dispportentry) link;
199 #ifndef DNS_DISPATCH_PORTTABLESIZE
200 #define DNS_DISPATCH_PORTTABLESIZE 1024
203 #define INVALID_BUCKET (0xffffdead)
206 * Number of tasks for each dispatch that use separate sockets for different
207 * transactions. This must be a power of 2 as it will divide 32 bit numbers
208 * to get a uniformly random task selection. See get_dispsocket().
210 #define MAX_INTERNAL_TASKS 64
212 struct dns_dispatch {
214 unsigned int magic; /*%< magic */
215 dns_dispatchmgr_t *mgr; /*%< dispatch manager */
218 * internal task buckets. We use multiple tasks to distribute various
219 * socket events well when using separate dispatch sockets. We use the
220 * 1st task (task[0]) for internal control events.
222 isc_task_t *task[MAX_INTERNAL_TASKS];
223 isc_socket_t *socket; /*%< isc socket attached to */
224 isc_sockaddr_t local; /*%< local address */
225 in_port_t localport; /*%< local UDP port */
226 unsigned int maxrequests; /*%< max requests */
227 isc_event_t *ctlevent;
229 /*% Locked by mgr->lock. */
230 ISC_LINK(dns_dispatch_t) link;
232 /* Locked by "lock". */
233 isc_mutex_t lock; /*%< locks all below */
234 isc_sockettype_t socktype;
235 unsigned int attributes;
236 unsigned int refcount; /*%< number of users */
237 dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */
238 unsigned int shutting_down : 1,
242 recv_pending : 1; /*%< is a recv() pending? */
243 isc_result_t shutdown_why;
244 ISC_LIST(dispsocket_t) activesockets;
245 ISC_LIST(dispsocket_t) inactivesockets;
246 unsigned int nsockets;
247 unsigned int requests; /*%< how many requests we have */
248 unsigned int tcpbuffers; /*%< allocated buffers */
249 dns_tcpmsg_t tcpmsg; /*%< for tcp streams */
251 arc4ctx_t arc4ctx; /*%< for QID/UDP port num */
252 dispportlist_t *port_table; /*%< hold ports 'owned' by us */
253 isc_mempool_t *portpool; /*%< port table entries */
256 #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ')
257 #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC)
259 #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p')
260 #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
262 #define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c')
263 #define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
265 #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p')
266 #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
268 #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
269 #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
/*
 * Select the QID table for 'disp': a TCP dispatch uses its own
 * (disp)->qid, any other socket type uses the manager's table.
 *
 * The whole conditional is parenthesized so that the macro can be used
 * as an operand of a larger expression without the surrounding operators
 * being absorbed into the condition or the else-branch.
 */
#define DNS_QID(disp) \
	(((disp)->socktype == isc_sockettype_tcp) ? \
	 (disp)->qid : (disp)->mgr->qid)
/*
 * Select the ARC4 context for 'disp': a UDP dispatch uses its own
 * context, any other socket type uses the manager's context.  Yields a
 * pointer to the context.
 *
 * The whole conditional is parenthesized so that the result can be
 * dereferenced or otherwise combined with other operators safely.
 */
#define DISP_ARC4CTX(disp) \
	(((disp)->socktype == isc_sockettype_udp) ? \
	 (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
277 * Locking a query port buffer is a bit tricky. We access the buffer without
278 * locking until qid is created. Technically, there is a possibility of race
279 * between the creation of qid and access to the port buffer; in practice,
280 * however, this should be safe because qid isn't created until the first
281 * dispatch is created and there should be no contending situation until then.
/*
 * Take / release the lock that guards the manager's port buffers.  The
 * lock lives in mgr->qid, so both macros are no-ops while no qid exists.
 *
 * Each macro is wrapped in do { } while (0) so that it behaves as a
 * single statement: a bare "if" here would capture an "else" that
 * follows the macro invocation.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)
289 static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
290 dns_messageid_t, in_port_t, unsigned int);
291 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
292 static void destroy_disp(isc_task_t *task, isc_event_t *event);
293 static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
294 static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
295 static void udp_exrecv(isc_task_t *, isc_event_t *);
296 static void udp_shrecv(isc_task_t *, isc_event_t *);
297 static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
298 static void tcp_recv(isc_task_t *, isc_event_t *);
299 static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
300 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
302 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
303 static void *allocate_udp_buffer(dns_dispatch_t *disp);
304 static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
305 static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
306 static void do_cancel(dns_dispatch_t *disp);
307 static dns_dispentry_t *linear_first(dns_qid_t *disp);
308 static dns_dispentry_t *linear_next(dns_qid_t *disp,
309 dns_dispentry_t *resp);
310 static void dispatch_free(dns_dispatch_t **dispp);
311 static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
312 dns_dispatch_t *disp,
313 isc_socketmgr_t *sockmgr,
314 isc_sockaddr_t *localaddr,
315 isc_socket_t **sockp);
316 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
317 isc_socketmgr_t *sockmgr,
318 isc_taskmgr_t *taskmgr,
319 isc_sockaddr_t *localaddr,
320 unsigned int maxrequests,
321 unsigned int attributes,
322 dns_dispatch_t **dispp);
323 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
324 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
325 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
326 unsigned int increment, dns_qid_t **qidp,
327 isc_boolean_t needaddrtable);
328 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
329 static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
330 unsigned int options, isc_socket_t **sockp);
331 static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
332 isc_sockaddr_t *sockaddrp);
334 #define LVL(x) ISC_LOG_DEBUG(x)
337 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
338 ISC_FORMAT_PRINTF(3, 4);
341 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
345 if (! isc_log_wouldlog(dns_lctx, level))
349 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
352 isc_log_write(dns_lctx,
353 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
354 level, "dispatchmgr %p: %s", mgr, msgbuf);
358 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
359 if (mgr->stats != NULL)
360 isc_stats_increment(mgr->stats, counter);
364 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
365 ISC_FORMAT_PRINTF(3, 4);
368 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
372 if (! isc_log_wouldlog(dns_lctx, level))
376 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
379 isc_log_write(dns_lctx,
380 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
381 level, "dispatch %p: %s", disp, msgbuf);
385 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
386 int level, const char *fmt, ...)
387 ISC_FORMAT_PRINTF(4, 5);
390 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
391 int level, const char *fmt, ...)
397 if (! isc_log_wouldlog(dns_lctx, level))
401 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
404 if (VALID_RESPONSE(resp)) {
405 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
406 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
407 DNS_LOGMODULE_DISPATCH, level,
408 "dispatch %p response %p %s: %s", disp, resp,
411 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
412 DNS_LOGMODULE_DISPATCH, level,
413 "dispatch %p req/resp %p: %s", disp, resp,
419 * ARC4 random number generator derived from OpenBSD.
420 * Only dispatch_random() and dispatch_uniformrandom() are expected
421 * to be called from general dispatch routines; the rest of them are subroutines
424 * The original copyright follows:
425 * Copyright (c) 1996, David Mazieres <dm@uun.org>
426 * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
428 * Permission to use, copy, modify, and distribute this software for any
429 * purpose with or without fee is hereby granted, provided that the above
430 * copyright notice and this permission notice appear in all copies.
432 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
433 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
434 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
435 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
436 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
437 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
438 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
442 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
446 for (n = 0; n < 256; n++)
451 actx->entropy = entropy; /* don't have to attach */
/* Mix the bytes of 'dat' into the ARC4 state in 'actx' over 256 swap rounds; 'dat' is reused cyclically when datlen < 256. */
456 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
461 for (n = 0; n < 256; n++) {
/* NOTE(review): s[] is indexed directly by i and j, which presumably are 8-bit fields that wrap at 256 -- the arc4ctx field types are not visible here; confirm. */
462 actx->i = (actx->i + 1);
463 si = actx->s[actx->i];
/* 'n % datlen' requires datlen > 0. */
464 actx->j = (actx->j + si + dat[n % datlen]);
/* Swap s[i] and s[j]. */
465 actx->s[actx->i] = actx->s[actx->j];
466 actx->s[actx->j] = si;
471 static inline isc_uint8_t
472 dispatch_arc4get8(arc4ctx_t *actx) {
475 actx->i = (actx->i + 1);
476 si = actx->s[actx->i];
477 actx->j = (actx->j + si);
478 sj = actx->s[actx->j];
479 actx->s[actx->i] = sj;
480 actx->s[actx->j] = si;
482 return (actx->s[(si + sj) & 0xff]);
485 static inline isc_uint16_t
486 dispatch_arc4get16(arc4ctx_t *actx) {
489 val = dispatch_arc4get8(actx) << 8;
490 val |= dispatch_arc4get8(actx);
496 dispatch_arc4stir(arc4ctx_t *actx) {
499 unsigned char rnd[128];
500 isc_uint32_t rnd32[32];
504 if (actx->entropy != NULL) {
506 * We accept any quality of random data to avoid blocking.
508 result = isc_entropy_getdata(actx->entropy, rnd.rnd,
509 sizeof(rnd), NULL, 0);
510 RUNTIME_CHECK(result == ISC_R_SUCCESS);
512 for (i = 0; i < 32; i++)
513 isc_random_get(&rnd.rnd32[i]);
515 dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
518 * Discard early keystream, as per recommendations in:
519 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
521 for (i = 0; i < 256; i++)
522 (void)dispatch_arc4get8(actx);
525 * Derived from OpenBSD's implementation. The rationale is not clear,
526 * but should be conservative enough in safety, and reasonably large
529 actx->count = 1600000;
533 dispatch_random(arc4ctx_t *actx) {
536 if (actx->lock != NULL)
539 actx->count -= sizeof(isc_uint16_t);
540 if (actx->count <= 0)
541 dispatch_arc4stir(actx);
542 result = dispatch_arc4get16(actx);
544 if (actx->lock != NULL)
551 * For general purpose library, we don't have to be too strict about the
552 * quality of random values. Performance doesn't matter much, either.
553 * So we simply use the isc_random module to keep the library as small as
558 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
569 dispatch_random(arc4ctx_t *actx) {
/* Draw from dispatch_random(actx) and reduce modulo 'upper_bound'; 'min' marks the low part of the 16-bit range that is excluded (see the comments below) so the result is not biased. */
580 dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
587 * Ensure the range of random numbers [min, 0xffff] be a multiple of
588 * upper_bound and contain at least a half of the 16 bit range.
591 if (upper_bound > 0x8000)
592 min = 1 + ~upper_bound; /* 0x10000 - upper_bound (16-bit negation) */
594 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
597 * This could theoretically loop forever but each retry has
598 * p > 0.5 (worst case, usually far better) of selecting a
599 * number inside the range we need, so it should rarely need
603 r = dispatch_random(actx);
608 return (r % upper_bound);
612 * Return a hash of the destination and message id.
615 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
/* Start from the hash of the destination address. */
620 ret = isc_sockaddr_hash(dest, ISC_TRUE);
/* Fold in the message id (shifted into the upper 16 bits) and the port. */
/* NOTE(review): if dns_messageid_t is narrower than int, 'id' is promoted to (signed) int before the shift, so ids >= 0x8000 shift into the sign bit; casting to isc_uint32_t first would avoid that -- confirm the typedef. */
621 ret ^= (id << 16) | port;
/* Reduce to a bucket index; qid->qid_nbuckets must be nonzero. */
622 ret %= qid->qid_nbuckets;
624 INSIST(ret < qid->qid_nbuckets);
630 * Find the first entry in 'qid'. Returns NULL if there are no entries.
632 static dns_dispentry_t *
633 linear_first(dns_qid_t *qid) {
634 dns_dispentry_t *ret;
639 while (bucket < qid->qid_nbuckets) {
640 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
650 * Find the next entry after 'resp' in 'qid'. Return NULL if there are
653 static dns_dispentry_t *
654 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
655 dns_dispentry_t *ret;
658 ret = ISC_LIST_NEXT(resp, link);
662 bucket = resp->bucket;
664 while (bucket < qid->qid_nbuckets) {
665 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
675 * The dispatch must be locked.
678 destroy_disp_ok(dns_dispatch_t *disp)
680 if (disp->refcount != 0)
683 if (disp->recv_pending != 0)
686 if (!ISC_LIST_EMPTY(disp->activesockets))
689 if (disp->shutting_down == 0)
696 * Called when refcount reaches 0 (and safe to destroy).
698 * The dispatcher must not be locked.
699 * The manager must be locked.
702 destroy_disp(isc_task_t *task, isc_event_t *event) {
703 dns_dispatch_t *disp;
704 dns_dispatchmgr_t *mgr;
705 isc_boolean_t killmgr;
706 dispsocket_t *dispsocket;
709 INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
713 disp = event->ev_arg;
717 ISC_LIST_UNLINK(mgr->list, disp, link);
719 dispatch_log(disp, LVL(90),
720 "shutting down; detaching from sock %p, task %p",
721 disp->socket, disp->task[0]); /* XXXX */
723 if (disp->socket != NULL)
724 isc_socket_detach(&disp->socket);
725 while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
726 ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
727 destroy_dispsocket(disp, &dispsocket);
729 for (i = 0; i < disp->ntasks; i++)
730 isc_task_detach(&disp->task[i]);
731 isc_event_free(&event);
733 dispatch_free(&disp);
735 killmgr = destroy_mgr_ok(mgr);
742 * Manipulate port table per dispatch: find an entry for a given port number,
743 * create a new entry, and decrement a given entry with possible clean-up.
745 static dispportentry_t *
746 port_search(dns_dispatch_t *disp, in_port_t port) {
747 dispportentry_t *portentry;
749 REQUIRE(disp->port_table != NULL);
751 portentry = ISC_LIST_HEAD(disp->port_table[port %
752 DNS_DISPATCH_PORTTABLESIZE]);
753 while (portentry != NULL) {
754 if (portentry->port == port)
756 portentry = ISC_LIST_NEXT(portentry, link);
762 static dispportentry_t *
763 new_portentry(dns_dispatch_t *disp, in_port_t port) {
764 dispportentry_t *portentry;
766 REQUIRE(disp->port_table != NULL);
768 portentry = isc_mempool_get(disp->portpool);
769 if (portentry == NULL)
772 portentry->port = port;
774 ISC_LINK_INIT(portentry, link);
775 ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
782 * The caller must not hold the qid->lock.
785 deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
786 dispportentry_t *portentry = *portentryp;
789 REQUIRE(disp->port_table != NULL);
790 REQUIRE(portentry != NULL && portentry->refs > 0);
795 if (portentry->refs == 0) {
796 ISC_LIST_UNLINK(disp->port_table[portentry->port %
797 DNS_DISPATCH_PORTTABLESIZE],
799 isc_mempool_put(disp->portpool, portentry);
807 * Find a dispsocket for socket address 'dest', and port number 'port'.
808 * Return NULL if no such entry exists.
810 static dispsocket_t *
811 socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
814 dispsocket_t *dispsock;
816 REQUIRE(bucket < qid->qid_nbuckets);
818 dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
820 while (dispsock != NULL) {
821 if (dispsock->portentry != NULL &&
822 dispsock->portentry->port == port &&
823 isc_sockaddr_equal(dest, &dispsock->host))
825 dispsock = ISC_LIST_NEXT(dispsock, blink);
832 * Make a new socket for a single dispatch with a random port number.
833 * The caller must hold the disp->lock and qid->lock.
836 get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
837 isc_socketmgr_t *sockmgr, dns_qid_t *qid,
838 dispsocket_t **dispsockp, in_port_t *portp)
842 dns_dispatchmgr_t *mgr = disp->mgr;
843 isc_socket_t *sock = NULL;
844 isc_result_t result = ISC_R_FAILURE;
846 isc_sockaddr_t localaddr;
847 unsigned int bucket = 0;
848 dispsocket_t *dispsock;
851 unsigned int bindoptions;
852 dispportentry_t *portentry = NULL;
854 if (isc_sockaddr_pf(&disp->local) == AF_INET) {
855 nports = disp->mgr->nv4ports;
856 ports = disp->mgr->v4ports;
858 nports = disp->mgr->nv6ports;
859 ports = disp->mgr->v6ports;
862 return (ISC_R_ADDRNOTAVAIL);
864 dispsock = ISC_LIST_HEAD(disp->inactivesockets);
865 if (dispsock != NULL) {
866 ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
867 sock = dispsock->socket;
868 dispsock->socket = NULL;
870 dispsock = isc_mempool_get(mgr->spool);
871 if (dispsock == NULL)
872 return (ISC_R_NOMEMORY);
875 dispsock->socket = NULL;
876 dispsock->disp = disp;
877 dispsock->resp = NULL;
878 dispsock->portentry = NULL;
880 dispsock->task = NULL;
881 isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
882 ISC_LINK_INIT(dispsock, link);
883 ISC_LINK_INIT(dispsock, blink);
884 dispsock->magic = DISPSOCK_MAGIC;
888 * Pick up a random UDP port and open a new socket with it. Avoid
889 * choosing ports that share the same destination because it will be
890 * very likely to fail in bind(2) or connect(2).
892 localaddr = disp->local;
893 for (i = 0; i < 64; i++) {
894 port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
896 isc_sockaddr_setport(&localaddr, port);
898 bucket = dns_hash(qid, dest, 0, port);
899 if (socket_search(qid, dest, port, bucket) != NULL)
902 portentry = port_search(disp, port);
903 if (portentry != NULL)
904 bindoptions |= ISC_SOCKET_REUSEADDRESS;
905 result = open_socket(sockmgr, &localaddr, bindoptions, &sock);
906 if (result == ISC_R_SUCCESS) {
907 if (portentry == NULL) {
908 portentry = new_portentry(disp, port);
909 if (portentry == NULL) {
910 result = ISC_R_NOMEMORY;
916 } else if (result != ISC_R_ADDRINUSE)
920 if (result == ISC_R_SUCCESS) {
921 dispsock->socket = sock;
922 dispsock->host = *dest;
923 dispsock->portentry = portentry;
924 dispsock->bucket = bucket;
925 ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
926 *dispsockp = dispsock;
930 * We could keep it in the inactive list, but since this should
931 * be an exceptional case and might be resource shortage, we'd
935 isc_socket_detach(&sock);
936 destroy_dispsocket(disp, &dispsock);
943 * Destroy a dedicated dispatch socket.
946 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
947 dispsocket_t *dispsock;
951 * The dispatch must be locked.
954 REQUIRE(dispsockp != NULL && *dispsockp != NULL);
955 dispsock = *dispsockp;
956 REQUIRE(!ISC_LINK_LINKED(dispsock, link));
960 if (dispsock->portentry != NULL)
961 deref_portentry(disp, &dispsock->portentry);
962 if (dispsock->socket != NULL)
963 isc_socket_detach(&dispsock->socket);
964 if (ISC_LINK_LINKED(dispsock, blink)) {
967 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
971 if (dispsock->task != NULL)
972 isc_task_detach(&dispsock->task);
973 isc_mempool_put(disp->mgr->spool, dispsock);
979 * Deactivate a dedicated dispatch socket. Move it to the inactive list for
980 * future reuse unless the total number of sockets exceeds the maximum.
983 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
988 * The dispatch must be locked.
990 ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
991 if (dispsock->resp != NULL) {
992 INSIST(dispsock->resp->dispsocket == dispsock);
993 dispsock->resp->dispsocket = NULL;
996 INSIST(dispsock->portentry != NULL);
997 deref_portentry(disp, &dispsock->portentry);
1000 if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
1001 destroy_dispsocket(disp, &dispsock);
1003 result = isc_socket_close(dispsock->socket);
1005 qid = DNS_QID(disp);
1007 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1011 if (result == ISC_R_SUCCESS)
1012 ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
1015 * If the underlying system does not allow this
1016 * optimization, destroy this temporary structure (and
1017 * create a new one for a new transaction).
1019 INSIST(result == ISC_R_NOTIMPLEMENTED);
1020 destroy_dispsocket(disp, &dispsock);
1024 /* This kind of optimization isn't necessary for normal use */
1028 destroy_dispsocket(disp, &dispsock);
1033 * Find an entry for query ID 'id', socket address 'dest', and port number
1035 * Return NULL if no such entry exists.
1037 static dns_dispentry_t *
1038 entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
1039 in_port_t port, unsigned int bucket)
1041 dns_dispentry_t *res;
1043 REQUIRE(bucket < qid->qid_nbuckets);
1045 res = ISC_LIST_HEAD(qid->qid_table[bucket]);
1047 while (res != NULL) {
1048 if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
1049 res->port == port) {
1052 res = ISC_LIST_NEXT(res, link);
1059 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
1060 INSIST(buf != NULL && len != 0);
1063 switch (disp->socktype) {
1064 case isc_sockettype_tcp:
1065 INSIST(disp->tcpbuffers > 0);
1067 isc_mem_put(disp->mgr->mctx, buf, len);
1069 case isc_sockettype_udp:
1070 LOCK(&disp->mgr->buffer_lock);
1071 INSIST(disp->mgr->buffers > 0);
1072 INSIST(len == disp->mgr->buffersize);
1073 disp->mgr->buffers--;
1074 isc_mempool_put(disp->mgr->bpool, buf);
1075 UNLOCK(&disp->mgr->buffer_lock);
1084 allocate_udp_buffer(dns_dispatch_t *disp) {
1087 LOCK(&disp->mgr->buffer_lock);
1088 temp = isc_mempool_get(disp->mgr->bpool);
1091 disp->mgr->buffers++;
1092 UNLOCK(&disp->mgr->buffer_lock);
1098 free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
1099 if (disp->failsafe_ev == ev) {
1100 INSIST(disp->shutdown_out == 1);
1101 disp->shutdown_out = 0;
1106 isc_mempool_put(disp->mgr->epool, ev);
1109 static inline dns_dispatchevent_t *
1110 allocate_event(dns_dispatch_t *disp) {
1111 dns_dispatchevent_t *ev;
1113 ev = isc_mempool_get(disp->mgr->epool);
1116 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
1117 NULL, NULL, NULL, NULL, NULL);
1123 udp_exrecv(isc_task_t *task, isc_event_t *ev) {
1124 dispsocket_t *dispsock = ev->ev_arg;
1128 REQUIRE(VALID_DISPSOCK(dispsock));
1129 udp_recv(ev, dispsock->disp, dispsock);
1133 udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1134 dns_dispatch_t *disp = ev->ev_arg;
1138 REQUIRE(VALID_DISPATCH(disp));
1139 udp_recv(ev, disp, NULL);
1145 * If I/O result == CANCELED or error, free the buffer.
1147 * If query, free the buffer, restart.
1150 * Allocate event, fill in details.
1151 * If cannot allocate, free buffer, restart.
1152 * find target. If not found, free buffer, restart.
1153 * if event queue is not empty, queue. else, send.
1157 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1158 isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1161 isc_buffer_t source;
1163 dns_dispentry_t *resp = NULL;
1164 dns_dispatchevent_t *rev;
1165 unsigned int bucket;
1166 isc_boolean_t killit;
1167 isc_boolean_t queue_response;
1168 dns_dispatchmgr_t *mgr;
1170 isc_netaddr_t netaddr;
1173 isc_boolean_t qidlocked = ISC_FALSE;
1180 dispatch_log(disp, LVL(90),
1181 "got packet: requests %d, buffers %d, recvs %d",
1182 disp->requests, disp->mgr->buffers, disp->recv_pending);
1184 if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1186 * Unless the receive event was imported from a listening
1187 * interface, in which case the event type is
1188 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1190 INSIST(disp->recv_pending != 0);
1191 disp->recv_pending = 0;
1194 if (dispsock != NULL &&
1195 (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1197 * dispsock->resp can be NULL if this transaction was canceled
1198 * just after receiving a response. Since this socket is
1199 * exclusively used and there should be at most one receive
1200 * event, the canceled event should have had no effect. So
1201 * we can (and should) deactivate the socket right now.
1203 deactivate_dispsocket(disp, dispsock);
1207 if (disp->shutting_down) {
1209 * This dispatcher is shutting down.
1211 free_buffer(disp, ev->region.base, ev->region.length);
1213 isc_event_free(&ev_in);
1216 killit = destroy_disp_ok(disp);
1217 UNLOCK(&disp->lock);
1219 isc_task_send(disp->task[0], &disp->ctlevent);
1224 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1225 if (dispsock != NULL) {
1226 resp = dispsock->resp;
1228 if (ev->result != ISC_R_SUCCESS) {
1230 * This is most likely a network error on a
1231 * connected socket. It makes no sense to
1232 * check the address or parse the packet, but it
1233 * will help to return the error to the caller.
1238 free_buffer(disp, ev->region.base, ev->region.length);
1240 UNLOCK(&disp->lock);
1241 isc_event_free(&ev_in);
1244 } else if (ev->result != ISC_R_SUCCESS) {
1245 free_buffer(disp, ev->region.base, ev->region.length);
1247 if (ev->result != ISC_R_CANCELED)
1248 dispatch_log(disp, ISC_LOG_ERROR,
1249 "odd socket result in udp_recv(): %s",
1250 isc_result_totext(ev->result));
1252 UNLOCK(&disp->lock);
1253 isc_event_free(&ev_in);
1258 * If this is from a blackholed address, drop it.
1260 isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1261 if (disp->mgr->blackhole != NULL &&
1262 dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1263 NULL, &match, NULL) == ISC_R_SUCCESS &&
1266 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1267 char netaddrstr[ISC_NETADDR_FORMATSIZE];
1268 isc_netaddr_format(&netaddr, netaddrstr,
1269 sizeof(netaddrstr));
1270 dispatch_log(disp, LVL(10),
1271 "blackholed packet from %s",
1274 free_buffer(disp, ev->region.base, ev->region.length);
1279 * Peek into the buffer to see what we can see.
1281 isc_buffer_init(&source, ev->region.base, ev->region.length);
1282 isc_buffer_add(&source, ev->n);
1283 dres = dns_message_peekheader(&source, &id, &flags);
1284 if (dres != ISC_R_SUCCESS) {
1285 free_buffer(disp, ev->region.base, ev->region.length);
1286 dispatch_log(disp, LVL(10), "got garbage packet");
1290 dispatch_log(disp, LVL(92),
1291 "got valid DNS message header, /QR %c, id %u",
1292 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1295 * Look at flags. If query, drop it. If response,
1296 * look to see where it goes.
1298 queue_response = ISC_FALSE;
1299 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1301 free_buffer(disp, ev->region.base, ev->region.length);
1306 * Search for the corresponding response. If we are using an exclusive
1307 * socket, we've already identified it and we can skip the search; but
1308 * the ID and the address must match the expected ones.
1311 bucket = dns_hash(qid, &ev->address, id, disp->localport);
1313 qidlocked = ISC_TRUE;
1314 resp = entry_search(qid, &ev->address, id, disp->localport,
1316 dispatch_log(disp, LVL(90),
1317 "search for response in bucket %d: %s",
1318 bucket, (resp == NULL ? "not found" : "found"));
1321 inc_stats(mgr, dns_resstatscounter_mismatch);
1322 free_buffer(disp, ev->region.base, ev->region.length);
1325 } else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1327 dispatch_log(disp, LVL(90),
1328 "response to an exclusive socket doesn't match");
1329 inc_stats(mgr, dns_resstatscounter_mismatch);
1330 free_buffer(disp, ev->region.base, ev->region.length);
1335 * Now that we have the original dispatch the query was sent
1336 * from check that the address and port the response was
1337 * sent to make sense.
1339 if (disp != resp->disp) {
1344 * Check that the socket types and ports match.
1346 if (disp->socktype != resp->disp->socktype ||
1347 isc_sockaddr_getport(&disp->local) !=
1348 isc_sockaddr_getport(&resp->disp->local)) {
1349 free_buffer(disp, ev->region.base, ev->region.length);
1354 * If both dispatches are bound to an address then fail as
1355 * the addresses can't be equal (enforced by the IP stack).
1357 * Note under Linux a packet can be sent out via IPv4 socket
1358 * and the response be received via a IPv6 socket.
1360 * Requests sent out via IPv6 should always come back in
1363 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1364 isc_sockaddr_pf(&disp->local) != PF_INET6) {
1365 free_buffer(disp, ev->region.base, ev->region.length);
1368 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1369 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1370 if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1371 !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1372 free_buffer(disp, ev->region.base, ev->region.length);
1378 queue_response = resp->item_out;
1379 rev = allocate_event(resp->disp);
1381 free_buffer(disp, ev->region.base, ev->region.length);
1386 * At this point, rev contains the event we want to fill in, and
1387 * resp contains the information on the place to send it to.
1388 * Send the event off.
1390 isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1391 isc_buffer_add(&rev->buffer, ev->n);
1392 rev->result = ev->result;
1394 rev->addr = ev->address;
1395 rev->pktinfo = ev->pktinfo;
1396 rev->attributes = ev->attributes;
1397 if (queue_response) {
1398 ISC_LIST_APPEND(resp->items, rev, ev_link);
1400 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1402 resp->action, resp->arg, resp, NULL, NULL);
1403 request_log(disp, resp, LVL(90),
1404 "[a] Sent event %p buffer %p len %d to task %p",
1405 rev, rev->buffer.base, rev->buffer.length,
1407 resp->item_out = ISC_TRUE;
1408 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1415 * Restart recv() to get the next packet.
1418 result = startrecv(disp, dispsock);
1419 if (result != ISC_R_SUCCESS && dispsock != NULL) {
1421 * XXX: wired. There seems to be no recovery process other than
1422 * deactivate this socket anyway (since we cannot start
1423 * receiving, we won't be able to receive a cancel event
1426 deactivate_dispsocket(disp, dispsock);
1428 UNLOCK(&disp->lock);
1430 isc_event_free(&ev_in);
1436 * If I/O result == CANCELED, EOF, or error, notify everyone as the
1437 * various queues drain.
1439 * If query, restart.
1442 * Allocate event, fill in details.
1443 * If cannot allocate, restart.
1444 * find target. If not found, restart.
1445 * if event queue is not empty, queue. else, send.
1449 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1450 dns_dispatch_t *disp = ev_in->ev_arg;
1451 dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1455 dns_dispentry_t *resp;
1456 dns_dispatchevent_t *rev;
1457 unsigned int bucket;
1458 isc_boolean_t killit;
1459 isc_boolean_t queue_response;
1462 char buf[ISC_SOCKADDR_FORMATSIZE];
1466 REQUIRE(VALID_DISPATCH(disp));
1470 dispatch_log(disp, LVL(90),
1471 "got TCP packet: requests %d, buffers %d, recvs %d",
1472 disp->requests, disp->tcpbuffers, disp->recv_pending);
1476 INSIST(disp->recv_pending != 0);
1477 disp->recv_pending = 0;
1479 if (disp->refcount == 0) {
1481 * This dispatcher is shutting down. Force cancelation.
1483 tcpmsg->result = ISC_R_CANCELED;
1486 if (tcpmsg->result != ISC_R_SUCCESS) {
1487 switch (tcpmsg->result) {
1488 case ISC_R_CANCELED:
1492 dispatch_log(disp, LVL(90), "shutting down on EOF");
1496 case ISC_R_CONNECTIONRESET:
1497 level = ISC_LOG_INFO;
1501 level = ISC_LOG_ERROR;
1503 isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1504 dispatch_log(disp, level, "shutting down due to TCP "
1505 "receive error: %s: %s", buf,
1506 isc_result_totext(tcpmsg->result));
1512 * The event is statically allocated in the tcpmsg
1513 * structure, and destroy_disp() frees the tcpmsg, so we must
1514 * free the event *before* calling destroy_disp().
1516 isc_event_free(&ev_in);
1518 disp->shutting_down = 1;
1519 disp->shutdown_why = tcpmsg->result;
1522 * If the recv() was canceled pass the word on.
1524 killit = destroy_disp_ok(disp);
1525 UNLOCK(&disp->lock);
1527 isc_task_send(disp->task[0], &disp->ctlevent);
1531 dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1533 tcpmsg->buffer.length, tcpmsg->buffer.base);
1536 * Peek into the buffer to see what we can see.
1538 dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1539 if (dres != ISC_R_SUCCESS) {
1540 dispatch_log(disp, LVL(10), "got garbage packet");
1544 dispatch_log(disp, LVL(92),
1545 "got valid DNS message header, /QR %c, id %u",
1546 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1549 * Allocate an event to send to the query or response client, and
1550 * allocate a new buffer for our use.
1554 * Look at flags. If query, drop it. If response,
1555 * look to see where it goes.
1557 queue_response = ISC_FALSE;
1558 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1568 bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1570 resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1571 dispatch_log(disp, LVL(90),
1572 "search for response in bucket %d: %s",
1573 bucket, (resp == NULL ? "not found" : "found"));
1577 queue_response = resp->item_out;
1578 rev = allocate_event(disp);
1583 * At this point, rev contains the event we want to fill in, and
1584 * resp contains the information on the place to send it to.
1585 * Send the event off.
1587 dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1589 rev->result = ISC_R_SUCCESS;
1591 rev->addr = tcpmsg->address;
1592 if (queue_response) {
1593 ISC_LIST_APPEND(resp->items, rev, ev_link);
1595 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1596 resp->action, resp->arg, resp, NULL, NULL);
1597 request_log(disp, resp, LVL(90),
1598 "[b] Sent event %p buffer %p len %d to task %p",
1599 rev, rev->buffer.base, rev->buffer.length,
1601 resp->item_out = ISC_TRUE;
1602 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1608 * Restart recv() to get the next packet.
1611 (void)startrecv(disp, NULL);
1613 UNLOCK(&disp->lock);
1615 isc_event_free(&ev_in);
1619 * disp must be locked.
1622 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1624 isc_region_t region;
1625 isc_socket_t *socket;
1627 if (disp->shutting_down == 1)
1628 return (ISC_R_SUCCESS);
1630 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1631 return (ISC_R_SUCCESS);
1633 if (disp->recv_pending != 0 && dispsock == NULL)
1634 return (ISC_R_SUCCESS);
1636 if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1637 return (ISC_R_NOMEMORY);
1639 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1641 return (ISC_R_SUCCESS);
1643 if (dispsock != NULL)
1644 socket = dispsock->socket;
1646 socket = disp->socket;
1647 INSIST(socket != NULL);
1649 switch (disp->socktype) {
1651 * UDP reads are always maximal.
1653 case isc_sockettype_udp:
1654 region.length = disp->mgr->buffersize;
1655 region.base = allocate_udp_buffer(disp);
1656 if (region.base == NULL)
1657 return (ISC_R_NOMEMORY);
1658 if (dispsock != NULL) {
1659 res = isc_socket_recv(socket, ®ion, 1,
1660 dispsock->task, udp_exrecv,
1662 if (res != ISC_R_SUCCESS) {
1663 free_buffer(disp, region.base, region.length);
1667 res = isc_socket_recv(socket, ®ion, 1,
1668 disp->task[0], udp_shrecv, disp);
1669 if (res != ISC_R_SUCCESS) {
1670 free_buffer(disp, region.base, region.length);
1671 disp->shutdown_why = res;
1672 disp->shutting_down = 1;
1674 return (ISC_R_SUCCESS); /* recover by cancel */
1676 INSIST(disp->recv_pending == 0);
1677 disp->recv_pending = 1;
1681 case isc_sockettype_tcp:
1682 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1684 if (res != ISC_R_SUCCESS) {
1685 disp->shutdown_why = res;
1686 disp->shutting_down = 1;
1688 return (ISC_R_SUCCESS); /* recover by cancel */
1690 INSIST(disp->recv_pending == 0);
1691 disp->recv_pending = 1;
1698 return (ISC_R_SUCCESS);
1702 * Mgr must be locked when calling this function.
1704 static isc_boolean_t
1705 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1706 mgr_log(mgr, LVL(90),
1707 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1708 "epool=%d, rpool=%d, dpool=%d",
1709 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1710 isc_mempool_getallocated(mgr->epool),
1711 isc_mempool_getallocated(mgr->rpool),
1712 isc_mempool_getallocated(mgr->dpool));
1713 if (!MGR_IS_SHUTTINGDOWN(mgr))
1715 if (!ISC_LIST_EMPTY(mgr->list))
1717 if (isc_mempool_getallocated(mgr->epool) != 0)
1719 if (isc_mempool_getallocated(mgr->rpool) != 0)
1721 if (isc_mempool_getallocated(mgr->dpool) != 0)
1728 * Mgr must be unlocked when calling this function.
1731 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1733 dns_dispatchmgr_t *mgr;
1742 DESTROYLOCK(&mgr->lock);
1745 DESTROYLOCK(&mgr->arc4_lock);
1747 isc_mempool_destroy(&mgr->epool);
1748 isc_mempool_destroy(&mgr->rpool);
1749 isc_mempool_destroy(&mgr->dpool);
1750 if (mgr->bpool != NULL)
1751 isc_mempool_destroy(&mgr->bpool);
1752 if (mgr->spool != NULL)
1753 isc_mempool_destroy(&mgr->spool);
1755 DESTROYLOCK(&mgr->pool_lock);
1758 if (mgr->entropy != NULL)
1759 isc_entropy_detach(&mgr->entropy);
1761 if (mgr->qid != NULL)
1762 qid_destroy(mctx, &mgr->qid);
1764 DESTROYLOCK(&mgr->buffer_lock);
1766 if (mgr->blackhole != NULL)
1767 dns_acl_detach(&mgr->blackhole);
1769 if (mgr->stats != NULL)
1770 isc_stats_detach(&mgr->stats);
1772 if (mgr->v4ports != NULL) {
1773 isc_mem_put(mctx, mgr->v4ports,
1774 mgr->nv4ports * sizeof(in_port_t));
1776 if (mgr->v6ports != NULL) {
1777 isc_mem_put(mctx, mgr->v6ports,
1778 mgr->nv6ports * sizeof(in_port_t));
1780 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1781 isc_mem_detach(&mctx);
1785 open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1786 unsigned int options, isc_socket_t **sockp)
1789 isc_result_t result;
1793 result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1794 isc_sockettype_udp, &sock);
1795 if (result != ISC_R_SUCCESS)
1797 isc_socket_setname(sock, "dispatcher", NULL);
1800 result = isc_socket_open(sock);
1801 if (result != ISC_R_SUCCESS)
1808 #ifndef ISC_ALLOW_MAPPED
1809 isc_socket_ipv6only(sock, ISC_TRUE);
1811 result = isc_socket_bind(sock, local, options);
1812 if (result != ISC_R_SUCCESS) {
1814 isc_socket_detach(&sock);
1817 isc_socket_close(sock);
1826 return (ISC_R_SUCCESS);
1830 * Create a temporary port list to set the initial default set of dispatch
1831 * ports: [1024, 65535]. This is almost meaningless as the application will
1832 * normally set the ports explicitly, but is provided to fill some minor corner
1836 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1837 isc_result_t result;
1839 result = isc_portset_create(mctx, portsetp);
1840 if (result != ISC_R_SUCCESS)
1842 isc_portset_addrange(*portsetp, 1024, 65535);
1844 return (ISC_R_SUCCESS);
1852 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1853 dns_dispatchmgr_t **mgrp)
1855 dns_dispatchmgr_t *mgr;
1856 isc_result_t result;
1857 isc_portset_t *v4portset = NULL;
1858 isc_portset_t *v6portset = NULL;
1860 REQUIRE(mctx != NULL);
1861 REQUIRE(mgrp != NULL && *mgrp == NULL);
1863 mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1865 return (ISC_R_NOMEMORY);
1868 isc_mem_attach(mctx, &mgr->mctx);
1870 mgr->blackhole = NULL;
1873 result = isc_mutex_init(&mgr->lock);
1874 if (result != ISC_R_SUCCESS)
1877 result = isc_mutex_init(&mgr->arc4_lock);
1878 if (result != ISC_R_SUCCESS)
1881 result = isc_mutex_init(&mgr->buffer_lock);
1882 if (result != ISC_R_SUCCESS)
1883 goto kill_arc4_lock;
1885 result = isc_mutex_init(&mgr->pool_lock);
1886 if (result != ISC_R_SUCCESS)
1887 goto kill_buffer_lock;
1890 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1891 &mgr->epool) != ISC_R_SUCCESS) {
1892 result = ISC_R_NOMEMORY;
1893 goto kill_pool_lock;
1897 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1898 &mgr->rpool) != ISC_R_SUCCESS) {
1899 result = ISC_R_NOMEMORY;
1904 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
1905 &mgr->dpool) != ISC_R_SUCCESS) {
1906 result = ISC_R_NOMEMORY;
1910 isc_mempool_setname(mgr->epool, "dispmgr_epool");
1911 isc_mempool_setfreemax(mgr->epool, 1024);
1912 isc_mempool_associatelock(mgr->epool, &mgr->pool_lock);
1914 isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
1915 isc_mempool_setfreemax(mgr->rpool, 1024);
1916 isc_mempool_associatelock(mgr->rpool, &mgr->pool_lock);
1918 isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
1919 isc_mempool_setfreemax(mgr->dpool, 1024);
1920 isc_mempool_associatelock(mgr->dpool, &mgr->pool_lock);
1923 mgr->buffersize = 0;
1924 mgr->maxbuffers = 0;
1927 mgr->entropy = NULL;
1930 ISC_LIST_INIT(mgr->list);
1931 mgr->v4ports = NULL;
1932 mgr->v6ports = NULL;
1935 mgr->magic = DNS_DISPATCHMGR_MAGIC;
1937 result = create_default_portset(mctx, &v4portset);
1938 if (result == ISC_R_SUCCESS) {
1939 result = create_default_portset(mctx, &v6portset);
1940 if (result == ISC_R_SUCCESS) {
1941 result = dns_dispatchmgr_setavailports(mgr,
1946 if (v4portset != NULL)
1947 isc_portset_destroy(mctx, &v4portset);
1948 if (v6portset != NULL)
1949 isc_portset_destroy(mctx, &v6portset);
1950 if (result != ISC_R_SUCCESS)
1954 if (entropy != NULL)
1955 isc_entropy_attach(entropy, &mgr->entropy);
1960 dispatch_initrandom(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
1963 return (ISC_R_SUCCESS);
1966 isc_mempool_destroy(&mgr->dpool);
1968 isc_mempool_destroy(&mgr->rpool);
1970 isc_mempool_destroy(&mgr->epool);
1972 DESTROYLOCK(&mgr->pool_lock);
1974 DESTROYLOCK(&mgr->buffer_lock);
1976 DESTROYLOCK(&mgr->arc4_lock);
1978 DESTROYLOCK(&mgr->lock);
1980 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1981 isc_mem_detach(&mctx);
1987 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1988 REQUIRE(VALID_DISPATCHMGR(mgr));
1989 if (mgr->blackhole != NULL)
1990 dns_acl_detach(&mgr->blackhole);
1991 dns_acl_attach(blackhole, &mgr->blackhole);
1995 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
1996 REQUIRE(VALID_DISPATCHMGR(mgr));
1997 return (mgr->blackhole);
2001 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
2002 dns_portlist_t *portlist)
2004 REQUIRE(VALID_DISPATCHMGR(mgr));
2007 /* This function is deprecated: use dns_dispatchmgr_setavailports(). */
2012 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
2013 REQUIRE(VALID_DISPATCHMGR(mgr));
2014 return (NULL); /* this function is deprecated */
2018 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
2019 isc_portset_t *v6portset)
2021 in_port_t *v4ports, *v6ports, p;
2022 unsigned int nv4ports, nv6ports, i4, i6;
2024 REQUIRE(VALID_DISPATCHMGR(mgr));
2026 nv4ports = isc_portset_nports(v4portset);
2027 nv6ports = isc_portset_nports(v6portset);
2030 if (nv4ports != 0) {
2031 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
2032 if (v4ports == NULL)
2033 return (ISC_R_NOMEMORY);
2036 if (nv6ports != 0) {
2037 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
2038 if (v6ports == NULL) {
2039 if (v4ports != NULL) {
2040 isc_mem_put(mgr->mctx, v4ports,
2042 isc_portset_nports(v4portset));
2044 return (ISC_R_NOMEMORY);
2052 if (isc_portset_isset(v4portset, p)) {
2053 INSIST(i4 < nv4ports);
2056 if (isc_portset_isset(v6portset, p)) {
2057 INSIST(i6 < nv6ports);
2060 } while (p++ < 65535);
2061 INSIST(i4 == nv4ports && i6 == nv6ports);
2064 if (mgr->v4ports != NULL) {
2065 isc_mem_put(mgr->mctx, mgr->v4ports,
2066 mgr->nv4ports * sizeof(in_port_t));
2068 mgr->v4ports = v4ports;
2069 mgr->nv4ports = nv4ports;
2071 if (mgr->v6ports != NULL) {
2072 isc_mem_put(mgr->mctx, mgr->v6ports,
2073 mgr->nv6ports * sizeof(in_port_t));
2075 mgr->v6ports = v6ports;
2076 mgr->nv6ports = nv6ports;
2079 return (ISC_R_SUCCESS);
2083 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
2084 unsigned int buffersize, unsigned int maxbuffers,
2085 unsigned int maxrequests, unsigned int buckets,
2086 unsigned int increment)
2088 isc_result_t result;
2090 REQUIRE(VALID_DISPATCHMGR(mgr));
2091 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2092 REQUIRE(maxbuffers > 0);
2093 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2094 REQUIRE(increment > buckets);
2097 * Keep some number of items around. This should be a config
2098 * option. For now, keep 8, but later keep at least two even
2099 * if the caller wants less. This allows us to ensure certain
2100 * things, like an event can be "freed" and the next allocation
2101 * will always succeed.
2103 * Note that if limits are placed on anything here, we use one
2104 * event internally, so the actual limit should be "wanted + 1."
2112 LOCK(&mgr->buffer_lock);
2114 /* Create or adjust buffer pool */
2115 if (mgr->bpool != NULL) {
2117 * We only increase the maxbuffers to avoid accidental buffer
2118 * shortage. Ideally we'd separate the manager-wide maximum
2119 * from per-dispatch limits and respect the latter within the
2120 * global limit. But at this moment that's deemed to be
2121 * overkilling and isn't worth additional implementation
2124 if (maxbuffers > mgr->maxbuffers) {
2125 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2126 mgr->maxbuffers = maxbuffers;
2129 result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
2130 if (result != ISC_R_SUCCESS) {
2131 UNLOCK(&mgr->buffer_lock);
2134 isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
2135 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2136 isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock);
2139 /* Create or adjust socket pool */
2140 if (mgr->spool != NULL) {
2141 isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
2142 UNLOCK(&mgr->buffer_lock);
2143 return (ISC_R_SUCCESS);
2145 result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
2147 if (result != ISC_R_SUCCESS) {
2148 UNLOCK(&mgr->buffer_lock);
2151 isc_mempool_setname(mgr->spool, "dispmgr_spool");
2152 isc_mempool_setmaxalloc(mgr->spool, maxrequests);
2153 isc_mempool_associatelock(mgr->spool, &mgr->pool_lock);
2155 result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
2156 if (result != ISC_R_SUCCESS)
2159 mgr->buffersize = buffersize;
2160 mgr->maxbuffers = maxbuffers;
2161 UNLOCK(&mgr->buffer_lock);
2162 return (ISC_R_SUCCESS);
2165 isc_mempool_destroy(&mgr->bpool);
2166 if (mgr->spool != NULL)
2167 isc_mempool_destroy(&mgr->spool);
2168 UNLOCK(&mgr->buffer_lock);
2173 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
2174 dns_dispatchmgr_t *mgr;
2175 isc_boolean_t killit;
2177 REQUIRE(mgrp != NULL);
2178 REQUIRE(VALID_DISPATCHMGR(*mgrp));
2184 mgr->state |= MGR_SHUTTINGDOWN;
2186 killit = destroy_mgr_ok(mgr);
2189 mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
2196 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2197 REQUIRE(VALID_DISPATCHMGR(mgr));
2198 REQUIRE(ISC_LIST_EMPTY(mgr->list));
2199 REQUIRE(mgr->stats == NULL);
2201 isc_stats_attach(stats, &mgr->stats);
/*
 * bsearch() comparison callback for arrays of in_port_t: returns -1, 0 or
 * 1 according to whether the port at 'key' is below, equal to or above the
 * port at 'ent'.
 */
static int
port_cmp(const void *key, const void *ent) {
	const in_port_t wanted = *(const in_port_t *)key;
	const in_port_t candidate = *(const in_port_t *)ent;

	return ((wanted > candidate) - (wanted < candidate));
}
2217 static isc_boolean_t
2218 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2219 isc_sockaddr_t *sockaddrp)
2221 isc_sockaddr_t sockaddr;
2222 isc_result_t result;
2223 in_port_t *ports, port;
2224 unsigned int nports;
2225 isc_boolean_t available = ISC_FALSE;
2227 REQUIRE(sock != NULL || sockaddrp != NULL);
2231 sockaddrp = &sockaddr;
2232 result = isc_socket_getsockname(sock, sockaddrp);
2233 if (result != ISC_R_SUCCESS)
2237 if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2238 ports = mgr->v4ports;
2239 nports = mgr->nv4ports;
2241 ports = mgr->v6ports;
2242 nports = mgr->nv6ports;
2247 port = isc_sockaddr_getport(sockaddrp);
2248 if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2249 available = ISC_TRUE;
/* True when _a1 and _a2 agree on every bit selected by _mask. */
#define ATTRMATCH(_a1, _a2, _mask) ((((_a1) ^ (_a2)) & (_mask)) == 0)
2258 static isc_boolean_t
2259 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2260 isc_sockaddr_t sockaddr;
2261 isc_result_t result;
2263 REQUIRE(disp->socket != NULL);
2269 * Don't match wildcard ports unless the port is available in the
2270 * current configuration.
2272 if (isc_sockaddr_getport(addr) == 0 &&
2273 isc_sockaddr_getport(&disp->local) == 0 &&
2274 !portavailable(disp->mgr, disp->socket, NULL)) {
2279 * Check if we match the binding <address,port>.
2280 * Wildcard ports match/fail here.
2282 if (isc_sockaddr_equal(&disp->local, addr))
2284 if (isc_sockaddr_getport(addr) == 0)
2288 * Check if we match a bound wildcard port <address,port>.
2290 if (!isc_sockaddr_eqaddr(&disp->local, addr))
2292 result = isc_socket_getsockname(disp->socket, &sockaddr);
2293 if (result != ISC_R_SUCCESS)
2296 return (isc_sockaddr_equal(&sockaddr, addr));
2300 * Requires mgr be locked.
2302 * No dispatcher can be locked by this thread when calling this function.
2306 * If a matching dispatcher is found, it is locked after this function
2307 * returns, and must be unlocked by the caller.
2310 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2311 unsigned int attributes, unsigned int mask,
2312 dns_dispatch_t **dispp)
2314 dns_dispatch_t *disp;
2315 isc_result_t result;
2318 * Make certain that we will not match a private or exclusive dispatch.
2320 attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2321 mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2323 disp = ISC_LIST_HEAD(mgr->list);
2324 while (disp != NULL) {
2326 if ((disp->shutting_down == 0)
2327 && ATTRMATCH(disp->attributes, attributes, mask)
2328 && local_addr_match(disp, local))
2330 UNLOCK(&disp->lock);
2331 disp = ISC_LIST_NEXT(disp, link);
2335 result = ISC_R_NOTFOUND;
2340 result = ISC_R_SUCCESS;
2347 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2348 unsigned int increment, dns_qid_t **qidp,
2349 isc_boolean_t needsocktable)
2353 isc_result_t result;
2355 REQUIRE(VALID_DISPATCHMGR(mgr));
2356 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2357 REQUIRE(increment > buckets);
2358 REQUIRE(qidp != NULL && *qidp == NULL);
2360 qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2362 return (ISC_R_NOMEMORY);
2364 qid->qid_table = isc_mem_get(mgr->mctx,
2365 buckets * sizeof(dns_displist_t));
2366 if (qid->qid_table == NULL) {
2367 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2368 return (ISC_R_NOMEMORY);
2371 qid->sock_table = NULL;
2372 if (needsocktable) {
2373 qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2374 sizeof(dispsocketlist_t));
2375 if (qid->sock_table == NULL) {
2376 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2377 isc_mem_put(mgr->mctx, qid->qid_table,
2378 buckets * sizeof(dns_displist_t));
2379 return (ISC_R_NOMEMORY);
2383 result = isc_mutex_init(&qid->lock);
2384 if (result != ISC_R_SUCCESS) {
2385 if (qid->sock_table != NULL) {
2386 isc_mem_put(mgr->mctx, qid->sock_table,
2387 buckets * sizeof(dispsocketlist_t));
2389 isc_mem_put(mgr->mctx, qid->qid_table,
2390 buckets * sizeof(dns_displist_t));
2391 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2395 for (i = 0; i < buckets; i++) {
2396 ISC_LIST_INIT(qid->qid_table[i]);
2397 if (qid->sock_table != NULL)
2398 ISC_LIST_INIT(qid->sock_table[i]);
2401 qid->qid_nbuckets = buckets;
2402 qid->qid_increment = increment;
2403 qid->magic = QID_MAGIC;
2405 return (ISC_R_SUCCESS);
2409 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2412 REQUIRE(qidp != NULL);
2415 REQUIRE(VALID_QID(qid));
2419 isc_mem_put(mctx, qid->qid_table,
2420 qid->qid_nbuckets * sizeof(dns_displist_t));
2421 if (qid->sock_table != NULL) {
2422 isc_mem_put(mctx, qid->sock_table,
2423 qid->qid_nbuckets * sizeof(dispsocketlist_t));
2425 DESTROYLOCK(&qid->lock);
2426 isc_mem_put(mctx, qid, sizeof(*qid));
2430 * Allocate and set important limits.
2433 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2434 dns_dispatch_t **dispp)
2436 dns_dispatch_t *disp;
2437 isc_result_t result;
2439 REQUIRE(VALID_DISPATCHMGR(mgr));
2440 REQUIRE(dispp != NULL && *dispp == NULL);
2443 * Set up the dispatcher, mostly. Don't bother setting some of
2444 * the options that are controlled by tcp vs. udp, etc.
2447 disp = isc_mempool_get(mgr->dpool);
2449 return (ISC_R_NOMEMORY);
2453 disp->maxrequests = maxrequests;
2454 disp->attributes = 0;
2455 ISC_LINK_INIT(disp, link);
2457 disp->recv_pending = 0;
2458 memset(&disp->local, 0, sizeof(disp->local));
2459 disp->localport = 0;
2460 disp->shutting_down = 0;
2461 disp->shutdown_out = 0;
2462 disp->connected = 0;
2463 disp->tcpmsg_valid = 0;
2464 disp->shutdown_why = ISC_R_UNEXPECTED;
2466 disp->tcpbuffers = 0;
2468 ISC_LIST_INIT(disp->activesockets);
2469 ISC_LIST_INIT(disp->inactivesockets);
2471 dispatch_initrandom(&disp->arc4ctx, mgr->entropy, NULL);
2472 disp->port_table = NULL;
2473 disp->portpool = NULL;
2475 result = isc_mutex_init(&disp->lock);
2476 if (result != ISC_R_SUCCESS)
2479 disp->failsafe_ev = allocate_event(disp);
2480 if (disp->failsafe_ev == NULL) {
2481 result = ISC_R_NOMEMORY;
2485 disp->magic = DISPATCH_MAGIC;
2488 return (ISC_R_SUCCESS);
2494 DESTROYLOCK(&disp->lock);
2496 isc_mempool_put(mgr->dpool, disp);
2503 * MUST be unlocked, and not used by anything.
2506 dispatch_free(dns_dispatch_t **dispp)
2508 dns_dispatch_t *disp;
2509 dns_dispatchmgr_t *mgr;
2512 REQUIRE(VALID_DISPATCH(*dispp));
2517 REQUIRE(VALID_DISPATCHMGR(mgr));
2519 if (disp->tcpmsg_valid) {
2520 dns_tcpmsg_invalidate(&disp->tcpmsg);
2521 disp->tcpmsg_valid = 0;
2524 INSIST(disp->tcpbuffers == 0);
2525 INSIST(disp->requests == 0);
2526 INSIST(disp->recv_pending == 0);
2527 INSIST(ISC_LIST_EMPTY(disp->activesockets));
2528 INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2530 isc_mempool_put(mgr->epool, disp->failsafe_ev);
2531 disp->failsafe_ev = NULL;
2533 if (disp->qid != NULL)
2534 qid_destroy(mgr->mctx, &disp->qid);
2536 if (disp->port_table != NULL) {
2537 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2538 INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2539 isc_mem_put(mgr->mctx, disp->port_table,
2540 sizeof(disp->port_table[0]) *
2541 DNS_DISPATCH_PORTTABLESIZE);
2544 if (disp->portpool != NULL)
2545 isc_mempool_destroy(&disp->portpool);
2548 DESTROYLOCK(&disp->lock);
2550 isc_mempool_put(mgr->dpool, disp);
2554 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2555 isc_taskmgr_t *taskmgr, unsigned int buffersize,
2556 unsigned int maxbuffers, unsigned int maxrequests,
2557 unsigned int buckets, unsigned int increment,
2558 unsigned int attributes, dns_dispatch_t **dispp)
2560 isc_result_t result;
2561 dns_dispatch_t *disp;
2566 REQUIRE(VALID_DISPATCHMGR(mgr));
2567 REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2568 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2569 REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2571 attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */
2576 * dispatch_allocate() checks mgr for us.
2577 * qid_allocate() checks buckets and increment for us.
2580 result = dispatch_allocate(mgr, maxrequests, &disp);
2581 if (result != ISC_R_SUCCESS) {
2586 result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2587 if (result != ISC_R_SUCCESS)
2588 goto deallocate_dispatch;
2590 disp->socktype = isc_sockettype_tcp;
2591 disp->socket = NULL;
2592 isc_socket_attach(sock, &disp->socket);
2595 disp->task[0] = NULL;
2596 result = isc_task_create(taskmgr, 0, &disp->task[0]);
2597 if (result != ISC_R_SUCCESS)
2600 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2601 DNS_EVENT_DISPATCHCONTROL,
2603 sizeof(isc_event_t));
2604 if (disp->ctlevent == NULL) {
2605 result = ISC_R_NOMEMORY;
2609 isc_task_setname(disp->task[0], "tcpdispatch", disp);
2611 dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2612 disp->tcpmsg_valid = 1;
2614 disp->attributes = attributes;
2617 * Append it to the dispatcher list.
2619 ISC_LIST_APPEND(mgr->list, disp, link);
2622 mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2623 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2627 return (ISC_R_SUCCESS);
2633 isc_task_detach(&disp->task[0]);
2635 isc_socket_detach(&disp->socket);
2636 deallocate_dispatch:
2637 dispatch_free(&disp);
2645 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2646 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2647 unsigned int buffersize,
2648 unsigned int maxbuffers, unsigned int maxrequests,
2649 unsigned int buckets, unsigned int increment,
2650 unsigned int attributes, unsigned int mask,
2651 dns_dispatch_t **dispp)
2653 isc_result_t result;
2654 dns_dispatch_t *disp = NULL;
2656 REQUIRE(VALID_DISPATCHMGR(mgr));
2657 REQUIRE(sockmgr != NULL);
2658 REQUIRE(localaddr != NULL);
2659 REQUIRE(taskmgr != NULL);
2660 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2661 REQUIRE(maxbuffers > 0);
2662 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2663 REQUIRE(increment > buckets);
2664 REQUIRE(dispp != NULL && *dispp == NULL);
2665 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2667 result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2668 maxrequests, buckets, increment);
2669 if (result != ISC_R_SUCCESS)
2674 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2675 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2680 * See if we have a dispatcher that matches.
2682 result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2683 if (result == ISC_R_SUCCESS) {
2686 if (disp->maxrequests < maxrequests)
2687 disp->maxrequests = maxrequests;
2689 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 &&
2690 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2692 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2693 if (disp->recv_pending != 0)
2694 isc_socket_cancel(disp->socket, disp->task[0],
2695 ISC_SOCKCANCEL_RECV);
2698 UNLOCK(&disp->lock);
2703 return (ISC_R_SUCCESS);
2710 result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2711 maxrequests, attributes, &disp);
2712 if (result != ISC_R_SUCCESS) {
2719 return (ISC_R_SUCCESS);
2723 * mgr should be locked.
2726 #ifndef DNS_DISPATCH_HELD
2727 #define DNS_DISPATCH_HELD 20U
/*
 * Obtain a UDP socket for 'disp' on 'localaddr'. '*sockp' must point at a
 * NULL pointer on entry.
 *
 * Three strategies are visible in the body:
 *  - up to 1024 attempts on ports drawn at random from the manager's v4 or
 *    v6 port array, chosen by the address family of disp->local;
 *  - a direct open of 'localaddr' with ISC_SOCKET_REUSEADDRESS;
 *  - a loop of at most 0xffff plain opens, each tested with
 *    portavailable(), using held[] to keep track of other sockets.
 *
 * Returns ISC_R_ADDRNOTAVAIL or ISC_R_FAILURE on the visible error paths.
 * NOTE(review): the conditions that choose between the strategies, the
 * store into '*sockp' and the final return are not visible in this
 * listing -- confirm against the full source.
 */
2731 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2732 isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2733 isc_socket_t **sockp)
2736 isc_socket_t *held[DNS_DISPATCH_HELD];
2737 isc_sockaddr_t localaddr_bound;
2738 isc_socket_t *sock = NULL;
2739 isc_result_t result = ISC_R_SUCCESS;
2740 isc_boolean_t anyport;
2742 INSIST(sockp != NULL && *sockp == NULL);
/*
 * Work on a copy: the random-port loop sets a port on localaddr_bound,
 * while the later open_socket() calls still use the caller's 'localaddr'.
 * 'anyport' records whether the caller asked for port 0.
 */
2744 localaddr_bound = *localaddr;
2745 anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
2748 unsigned int nports;
2752 * If no port is specified, we first try to pick up a random
2753 * port by ourselves.
2755 if (isc_sockaddr_pf(&disp->local) == AF_INET) {
2756 nports = disp->mgr->nv4ports;
2757 ports = disp->mgr->v4ports;
2759 nports = disp->mgr->nv6ports;
2760 ports = disp->mgr->v6ports;
/*
 * NOTE(review): the guard for this return is not visible; presumably it
 * fires when the selected port array is empty -- confirm.
 */
2763 return (ISC_R_ADDRNOTAVAIL);
2765 for (i = 0; i < 1024; i++) {
2768 prt = ports[dispatch_uniformrandom(
2771 isc_sockaddr_setport(&localaddr_bound, prt);
2772 result = open_socket(sockmgr, &localaddr_bound,
/*
 * Any outcome other than ISC_R_ADDRINUSE -- success or a different
 * error -- records the port that was tried in disp->localport.
 */
2774 if (result == ISC_R_SUCCESS ||
2775 result != ISC_R_ADDRINUSE) {
2776 disp->localport = prt;
2783 * If this fails 1024 times, we then ask the kernel for
2787 /* Allow address reuse for non-random ports. */
2788 result = open_socket(sockmgr, localaddr,
2789 ISC_SOCKET_REUSEADDRESS, &sock);
2791 if (result == ISC_R_SUCCESS)
/*
 * Clear held[] so the NULL checks below only ever see sockets that this
 * function stored there.
 */
2797 memset(held, 0, sizeof(held));
2800 for (j = 0; j < 0xffffU; j++) {
2801 result = open_socket(sockmgr, localaddr, 0, &sock);
2802 if (result != ISC_R_SUCCESS)
2806 else if (portavailable(mgr, sock, NULL))
/*
 * A slot that is already occupied is detached first.
 * NOTE(review): the store of the new socket into held[] and the index
 * update are not visible here -- confirm.
 */
2808 if (held[i] != NULL)
2809 isc_socket_detach(&held[i]);
2812 if (i == DNS_DISPATCH_HELD)
2816 mgr_log(mgr, ISC_LOG_ERROR,
2817 "avoid-v%s-udp-ports: unable to allocate "
2818 "an available port",
2819 isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
2820 result = ISC_R_FAILURE;
/* Release every socket that is still referenced from held[]. */
2826 for (i = 0; i < DNS_DISPATCH_HELD; i++) {
2827 if (held[i] != NULL)
2828 isc_socket_detach(&held[i]);
/*
 * Build a new UDP dispatch for 'localaddr' and append it to mgr->list.
 *
 * Steps visible in the body:
 *  - dispatch_allocate() creates the dispatch object;
 *  - without DNS_DISPATCHATTR_EXCLUSIVE, get_udpsocket() supplies the
 *    dispatch socket; with it, a non-wildcard 'localaddr' is only probed
 *    with a throw-away socket, and the port table and port mempool are
 *    set up instead;
 *  - socktype, socket and local address are stored, the tasks and the
 *    control event are created;
 *  - the stored attributes always have DNS_DISPATCHATTR_UDP set and
 *    DNS_DISPATCHATTR_TCP cleared.
 *
 * On failure the cleanup code detaches the tasks and the socket and calls
 * dispatch_free().
 * NOTE(review): the store through 'dispp', the return statements and some
 * cleanup labels are not visible in this listing -- confirm against the
 * full source.
 */
2835 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2836 isc_taskmgr_t *taskmgr,
2837 isc_sockaddr_t *localaddr,
2838 unsigned int maxrequests,
2839 unsigned int attributes,
2840 dns_dispatch_t **dispp)
2842 isc_result_t result;
2843 dns_dispatch_t *disp;
2844 isc_socket_t *sock = NULL;
2848 * dispatch_allocate() checks mgr for us.
2851 result = dispatch_allocate(mgr, maxrequests, &disp);
2852 if (result != ISC_R_SUCCESS)
/* Non-exclusive dispatches obtain their socket now. */
2855 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
2856 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock);
2857 if (result != ISC_R_SUCCESS)
2858 goto deallocate_dispatch;
2860 isc_sockaddr_t sa_any;
2863 * For dispatches using exclusive sockets with a specific
2864 * source address, we only check if the specified address is
2865 * available on the system. Query sockets will be created later
2868 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
2869 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
/*
 * Probe only: the socket is detached again right away and just 'result'
 * is kept.
 */
2870 result = open_socket(sockmgr, localaddr, 0, &sock);
2872 isc_socket_detach(&sock);
2873 if (result != ISC_R_SUCCESS)
2874 goto deallocate_dispatch;
2877 disp->port_table = isc_mem_get(mgr->mctx,
2878 sizeof(disp->port_table[0]) *
2879 DNS_DISPATCH_PORTTABLESIZE);
/*
 * NOTE(review): 'result' is not assigned on this failure branch, so it
 * still holds the value left by the last successful call when control
 * reaches deallocate_dispatch. Confirm the caller cannot see success
 * here; ISC_R_NOMEMORY looks like the intended value.
 */
2880 if (disp->port_table == NULL)
2881 goto deallocate_dispatch;
2882 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2883 ISC_LIST_INIT(disp->port_table[i]);
2885 result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
2887 if (result != ISC_R_SUCCESS)
2888 goto deallocate_dispatch;
2889 isc_mempool_setname(disp->portpool, "disp_portpool");
2890 isc_mempool_setfreemax(disp->portpool, 128);
/*
 * 'sock' is still NULL here when the exclusive branch was taken, since
 * the probe socket has already been detached.
 */
2892 disp->socktype = isc_sockettype_udp;
2893 disp->socket = sock;
2894 disp->local = *localaddr;
/*
 * Exclusive dispatches use MAX_INTERNAL_TASKS tasks; the count used
 * otherwise is set on a line not visible in this listing.
 */
2896 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
2897 disp->ntasks = MAX_INTERNAL_TASKS;
2900 for (i = 0; i < disp->ntasks; i++) {
2901 disp->task[i] = NULL;
2902 result = isc_task_create(taskmgr, 0, &disp->task[i]);
2903 if (result != ISC_R_SUCCESS) {
2905 isc_task_shutdown(disp->task[i]);
2906 isc_task_detach(&disp->task[i]);
2910 isc_task_setname(disp->task[i], "udpdispatch", disp);
2913 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2914 DNS_EVENT_DISPATCHCONTROL,
2916 sizeof(isc_event_t));
2917 if (disp->ctlevent == NULL) {
2918 result = ISC_R_NOMEMORY;
/* Whatever the caller passed, record this dispatch as UDP and not TCP. */
2922 attributes &= ~DNS_DISPATCHATTR_TCP;
2923 attributes |= DNS_DISPATCHATTR_UDP;
2924 disp->attributes = attributes;
2927 * Append it to the dispatcher list.
2929 ISC_LIST_APPEND(mgr->list, disp, link);
2931 mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
2932 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
2933 if (disp->socket != NULL)
2934 dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
/* Error cleanup: undo the task, socket and dispatch setup in turn. */
2943 for (i = 0; i < disp->ntasks; i++)
2944 isc_task_detach(&disp->task[i]);
2946 if (disp->socket != NULL)
2947 isc_socket_detach(&disp->socket);
2948 deallocate_dispatch:
2949 dispatch_free(&disp);
/*
 * Attach to 'disp' through 'dispp'. 'disp' must be a valid dispatch and
 * '*dispp' must be NULL on entry.
 * NOTE(review): only the unlock of disp->lock is visible in this listing;
 * the lock acquisition, the reference bookkeeping and the store into
 * '*dispp' are not -- confirm against the full source.
 */
2955 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
2956 REQUIRE(VALID_DISPATCH(disp));
2957 REQUIRE(dispp != NULL && *dispp == NULL);
2961 UNLOCK(&disp->lock);
2967 * It is important to lock the manager while we are deleting the dispatch,
2968 * since dns_dispatch_getudp will call dispatch_find, which returns to
2969 * the caller a dispatch but does not attach to it until later. _getudp
2970 * locks the manager, however, so locking it here will keep us from attaching
2971 * to a dispatcher that is in the process of going away.
/*
 * Detach from the dispatch referenced by '*dispp', which must be a valid
 * dispatch.
 *
 * When disp->refcount is found to be 0, any pending receive on the
 * dispatch socket and the receives on every socket in disp->activesockets
 * are cancelled, and the dispatch is marked as shutting down.
 * destroy_disp_ok() is then consulted before the lock is dropped, and the
 * control event is sent to task[0].
 * NOTE(review): the lock acquisition, the refcount decrement, the clearing
 * of '*dispp' and the guard on the final isc_task_send() (presumably
 * 'killit') are not visible in this listing -- confirm.
 */
2974 dns_dispatch_detach(dns_dispatch_t **dispp) {
2975 dns_dispatch_t *disp;
2976 dispsocket_t *dispsock;
2977 isc_boolean_t killit;
2979 REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
2986 INSIST(disp->refcount > 0);
2989 if (disp->refcount == 0) {
/* Cancel the receive on the dispatch socket only if one is pending. */
2990 if (disp->recv_pending > 0)
2991 isc_socket_cancel(disp->socket, disp->task[0],
2992 ISC_SOCKCANCEL_RECV);
/* Cancel receives on each active per-query socket as well. */
2993 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
2995 dispsock = ISC_LIST_NEXT(dispsock, link)) {
2996 isc_socket_cancel(dispsock->socket, dispsock->task,
2997 ISC_SOCKCANCEL_RECV);
2999 disp->shutting_down = 1;
3002 dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
/* The destroy decision is taken while disp->lock is still held. */
3004 killit = destroy_disp_ok(disp);
3005 UNLOCK(&disp->lock);
3007 isc_task_send(disp->task[0], &disp->ctlevent);
/*
 * Register a response entry on 'disp' for replies from 'dest'. The entry
 * records 'task' (attached) and 'action' and is linked into the QID table
 * under a bucket computed from the destination, message ID and local port.
 *
 * Requires a valid dispatch, non-NULL 'task', 'dest' and 'idp', and 'resp'
 * pointing at a NULL pointer. 'sockmgr' must be non-NULL when the dispatch
 * has DNS_DISPATCHATTR_EXCLUSIVE.
 *
 * Visible return values:
 *  - ISC_R_SHUTTINGDOWN when the dispatch is shutting down;
 *  - ISC_R_QUOTA when disp->requests has reached disp->maxrequests;
 *  - ISC_R_NOMORE on the failure path after the ID search;
 *  - ISC_R_NOMEMORY on the failure path after the entry allocation;
 *  - ISC_R_SUCCESS otherwise.
 * NOTE(review): several lines are not visible in this listing (the lock
 * acquisitions, the stores through 'idp' and 'resp', and the returns on
 * the socket and startrecv failure paths) -- confirm against the full
 * source.
 */
3011 dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3012 isc_task_t *task, isc_taskaction_t action, void *arg,
3013 dns_messageid_t *idp, dns_dispentry_t **resp,
3014 isc_socketmgr_t *sockmgr)
3016 dns_dispentry_t *res;
3017 unsigned int bucket;
3018 in_port_t localport = 0;
3023 dispsocket_t *dispsocket = NULL;
3024 isc_result_t result;
3026 REQUIRE(VALID_DISPATCH(disp));
3027 REQUIRE(task != NULL);
3028 REQUIRE(dest != NULL);
3029 REQUIRE(resp != NULL && *resp == NULL);
3030 REQUIRE(idp != NULL);
3031 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3032 REQUIRE(sockmgr != NULL);
3036 if (disp->shutting_down == 1) {
3037 UNLOCK(&disp->lock);
3038 return (ISC_R_SHUTTINGDOWN);
3041 if (disp->requests >= disp->maxrequests) {
3042 UNLOCK(&disp->lock);
3043 return (ISC_R_QUOTA);
/*
 * Socket quota handling applies to exclusive dispatches only.
 * NOTE(review): 'oldestsocket' is dereferenced below without a visible
 * NULL check; this relies on activesockets being non-empty whenever
 * nsockets exceeds the quota -- confirm.
 */
3046 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
3047 disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
3048 dispsocket_t *oldestsocket;
3049 dns_dispentry_t *oldestresp;
3050 dns_dispatchevent_t *rev;
3053 * Kill oldest outstanding query if the number of sockets
3054 * exceeds the quota to keep the room for new queries.
3056 oldestsocket = ISC_LIST_HEAD(disp->activesockets);
3057 oldestresp = oldestsocket->resp;
/*
 * Only an entry with no event already outstanding is sent the
 * ISC_R_CANCELED event.
 */
3058 if (oldestresp != NULL && !oldestresp->item_out) {
3059 rev = allocate_event(oldestresp->disp);
3061 rev->buffer.base = NULL;
3062 rev->result = ISC_R_CANCELED;
3063 rev->id = oldestresp->id;
3064 ISC_EVENT_INIT(rev, sizeof(*rev), 0,
3065 NULL, DNS_EVENT_DISPATCH,
3067 oldestresp->arg, oldestresp,
3069 oldestresp->item_out = ISC_TRUE;
3070 isc_task_send(oldestresp->task,
3071 ISC_EVENT_PTR(&rev));
3072 inc_stats(disp->mgr,
3073 dns_resstatscounter_dispabort);
3078 * Move this entry to the tail so that it won't (easily) be
3079 * examined before actually being canceled.
3081 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
3082 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
3085 qid = DNS_QID(disp);
3088 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3090 * Get a separate UDP socket with a random port number.
3092 result = get_dispsocket(disp, dest, sockmgr, qid, &dispsocket,
3094 if (result != ISC_R_SUCCESS) {
3096 UNLOCK(&disp->lock);
3097 inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
/*
 * Presumably the non-exclusive branch: the entry then uses the local port
 * recorded on the dispatch itself -- confirm.
 */
3101 localport = disp->localport;
3105 * Try somewhat hard to find an unique ID.
3107 id = (dns_messageid_t)dispatch_random(DISP_ARC4CTX(disp));
3108 bucket = dns_hash(qid, dest, id, localport);
/*
 * At most 64 candidate IDs are examined; after a collision the ID is
 * advanced by qid->qid_increment and the bucket is recomputed.
 */
3110 for (i = 0; i < 64; i++) {
3111 if (entry_search(qid, dest, id, localport, bucket) == NULL) {
3115 id += qid->qid_increment;
3117 bucket = dns_hash(qid, dest, id, localport);
3122 UNLOCK(&disp->lock);
3123 return (ISC_R_NOMORE);
3126 res = isc_mempool_get(disp->mgr->rpool);
3129 UNLOCK(&disp->lock);
/* Give back the per-query socket obtained above before failing. */
3130 if (dispsocket != NULL)
3131 destroy_dispsocket(disp, &dispsocket);
3132 return (ISC_R_NOMEMORY);
3138 isc_task_attach(task, &res->task);
3141 res->port = localport;
3142 res->bucket = bucket;
3144 res->action = action;
/* Link the entry and its per-query socket (if any) to each other. */
3146 res->dispsocket = dispsocket;
3147 if (dispsocket != NULL)
3148 dispsocket->resp = res;
3149 res->item_out = ISC_FALSE;
3150 ISC_LIST_INIT(res->items);
3151 ISC_LINK_INIT(res, link);
3152 res->magic = RESPONSE_MAGIC;
3153 ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3156 request_log(disp, res, LVL(90),
3157 "attached to task %p", res->task);
/*
 * Receiving is started for UDP dispatches and for connected ones. If that
 * fails, the entry is unlinked from the QID table again, the per-query
 * socket is destroyed, and the entry is returned to the pool.
 */
3159 if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3160 ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
3161 result = startrecv(disp, dispsocket);
3162 if (result != ISC_R_SUCCESS) {
3164 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3167 if (dispsocket != NULL)
3168 destroy_dispsocket(disp, &dispsocket);
3173 UNLOCK(&disp->lock);
3174 isc_task_detach(&res->task);
3175 isc_mempool_put(disp->mgr->rpool, res);
/* A per-query socket joins activesockets only once this point is reached. */
3180 if (dispsocket != NULL)
3181 ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3183 UNLOCK(&disp->lock);
/* Sanity check: an exclusive dispatch always yields a per-query socket. */
3188 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3189 INSIST(res->dispsocket != NULL);
3191 return (ISC_R_SUCCESS);
/*
 * Variant of dns_dispatch_addresponse2() without a 'sockmgr' parameter.
 * It may only be used on a valid dispatch that does not have
 * DNS_DISPATCHATTR_EXCLUSIVE set, and returns whatever
 * dns_dispatch_addresponse2() returns.
 * NOTE(review): the trailing arguments of the forwarded call are not
 * visible in this listing -- confirm what is passed for 'sockmgr'.
 */
3195 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3196 isc_task_t *task, isc_taskaction_t action, void *arg,
3197 dns_messageid_t *idp, dns_dispentry_t **resp)
3199 REQUIRE(VALID_DISPATCH(disp));
3200 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3202 return (dns_dispatch_addresponse2(disp, dest, task, action, arg,
/*
 * Mark the valid dispatch 'disp' as connected by setting
 * DNS_DISPATCHATTR_CONNECTED, then start receiving on it.
 * NOTE(review): the lock acquisition matching the UNLOCK below is not
 * visible in this listing -- confirm.
 */
3207 dns_dispatch_starttcp(dns_dispatch_t *disp) {
3209 REQUIRE(VALID_DISPATCH(disp));
3211 dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3214 disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
/* The result of startrecv() is deliberately discarded. */
3215 (void)startrecv(disp, NULL);
3216 UNLOCK(&disp->lock);
/*
 * Remove the response entry '*resp' from its dispatch and release what it
 * holds.
 *
 * 'resp' must be non-NULL and '*resp' a valid response. When 'sockevent'
 * is non-NULL, '*sockevent' must be non-NULL too.
 *
 * Visible effects: the entry is unlinked from the QID table; an event that
 * was posted but not yet delivered is taken back with isc_task_unsend();
 * the event buffer and the event are freed; the entry's task is detached;
 * the receive on its per-query socket is cancelled; queued items are
 * freed; and the entry is returned to the manager's response pool. When
 * disp->refcount is found to be 0 the dispatch is put into shutdown
 * exactly as in dns_dispatch_detach().
 * NOTE(review): the assignments of 'res', 'disp', 'mgr' and 'ev', the
 * lock acquisition, the request/refcount decrements and the guard on the
 * final isc_task_send() are not visible in this listing -- confirm
 * against the full source.
 */
3220 dns_dispatch_removeresponse(dns_dispentry_t **resp,
3221 dns_dispatchevent_t **sockevent)
3223 dns_dispatchmgr_t *mgr;
3224 dns_dispatch_t *disp;
3225 dns_dispentry_t *res;
3226 dispsocket_t *dispsock;
3227 dns_dispatchevent_t *ev;
3228 unsigned int bucket;
3229 isc_boolean_t killit;
3231 isc_eventlist_t events;
3234 REQUIRE(resp != NULL);
3235 REQUIRE(VALID_RESPONSE(*resp));
3241 REQUIRE(VALID_DISPATCH(disp));
3243 REQUIRE(VALID_DISPATCHMGR(mgr));
3245 qid = DNS_QID(disp);
3247 if (sockevent != NULL) {
3248 REQUIRE(*sockevent != NULL);
3257 INSIST(disp->requests > 0);
3259 INSIST(disp->refcount > 0);
/* Last reference gone: cancel all outstanding receives and shut down. */
3262 if (disp->refcount == 0) {
3263 if (disp->recv_pending > 0)
3264 isc_socket_cancel(disp->socket, disp->task[0],
3265 ISC_SOCKCANCEL_RECV);
3266 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3268 dispsock = ISC_LIST_NEXT(dispsock, link)) {
3269 isc_socket_cancel(dispsock->socket, dispsock->task,
3270 ISC_SOCKCANCEL_RECV);
3272 disp->shutting_down = 1;
3275 bucket = res->bucket;
3278 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3281 if (ev == NULL && res->item_out) {
3283 * We've posted our event, but the caller hasn't gotten it
3284 * yet. Take it back.
3286 ISC_LIST_INIT(events);
3287 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3290 * We had better have gotten it back.
3293 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
/*
 * An event in hand implies the entry had one outstanding; clear that flag
 * and free the buffer (if any) and the event itself.
 */
3297 REQUIRE(res->item_out == ISC_TRUE);
3298 res->item_out = ISC_FALSE;
3299 if (ev->buffer.base != NULL)
3300 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3301 free_event(disp, ev);
3304 request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3305 isc_task_detach(&res->task);
/*
 * Cancel the receive on the per-query socket and clear its back-pointer,
 * since 'res' is about to be returned to the pool.
 */
3307 if (res->dispsocket != NULL) {
3308 isc_socket_cancel(res->dispsocket->socket,
3309 res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3310 res->dispsocket->resp = NULL;
3314 * Free any buffered requests as well
3316 ev = ISC_LIST_HEAD(res->items);
3317 while (ev != NULL) {
3318 ISC_LIST_UNLINK(res->items, ev, ev_link);
3319 if (ev->buffer.base != NULL)
3320 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3321 free_event(disp, ev);
3322 ev = ISC_LIST_HEAD(res->items);
3325 isc_mempool_put(disp->mgr->rpool, res);
/*
 * NOTE(review): the statement run when the dispatch is shutting down is
 * not visible here; startrecv() below is presumably the alternative
 * branch -- confirm.
 */
3326 if (disp->shutting_down == 1)
3329 (void)startrecv(disp, NULL);
3331 killit = destroy_disp_ok(disp);
3332 UNLOCK(&disp->lock);
3334 isc_task_send(disp->task[0], &disp->ctlevent);
/*
 * Deliver the dispatch's failsafe event, carrying disp->shutdown_why as
 * its result, to one response entry.
 *
 * The scan starts at linear_first(qid) and moves past every entry whose
 * item_out flag is set, so the event goes to the first entry with no
 * event outstanding. disp->shutdown_out is set once the event has been
 * prepared and is tested on entry.
 * NOTE(review): the statements guarded by the shutdown_out test and by
 * the "no one to send to" case are not visible in this listing;
 * presumably both return -- confirm.
 */
3338 do_cancel(dns_dispatch_t *disp) {
3339 dns_dispatchevent_t *ev;
3340 dns_dispentry_t *resp;
3343 if (disp->shutdown_out == 1)
3346 qid = DNS_QID(disp);
3349 * Search for the first response handler without packets outstanding
3350 * unless a specific hander is given.
3353 for (resp = linear_first(qid);
3354 resp != NULL && resp->item_out;
3356 resp = linear_next(qid, resp);
3359 * No one to send the cancel event to, so nothing to do.
3365 * Send the shutdown failsafe event to this resp.
3367 ev = disp->failsafe_ev;
3368 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3369 resp->action, resp->arg, resp, NULL, NULL);
/* The failsafe event carries no data buffer, only the shutdown reason. */
3370 ev->result = disp->shutdown_why;
3371 ev->buffer.base = NULL;
3372 ev->buffer.length = 0;
3373 disp->shutdown_out = 1;
3374 request_log(disp, resp, LVL(10),
3375 "cancel: failsafe event %p -> task %p",
/* Mark the entry as having an event outstanding before sending it. */
3377 resp->item_out = ISC_TRUE;
3378 isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
/*
 * Return the socket stored in the valid dispatch 'disp'. The pointer is
 * returned as is; no attach is performed here.
 */
3384 dns_dispatch_getsocket(dns_dispatch_t *disp) {
3385 REQUIRE(VALID_DISPATCH(disp));
3387 return (disp->socket);
/*
 * Return the socket of the per-query dispsocket attached to the valid
 * response entry 'resp', when there is one.
 * NOTE(review): the return taken when resp->dispsocket is NULL is not
 * visible in this listing -- confirm.
 */
3391 dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3392 REQUIRE(VALID_RESPONSE(resp));
3394 if (resp->dispsocket != NULL)
3395 return (resp->dispsocket->socket);
/*
 * Copy the local address of the valid dispatch 'disp' into '*addrp',
 * which must be non-NULL.
 *
 * Returns ISC_R_SUCCESS for a UDP dispatch. For any other socket type
 * '*addrp' is left untouched and ISC_R_NOTIMPLEMENTED is returned.
 */
3401 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3403 REQUIRE(VALID_DISPATCH(disp));
3404 REQUIRE(addrp != NULL);
3406 if (disp->socktype == isc_sockettype_udp) {
3407 *addrp = disp->local;
3408 return (ISC_R_SUCCESS);
3410 return (ISC_R_NOTIMPLEMENTED);
/*
 * Put the valid dispatch 'disp' into shutdown with ISC_R_CANCELED as the
 * recorded reason. A dispatch that is already shutting down is only
 * unlocked.
 * NOTE(review): the lock acquisition, the return after the early unlock
 * and any call made between setting the flags and the final unlock are
 * not visible in this listing -- confirm.
 */
3414 dns_dispatch_cancel(dns_dispatch_t *disp) {
3415 REQUIRE(VALID_DISPATCH(disp));
3419 if (disp->shutting_down == 1) {
3420 UNLOCK(&disp->lock);
3424 disp->shutdown_why = ISC_R_CANCELED;
3425 disp->shutting_down = 1;
3428 UNLOCK(&disp->lock);
/*
 * Return the current attribute flags of the valid dispatch 'disp'. The
 * value is read without taking disp->lock.
 */
3434 dns_dispatch_getattributes(dns_dispatch_t *disp) {
3435 REQUIRE(VALID_DISPATCH(disp));
3438 * We don't bother locking disp here; it's the caller's responsibility
3439 * to use only non volatile flags.
3441 return (disp->attributes);
/*
 * Replace the attribute bits of 'disp' selected by 'mask' with the
 * corresponding bits of 'attributes'.
 *
 * 'attributes' must not contain DNS_DISPATCHATTR_EXCLUSIVE, and on an
 * exclusive dispatch it must not contain DNS_DISPATCHATTR_NOLISTEN.
 *
 * When 'mask' covers DNS_DISPATCHATTR_NOLISTEN, a change of that flag has
 * an immediate effect: clearing it starts receiving, and setting it
 * cancels a pending receive on the dispatch socket.
 * NOTE(review): the lock acquisition matching the final UNLOCK is not
 * visible in this listing -- confirm.
 */
3445 dns_dispatch_changeattributes(dns_dispatch_t *disp,
3446 unsigned int attributes, unsigned int mask)
3448 REQUIRE(VALID_DISPATCH(disp));
3449 /* Exclusive attribute can only be set on creation */
3450 REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3451 /* Also, a dispatch with randomport specified cannot start listening */
/*
 * NOTE(review): the comment above speaks of starting to listen, whereas
 * the check below rejects DNS_DISPATCHATTR_NOLISTEN being set in
 * 'attributes' for an exclusive dispatch -- confirm which is intended.
 */
3452 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3453 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3456 * Should check for valid attributes here!
3461 if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
/* NOLISTEN currently set and being cleared: begin receiving. */
3462 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3463 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3464 disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3465 (void)startrecv(disp, NULL);
3466 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3468 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3469 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3470 if (disp->recv_pending != 0)
3471 isc_socket_cancel(disp->socket, disp->task[0],
3472 ISC_SOCKCANCEL_RECV);
/* Bits outside 'mask' keep their current values. */
3476 disp->attributes &= ~mask;
3477 disp->attributes |= (attributes & mask);
3478 UNLOCK(&disp->lock);
/*
 * Hand a received packet, described by the socket event 'event', to the
 * dispatch 'disp'. This is only allowed on a dispatch that has
 * DNS_DISPATCHATTR_NOLISTEN set.
 *
 * The payload (sevent->n bytes) is copied into a buffer from
 * allocate_udp_buffer(), and a new DNS_EVENT_IMPORTRECVDONE socket event
 * with action udp_shrecv is filled in from the original and sent to
 * task[0]. The payload size is checked against the manager's buffer size
 * before the copy.
 * NOTE(review): the statements taken when either allocation fails, beyond
 * freeing the new event, are not visible in this listing -- confirm.
 */
3482 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3484 isc_socketevent_t *sevent, *newsevent;
3486 REQUIRE(VALID_DISPATCH(disp));
3487 REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3488 REQUIRE(event != NULL);
3490 sevent = (isc_socketevent_t *)event;
/* Guards the memcpy() below against overrunning the dispatch buffer. */
3492 INSIST(sevent->n <= disp->mgr->buffersize);
3493 newsevent = (isc_socketevent_t *)
3494 isc_event_allocate(disp->mgr->mctx, NULL,
3495 DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3496 disp, sizeof(isc_socketevent_t));
3497 if (newsevent == NULL)
3500 buf = allocate_udp_buffer(disp);
3502 isc_event_free(ISC_EVENT_PTR(&newsevent));
/*
 * The region length advertises the full buffer size, while 'n' carries
 * the number of bytes actually copied.
 */
3505 memcpy(buf, sevent->region.base, sevent->n);
3506 newsevent->region.base = buf;
3507 newsevent->region.length = disp->mgr->buffersize;
3508 newsevent->n = sevent->n;
3509 newsevent->result = sevent->result;
3510 newsevent->address = sevent->address;
3511 newsevent->timestamp = sevent->timestamp;
3512 newsevent->pktinfo = sevent->pktinfo;
3513 newsevent->attributes = sevent->attributes;
3515 isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
/*
 * Debugging aid: walk mgr->list and print, for every dispatch, its
 * pointer and its formatted local address to standard output.
 * NOTE(review): the declaration of 'foo' and the end of this function
 * are not visible in this listing -- confirm against the full source.
 */
3520 dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
3521 dns_dispatch_t *disp;
3524 disp = ISC_LIST_HEAD(mgr->list);
3525 while (disp != NULL) {
3526 isc_sockaddr_format(&disp->local, foo, sizeof(foo));
3527 printf("\tdispatch %p, addr %s\n", disp, foo);
3528 disp = ISC_LIST_NEXT(disp, link);