2 * Copyright (C) 2004-2008 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: dispatch.c,v 1.116.18.37 2008/09/04 00:24:41 jinmei Exp $ */
25 #include <sys/types.h>
29 #include <isc/entropy.h>
31 #include <isc/mutex.h>
32 #include <isc/portset.h>
33 #include <isc/print.h>
34 #include <isc/random.h>
35 #include <isc/string.h>
41 #include <dns/dispatch.h>
42 #include <dns/events.h>
44 #include <dns/message.h>
45 #include <dns/portlist.h>
46 #include <dns/tcpmsg.h>
47 #include <dns/types.h>
49 typedef ISC_LIST(dns_dispentry_t) dns_displist_t;
51 typedef struct dispsocket dispsocket_t;
52 typedef ISC_LIST(dispsocket_t) dispsocketlist_t;
54 /* ARC4 Random generator state */
55 typedef struct arc4ctx {
60 isc_entropy_t *entropy; /*%< entropy source for ARC4 */
64 typedef struct dns_qid {
66 unsigned int qid_nbuckets; /*%< hash table size */
67 unsigned int qid_increment; /*%< id increment on collision */
69 dns_displist_t *qid_table; /*%< the table itself */
70 dispsocketlist_t *sock_table; /*%< socket table */
73 struct dns_dispatchmgr {
78 dns_portlist_t *portlist;
79 isc_entropy_t *entropy; /*%< entropy source */
81 /* Locked by "lock". */
84 ISC_LIST(dns_dispatch_t) list;
86 /* Locked by arc4_lock. */
87 isc_mutex_t arc4_lock;
88 arc4ctx_t arc4ctx; /*%< ARC4 context for QID */
90 /* locked by buffer lock */
92 isc_mutex_t buffer_lock;
93 unsigned int buffers; /*%< allocated buffers */
94 unsigned int buffersize; /*%< size of each buffer */
95 unsigned int maxbuffers; /*%< max buffers */
97 /* Locked internally. */
98 isc_mutex_t pool_lock;
99 isc_mempool_t *epool; /*%< memory pool for events */
100 isc_mempool_t *rpool; /*%< memory pool for replies */
101 isc_mempool_t *dpool; /*%< dispatch allocations */
102 isc_mempool_t *bpool; /*%< memory pool for buffers */
103 isc_mempool_t *spool; /*%< memory pool for dispsocs */
106 * Locked by qid->lock if qid exists; otherwise, can be used without
108 * Memory footprint considerations: this is a simple implementation of
109 * available ports, i.e., an ordered array of the actual port numbers.
110 * This will require about 256KB of memory in the worst case (128KB for
111 * each of IPv4 and IPv6). We could reduce it by representing it in a
112 * more sophisticated way such as a list (or array) of ranges that are
113 * searched to identify a specific port. Our decision here is the saved
114 * memory isn't worth the implementation complexity, considering the
115 * fact that the whole BIND9 process (which is mainly named) already
116 * requires a pretty large memory footprint. We may, however, have to
117 * revisit the decision when we want to use it as a separate module for
118 * an environment where memory requirements are more severe.
120 in_port_t *v4ports; /*%< available ports for IPv4 */
121 unsigned int nv4ports; /*%< # of available ports for IPv4 */
122 in_port_t *v6ports; /*%< available ports for IPv6 */
123 unsigned int nv6ports; /*%< # of available ports for IPv6 */
126 #define MGR_SHUTTINGDOWN 0x00000001U
127 #define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0)
129 #define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
131 struct dns_dispentry {
133 dns_dispatch_t *disp;
139 isc_taskaction_t action;
141 isc_boolean_t item_out;
142 dispsocket_t *dispsocket;
143 ISC_LIST(dns_dispatchevent_t) items;
144 ISC_LINK(dns_dispentry_t) link;
148 * Maximum number of dispatch sockets that can be pooled for reuse. The
149 * appropriate value may vary, but experiments have shown a busy caching server
150 * may need more than 1000 sockets concurrently opened. The maximum allowable
151 * number of dispatch sockets (per manager) will be set to double this
154 #ifndef DNS_DISPATCH_POOLSOCKS
155 #define DNS_DISPATCH_POOLSOCKS 2048
159 * Quota to control the number of dispatch sockets. If a dispatch has more
160 * than the quota of sockets, new queries will purge oldest ones, so that
161 * a massive number of outstanding queries won't prevent subsequent queries
162 * (especially if the older ones take longer time and result in timeout).
164 #ifndef DNS_DISPATCH_SOCKSQUOTA
165 #define DNS_DISPATCH_SOCKSQUOTA 3072
170 isc_socket_t *socket;
171 dns_dispatch_t *disp;
174 dns_dispentry_t *resp;
176 ISC_LINK(dispsocket_t) link;
178 ISC_LINK(dispsocket_t) blink;
181 #define INVALID_BUCKET (0xffffdead)
184 * Number of tasks for each dispatch that use separate sockets for different
185 * transactions. This must be a power of 2 as it will divide 32 bit numbers
186 * to get a uniformly random task selection. See get_dispsocket().
188 #define MAX_INTERNAL_TASKS 64
190 struct dns_dispatch {
192 unsigned int magic; /*%< magic */
193 dns_dispatchmgr_t *mgr; /*%< dispatch manager */
196 * internal task buckets. We use multiple tasks to distribute various
197 * socket events well when using separate dispatch sockets. We use the
198 * 1st task (task[0]) for internal control events.
200 isc_task_t *task[MAX_INTERNAL_TASKS];
201 isc_socket_t *socket; /*%< isc socket attached to */
202 isc_sockaddr_t local; /*%< local address */
203 in_port_t localport; /*%< local UDP port */
204 unsigned int maxrequests; /*%< max requests */
205 isc_event_t *ctlevent;
207 /*% Locked by mgr->lock. */
208 ISC_LINK(dns_dispatch_t) link;
210 /* Locked by "lock". */
211 isc_mutex_t lock; /*%< locks all below */
212 isc_sockettype_t socktype;
213 unsigned int attributes;
214 unsigned int refcount; /*%< number of users */
215 dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */
216 unsigned int shutting_down : 1,
220 recv_pending : 1; /*%< is a recv() pending? */
221 isc_result_t shutdown_why;
222 ISC_LIST(dispsocket_t) activesockets;
223 ISC_LIST(dispsocket_t) inactivesockets;
224 unsigned int nsockets;
225 unsigned int requests; /*%< how many requests we have */
226 unsigned int tcpbuffers; /*%< allocated buffers */
227 dns_tcpmsg_t tcpmsg; /*%< for tcp streams */
229 arc4ctx_t arc4ctx; /*%< for QID/UDP port num */
232 #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ')
233 #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC)
235 #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p')
236 #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
238 #define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c')
239 #define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
241 #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p')
242 #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
244 #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
245 #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
/*
 * Select the QID table for a dispatch: a TCP dispatch uses its own
 * (disp)->qid, any other socket type uses the manager's (disp)->mgr->qid.
 *
 * The whole conditional is parenthesized so the macro can be used as an
 * operand of a larger expression without the surrounding operators binding
 * to only one arm of the ?: expression.
 */
#define DNS_QID(disp) (((disp)->socktype == isc_sockettype_tcp) ? \
		       (disp)->qid : (disp)->mgr->qid)
/*
 * Select the ARC4 context for a dispatch: a UDP dispatch uses its own
 * embedded context, any other socket type uses the manager's.  Evaluates to
 * a pointer to the chosen context.
 *
 * The whole conditional is parenthesized so the result can be dereferenced
 * or compared directly without operator-precedence surprises.
 */
#define DISP_ARC4CTX(disp) (((disp)->socktype == isc_sockettype_udp) ? \
			    (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
253 * Locking a query port buffer is a bit tricky. We access the buffer without
254 * locking until qid is created. Technically, there is a possibility of race
255 * between the creation of qid and access to the port buffer; in practice,
256 * however, this should be safe because qid isn't created until the first
257 * dispatch is created and there should be no contending situation until then.
/*
 * Acquire the port-buffer lock, i.e. (mgr)->qid->lock, when the manager's
 * qid exists; do nothing when it is NULL.
 *
 * Wrapped in do { } while (0) so the macro acts as exactly one statement:
 * a bare "if" here would capture an "else" that follows the call site.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
/*
 * Release the port-buffer lock, i.e. (mgr)->qid->lock, when the manager's
 * qid exists; do nothing when it is NULL.
 *
 * Wrapped in do { } while (0) so the macro acts as exactly one statement:
 * a bare "if" here would capture an "else" that follows the call site.
 */
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)
265 static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
266 dns_messageid_t, in_port_t, unsigned int);
267 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
268 static void destroy_disp(isc_task_t *task, isc_event_t *event);
269 static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
270 static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
271 static void udp_exrecv(isc_task_t *, isc_event_t *);
272 static void udp_shrecv(isc_task_t *, isc_event_t *);
273 static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
274 static void tcp_recv(isc_task_t *, isc_event_t *);
275 static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
276 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
278 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
279 static void *allocate_udp_buffer(dns_dispatch_t *disp);
280 static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
281 static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
282 static void do_cancel(dns_dispatch_t *disp);
283 static dns_dispentry_t *linear_first(dns_qid_t *disp);
284 static dns_dispentry_t *linear_next(dns_qid_t *disp,
285 dns_dispentry_t *resp);
286 static void dispatch_free(dns_dispatch_t **dispp);
287 static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
288 dns_dispatch_t *disp,
289 isc_socketmgr_t *sockmgr,
290 isc_sockaddr_t *localaddr,
291 isc_socket_t **sockp);
292 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
293 isc_socketmgr_t *sockmgr,
294 isc_taskmgr_t *taskmgr,
295 isc_sockaddr_t *localaddr,
296 unsigned int maxrequests,
297 unsigned int attributes,
298 dns_dispatch_t **dispp);
299 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
300 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
301 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
302 unsigned int increment, dns_qid_t **qidp,
303 isc_boolean_t needaddrtable);
304 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
305 static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
306 unsigned int options, isc_socket_t **sockp);
307 static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
308 isc_sockaddr_t *sockaddrp);
310 #define LVL(x) ISC_LOG_DEBUG(x)
313 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
314 ISC_FORMAT_PRINTF(3, 4);
317 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
321 if (! isc_log_wouldlog(dns_lctx, level))
325 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
328 isc_log_write(dns_lctx,
329 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
330 level, "dispatchmgr %p: %s", mgr, msgbuf);
334 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
335 ISC_FORMAT_PRINTF(3, 4);
338 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
342 if (! isc_log_wouldlog(dns_lctx, level))
346 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
349 isc_log_write(dns_lctx,
350 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
351 level, "dispatch %p: %s", disp, msgbuf);
355 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
356 int level, const char *fmt, ...)
357 ISC_FORMAT_PRINTF(4, 5);
360 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
361 int level, const char *fmt, ...)
367 if (! isc_log_wouldlog(dns_lctx, level))
371 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
374 if (VALID_RESPONSE(resp)) {
375 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
376 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
377 DNS_LOGMODULE_DISPATCH, level,
378 "dispatch %p response %p %s: %s", disp, resp,
381 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
382 DNS_LOGMODULE_DISPATCH, level,
383 "dispatch %p req/resp %p: %s", disp, resp,
389 * ARC4 random number generator derived from OpenBSD.
390 * Only dispatch_arc4random() and dispatch_arc4uniformrandom() are expected
391 * to be called from general dispatch routines; the rest of them are subroutines
394 * The original copyright follows:
395 * Copyright (c) 1996, David Mazieres <dm@uun.org>
396 * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
398 * Permission to use, copy, modify, and distribute this software for any
399 * purpose with or without fee is hereby granted, provided that the above
400 * copyright notice and this permission notice appear in all copies.
402 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
403 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
404 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
405 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
406 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
407 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
408 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
411 dispatch_arc4init(arc4ctx_t *actx, isc_entropy_t *entropy, isc_mutex_t *lock) {
413 for (n = 0; n < 256; n++)
418 actx->entropy = entropy; /* don't have to attach */
423 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
428 for (n = 0; n < 256; n++) {
429 actx->i = (actx->i + 1);
430 si = actx->s[actx->i];
431 actx->j = (actx->j + si + dat[n % datlen]);
432 actx->s[actx->i] = actx->s[actx->j];
433 actx->s[actx->j] = si;
438 static inline isc_uint8_t
439 dispatch_arc4get8(arc4ctx_t *actx) {
442 actx->i = (actx->i + 1);
443 si = actx->s[actx->i];
444 actx->j = (actx->j + si);
445 sj = actx->s[actx->j];
446 actx->s[actx->i] = sj;
447 actx->s[actx->j] = si;
449 return (actx->s[(si + sj) & 0xff]);
452 static inline isc_uint16_t
453 dispatch_arc4get16(arc4ctx_t *actx) {
456 val = dispatch_arc4get8(actx) << 8;
457 val |= dispatch_arc4get8(actx);
463 dispatch_arc4stir(arc4ctx_t *actx) {
466 unsigned char rnd[128];
467 isc_uint32_t rnd32[32];
471 if (actx->entropy != NULL) {
473 * We accept any quality of random data to avoid blocking.
475 result = isc_entropy_getdata(actx->entropy, rnd.rnd,
476 sizeof(rnd), NULL, 0);
477 RUNTIME_CHECK(result == ISC_R_SUCCESS);
479 for (i = 0; i < 32; i++)
480 isc_random_get(&rnd.rnd32[i]);
482 dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
485 * Discard early keystream, as per recommendations in:
486 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
488 for (i = 0; i < 256; i++)
489 (void)dispatch_arc4get8(actx);
492 * Derived from OpenBSD's implementation. The rationale is not clear,
493 * but should be conservative enough in safety, and reasonably large
496 actx->count = 1600000;
500 dispatch_arc4random(arc4ctx_t *actx) {
503 if (actx->lock != NULL)
506 actx->count -= sizeof(isc_uint16_t);
507 if (actx->count <= 0)
508 dispatch_arc4stir(actx);
509 result = dispatch_arc4get16(actx);
511 if (actx->lock != NULL)
518 dispatch_arc4uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
525 * Ensure the range of random numbers [min, 0xffff] be a multiple of
526 * upper_bound and contain at least a half of the 16 bit range.
529 if (upper_bound > 0x8000)
530 min = 1 + ~upper_bound; /* 0x10000 - upper_bound once truncated to 16 bits */
532 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
535 * This could theoretically loop forever but each retry has
536 * p > 0.5 (worst case, usually far better) of selecting a
537 * number inside the range we need, so it should rarely need
541 r = dispatch_arc4random(actx);
546 return (r % upper_bound);
550 * Return a hash of the destination and message id.
553 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
558 ret = isc_sockaddr_hash(dest, ISC_TRUE);
559 ret ^= (id << 16) | port;
560 ret %= qid->qid_nbuckets;
562 INSIST(ret < qid->qid_nbuckets);
568 * Find the first entry in 'qid'. Returns NULL if there are no entries.
570 static dns_dispentry_t *
571 linear_first(dns_qid_t *qid) {
572 dns_dispentry_t *ret;
577 while (bucket < qid->qid_nbuckets) {
578 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
588 * Find the next entry after 'resp' in 'qid'. Return NULL if there are
591 static dns_dispentry_t *
592 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
593 dns_dispentry_t *ret;
596 ret = ISC_LIST_NEXT(resp, link);
600 bucket = resp->bucket;
602 while (bucket < qid->qid_nbuckets) {
603 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
613 * The dispatch must be locked.
616 destroy_disp_ok(dns_dispatch_t *disp)
618 if (disp->refcount != 0)
621 if (disp->recv_pending != 0)
624 if (!ISC_LIST_EMPTY(disp->activesockets))
627 if (disp->shutting_down == 0)
634 * Called when refcount reaches 0 (and safe to destroy).
636 * The dispatcher must not be locked.
637 * The manager must be locked.
640 destroy_disp(isc_task_t *task, isc_event_t *event) {
641 dns_dispatch_t *disp;
642 dns_dispatchmgr_t *mgr;
643 isc_boolean_t killmgr;
644 dispsocket_t *dispsocket;
647 INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
651 disp = event->ev_arg;
655 ISC_LIST_UNLINK(mgr->list, disp, link);
657 dispatch_log(disp, LVL(90),
658 "shutting down; detaching from sock %p, task %p",
659 disp->socket, disp->task[0]); /* XXXX */
661 if (disp->socket != NULL)
662 isc_socket_detach(&disp->socket);
663 while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
664 ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
665 destroy_dispsocket(disp, &dispsocket);
667 for (i = 0; i < disp->ntasks; i++)
668 isc_task_detach(&disp->task[i]);
669 isc_event_free(&event);
671 dispatch_free(&disp);
673 killmgr = destroy_mgr_ok(mgr);
680 * Find a dispsocket for socket address 'dest', and port number 'port'.
681 * Return NULL if no such entry exists.
683 static dispsocket_t *
684 socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
687 dispsocket_t *dispsock;
689 REQUIRE(bucket < qid->qid_nbuckets);
691 dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
693 while (dispsock != NULL) {
694 if (isc_sockaddr_equal(dest, &dispsock->host) &&
695 dispsock->localport == port)
697 dispsock = ISC_LIST_NEXT(dispsock, blink);
704 * Make a new socket for a single dispatch with a random port number.
705 * The caller must hold the disp->lock and qid->lock.
708 get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
709 isc_socketmgr_t *sockmgr, dns_qid_t *qid,
710 dispsocket_t **dispsockp, in_port_t *portp)
714 dns_dispatchmgr_t *mgr = disp->mgr;
715 isc_socket_t *sock = NULL;
716 isc_result_t result = ISC_R_FAILURE;
718 isc_sockaddr_t localaddr;
719 unsigned int bucket = 0;
720 dispsocket_t *dispsock;
724 if (isc_sockaddr_pf(&disp->local) == AF_INET) {
725 nports = disp->mgr->nv4ports;
726 ports = disp->mgr->v4ports;
728 nports = disp->mgr->nv6ports;
729 ports = disp->mgr->v6ports;
732 return (ISC_R_ADDRNOTAVAIL);
734 dispsock = ISC_LIST_HEAD(disp->inactivesockets);
735 if (dispsock != NULL) {
736 ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
737 sock = dispsock->socket;
738 dispsock->socket = NULL;
740 dispsock = isc_mempool_get(mgr->spool);
741 if (dispsock == NULL)
742 return (ISC_R_NOMEMORY);
745 dispsock->socket = NULL;
746 dispsock->disp = disp;
747 dispsock->resp = NULL;
749 dispsock->task = NULL;
750 isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
751 ISC_LINK_INIT(dispsock, link);
752 ISC_LINK_INIT(dispsock, blink);
753 dispsock->magic = DISPSOCK_MAGIC;
757 * Pick up a random UDP port and open a new socket with it. Avoid
758 * choosing ports that share the same destination because it will be
759 * very likely to fail in bind(2) or connect(2).
761 localaddr = disp->local;
762 for (i = 0; i < 64; i++) {
763 port = ports[dispatch_arc4uniformrandom(DISP_ARC4CTX(disp),
765 isc_sockaddr_setport(&localaddr, port);
767 bucket = dns_hash(qid, dest, 0, port);
768 if (socket_search(qid, dest, port, bucket) != NULL)
771 result = open_socket(sockmgr, &localaddr, 0, &sock);
772 if (result == ISC_R_SUCCESS || result != ISC_R_ADDRINUSE)
776 if (result == ISC_R_SUCCESS) {
777 dispsock->socket = sock;
778 dispsock->host = *dest;
779 dispsock->localport = port;
780 dispsock->bucket = bucket;
781 ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
782 *dispsockp = dispsock;
786 * We could keep it in the inactive list, but since this should
787 * be an exceptional case and might be resource shortage, we'd
791 isc_socket_detach(&sock);
792 destroy_dispsocket(disp, &dispsock);
799 * Destroy a dedicated dispatch socket.
802 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
803 dispsocket_t *dispsock;
807 * The dispatch must be locked.
810 REQUIRE(dispsockp != NULL && *dispsockp != NULL);
811 dispsock = *dispsockp;
812 REQUIRE(!ISC_LINK_LINKED(dispsock, link));
816 if (dispsock->socket != NULL)
817 isc_socket_detach(&dispsock->socket);
818 if (ISC_LINK_LINKED(dispsock, blink)) {
821 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
825 if (dispsock->task != NULL)
826 isc_task_detach(&dispsock->task);
827 isc_mempool_put(disp->mgr->spool, dispsock);
833 * Deactivate a dedicated dispatch socket. Move it to the inactive list for
834 * future reuse unless the total number of sockets are exceeding the maximum.
837 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
842 * The dispatch must be locked.
844 ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
845 if (dispsock->resp != NULL) {
846 INSIST(dispsock->resp->dispsocket == dispsock);
847 dispsock->resp->dispsocket = NULL;
850 if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
851 destroy_dispsocket(disp, &dispsock);
853 result = isc_socket_close(dispsock->socket);
857 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
861 if (result == ISC_R_SUCCESS)
862 ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
865 * If the underlying system does not allow this
866 * optimization, destroy this temporary structure (and
867 * create a new one for a new transaction).
869 INSIST(result == ISC_R_NOTIMPLEMENTED);
870 destroy_dispsocket(disp, &dispsock);
876 * Find an entry for query ID 'id', socket address 'dest', and port number
878 * Return NULL if no such entry exists.
880 static dns_dispentry_t *
881 entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
882 in_port_t port, unsigned int bucket)
884 dns_dispentry_t *res;
886 REQUIRE(bucket < qid->qid_nbuckets);
888 res = ISC_LIST_HEAD(qid->qid_table[bucket]);
890 while (res != NULL) {
891 if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
895 res = ISC_LIST_NEXT(res, link);
902 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
903 INSIST(buf != NULL && len != 0);
906 switch (disp->socktype) {
907 case isc_sockettype_tcp:
908 INSIST(disp->tcpbuffers > 0);
910 isc_mem_put(disp->mgr->mctx, buf, len);
912 case isc_sockettype_udp:
913 LOCK(&disp->mgr->buffer_lock);
914 INSIST(disp->mgr->buffers > 0);
915 INSIST(len == disp->mgr->buffersize);
916 disp->mgr->buffers--;
917 isc_mempool_put(disp->mgr->bpool, buf);
918 UNLOCK(&disp->mgr->buffer_lock);
927 allocate_udp_buffer(dns_dispatch_t *disp) {
930 LOCK(&disp->mgr->buffer_lock);
931 temp = isc_mempool_get(disp->mgr->bpool);
934 disp->mgr->buffers++;
935 UNLOCK(&disp->mgr->buffer_lock);
941 free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
942 if (disp->failsafe_ev == ev) {
943 INSIST(disp->shutdown_out == 1);
944 disp->shutdown_out = 0;
949 isc_mempool_put(disp->mgr->epool, ev);
952 static inline dns_dispatchevent_t *
953 allocate_event(dns_dispatch_t *disp) {
954 dns_dispatchevent_t *ev;
956 ev = isc_mempool_get(disp->mgr->epool);
959 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
960 NULL, NULL, NULL, NULL, NULL);
966 udp_exrecv(isc_task_t *task, isc_event_t *ev) {
967 dispsocket_t *dispsock = ev->ev_arg;
971 REQUIRE(VALID_DISPSOCK(dispsock));
972 udp_recv(ev, dispsock->disp, dispsock);
976 udp_shrecv(isc_task_t *task, isc_event_t *ev) {
977 dns_dispatch_t *disp = ev->ev_arg;
981 REQUIRE(VALID_DISPATCH(disp));
982 udp_recv(ev, disp, NULL);
988 * If I/O result == CANCELED or error, free the buffer.
990 * If query, free the buffer, restart.
993 * Allocate event, fill in details.
994 * If cannot allocate, free buffer, restart.
995 * find target. If not found, free buffer, restart.
996 * if event queue is not empty, queue. else, send.
1000 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1001 isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1004 isc_buffer_t source;
1006 dns_dispentry_t *resp = NULL;
1007 dns_dispatchevent_t *rev;
1008 unsigned int bucket;
1009 isc_boolean_t killit;
1010 isc_boolean_t queue_response;
1011 dns_dispatchmgr_t *mgr;
1013 isc_netaddr_t netaddr;
1016 isc_boolean_t qidlocked = ISC_FALSE;
1023 dispatch_log(disp, LVL(90),
1024 "got packet: requests %d, buffers %d, recvs %d",
1025 disp->requests, disp->mgr->buffers, disp->recv_pending);
1027 if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1029 * Unless the receive event was imported from a listening
1030 * interface, in which case the event type is
1031 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1033 INSIST(disp->recv_pending != 0);
1034 disp->recv_pending = 0;
1037 if (dispsock != NULL &&
1038 (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1040 * dispsock->resp can be NULL if this transaction was canceled
1041 * just after receiving a response. Since this socket is
1042 * exclusively used and there should be at most one receive
1043 * event, the canceled event should have had no effect. So
1044 * we can (and should) deactivate the socket right now.
1046 deactivate_dispsocket(disp, dispsock);
1050 if (disp->shutting_down) {
1052 * This dispatcher is shutting down.
1054 free_buffer(disp, ev->region.base, ev->region.length);
1056 isc_event_free(&ev_in);
1059 killit = destroy_disp_ok(disp);
1060 UNLOCK(&disp->lock);
1062 isc_task_send(disp->task[0], &disp->ctlevent);
1067 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1068 if (dispsock != NULL) {
1069 resp = dispsock->resp;
1071 if (ev->result != ISC_R_SUCCESS) {
1073 * This is most likely a network error on a
1074 * connected socket. It makes no sense to
1075 * check the address or parse the packet, but it
1076 * will help to return the error to the caller.
1081 free_buffer(disp, ev->region.base, ev->region.length);
1083 UNLOCK(&disp->lock);
1084 isc_event_free(&ev_in);
1087 } else if (ev->result != ISC_R_SUCCESS) {
1088 free_buffer(disp, ev->region.base, ev->region.length);
1090 if (ev->result != ISC_R_CANCELED)
1091 dispatch_log(disp, ISC_LOG_ERROR,
1092 "odd socket result in udp_recv(): %s",
1093 isc_result_totext(ev->result));
1095 UNLOCK(&disp->lock);
1096 isc_event_free(&ev_in);
1101 * If this is from a blackholed address, drop it.
1103 isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1104 if (disp->mgr->blackhole != NULL &&
1105 dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1106 NULL, &match, NULL) == ISC_R_SUCCESS &&
1109 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1110 char netaddrstr[ISC_NETADDR_FORMATSIZE];
1111 isc_netaddr_format(&netaddr, netaddrstr,
1112 sizeof(netaddrstr));
1113 dispatch_log(disp, LVL(10),
1114 "blackholed packet from %s",
1117 free_buffer(disp, ev->region.base, ev->region.length);
1122 * Peek into the buffer to see what we can see.
1124 isc_buffer_init(&source, ev->region.base, ev->region.length);
1125 isc_buffer_add(&source, ev->n);
1126 dres = dns_message_peekheader(&source, &id, &flags);
1127 if (dres != ISC_R_SUCCESS) {
1128 free_buffer(disp, ev->region.base, ev->region.length);
1129 dispatch_log(disp, LVL(10), "got garbage packet");
1133 dispatch_log(disp, LVL(92),
1134 "got valid DNS message header, /QR %c, id %u",
1135 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1138 * Look at flags. If query, drop it. If response,
1139 * look to see where it goes.
1141 queue_response = ISC_FALSE;
1142 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1144 free_buffer(disp, ev->region.base, ev->region.length);
1149 * Search for the corresponding response. If we are using an exclusive
1150 * socket, we've already identified it and we can skip the search; but
1151 * the ID and the address must match the expected ones.
1154 bucket = dns_hash(qid, &ev->address, id, disp->localport);
1156 qidlocked = ISC_TRUE;
1157 resp = entry_search(qid, &ev->address, id, disp->localport,
1159 dispatch_log(disp, LVL(90),
1160 "search for response in bucket %d: %s",
1161 bucket, (resp == NULL ? "not found" : "found"));
1164 free_buffer(disp, ev->region.base, ev->region.length);
1167 } else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1169 dispatch_log(disp, LVL(90),
1170 "response to an exclusive socket doesn't match");
1171 free_buffer(disp, ev->region.base, ev->region.length);
1176 * Now that we have the original dispatch the query was sent
1177 * from check that the address and port the response was
1178 * sent to make sense.
1180 if (disp != resp->disp) {
1185 * Check that the socket types and ports match.
1187 if (disp->socktype != resp->disp->socktype ||
1188 isc_sockaddr_getport(&disp->local) !=
1189 isc_sockaddr_getport(&resp->disp->local)) {
1190 free_buffer(disp, ev->region.base, ev->region.length);
1195 * If both dispatches are bound to an address then fail as
1196 * the addresses can't be equal (enforced by the IP stack).
1198 * Note under Linux a packet can be sent out via IPv4 socket
1199 * and the response be received via an IPv6 socket.
1201 * Requests sent out via IPv6 should always come back in
1204 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1205 isc_sockaddr_pf(&disp->local) != PF_INET6) {
1206 free_buffer(disp, ev->region.base, ev->region.length);
1209 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1210 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1211 if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1212 !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1213 free_buffer(disp, ev->region.base, ev->region.length);
1219 queue_response = resp->item_out;
1220 rev = allocate_event(resp->disp);
1222 free_buffer(disp, ev->region.base, ev->region.length);
1227 * At this point, rev contains the event we want to fill in, and
1228 * resp contains the information on the place to send it to.
1229 * Send the event off.
1231 isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1232 isc_buffer_add(&rev->buffer, ev->n);
1233 rev->result = ev->result;
1235 rev->addr = ev->address;
1236 rev->pktinfo = ev->pktinfo;
1237 rev->attributes = ev->attributes;
1238 if (queue_response) {
1239 ISC_LIST_APPEND(resp->items, rev, ev_link);
1241 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1243 resp->action, resp->arg, resp, NULL, NULL);
1244 request_log(disp, resp, LVL(90),
1245 "[a] Sent event %p buffer %p len %d to task %p",
1246 rev, rev->buffer.base, rev->buffer.length,
1248 resp->item_out = ISC_TRUE;
1249 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1256 * Restart recv() to get the next packet.
1259 result = startrecv(disp, dispsock);
1260 if (result != ISC_R_SUCCESS && dispsock != NULL) {
1262 * XXX: weird. There seems to be no recovery process other than to
1263 * deactivate this socket anyway (since we cannot start
1264 * receiving, we won't be able to receive a cancel event
1267 deactivate_dispsocket(disp, dispsock);
1269 UNLOCK(&disp->lock);
1271 isc_event_free(&ev_in);
1277 * If I/O result == CANCELED, EOF, or error, notify everyone as the
1278 * various queues drain.
1280 * If query, restart.
1283 * Allocate event, fill in details.
1284 * If cannot allocate, restart.
1285 * find target. If not found, restart.
1286 * if event queue is not empty, queue. else, send.
/*
 * tcp_recv() -- task event handler run when a read on a TCP dispatch
 * completes.
 *
 * The dispatch is taken from ev_in->ev_arg and the received message from
 * its embedded tcpmsg.  If the dispatch has no references left, or the read
 * failed, the reason is logged, the dispatch is marked as shutting down and
 * its control event may be sent to task[0].  Otherwise the DNS header is
 * peeked, the matching response entry is looked up by (peer address,
 * message id, local port), and a dispatch event carrying the message is
 * either appended to the entry's pending list or sent to the entry's task.
 * A new read is then started.
 *
 * NOTE(review): the LOCK() calls, several declarations and the jump labels
 * are not visible in this excerpt.
 */
1290 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1291 dns_dispatch_t *disp = ev_in->ev_arg;
1292 dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1296 dns_dispentry_t *resp;
1297 dns_dispatchevent_t *rev;
1298 unsigned int bucket;
1299 isc_boolean_t killit;
1300 isc_boolean_t queue_response;
1303 char buf[ISC_SOCKADDR_FORMATSIZE];
1307 REQUIRE(VALID_DISPATCH(disp));
1311 dispatch_log(disp, LVL(90),
1312 "got TCP packet: requests %d, buffers %d, recvs %d",
1313 disp->requests, disp->tcpbuffers, disp->recv_pending);
/* The read that just completed must have been recorded as pending. */
1317 INSIST(disp->recv_pending != 0);
1318 disp->recv_pending = 0;
1320 if (disp->refcount == 0) {
1322 * This dispatcher is shutting down. Force cancelation.
1324 tcpmsg->result = ISC_R_CANCELED;
/* Failure path: choose a log level from the result code, then shut down. */
1327 if (tcpmsg->result != ISC_R_SUCCESS) {
1328 switch (tcpmsg->result) {
1329 case ISC_R_CANCELED:
1333 dispatch_log(disp, LVL(90), "shutting down on EOF");
1337 case ISC_R_CONNECTIONRESET:
1338 level = ISC_LOG_INFO;
1342 level = ISC_LOG_ERROR;
1344 isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1345 dispatch_log(disp, level, "shutting down due to TCP "
1346 "receive error: %s: %s", buf,
1347 isc_result_totext(tcpmsg->result));
1353 * The event is statically allocated in the tcpmsg
1354 * structure, and destroy_disp() frees the tcpmsg, so we must
1355 * free the event *before* calling destroy_disp().
1357 isc_event_free(&ev_in);
1359 disp->shutting_down = 1;
1360 disp->shutdown_why = tcpmsg->result;
1363 * If the recv() was canceled pass the word on.
1365 killit = destroy_disp_ok(disp);
1366 UNLOCK(&disp->lock);
/* NOTE(review): presumably guarded by "if (killit)"; the guard is not visible here. */
1368 isc_task_send(disp->task[0], &disp->ctlevent);
1372 dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1374 tcpmsg->buffer.length, tcpmsg->buffer.base);
1377 * Peek into the buffer to see what we can see.
1379 dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1380 if (dres != ISC_R_SUCCESS) {
1381 dispatch_log(disp, LVL(10), "got garbage packet");
1385 dispatch_log(disp, LVL(92),
1386 "got valid DNS message header, /QR %c, id %u",
1387 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1390 * Allocate an event to send to the query or response client, and
1391 * allocate a new buffer for our use.
1395 * Look at flags. If query, drop it. If response,
1396 * look to see where it goes.
1398 queue_response = ISC_FALSE;
/* A clear QR bit means the message is a query rather than a response. */
1399 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
/* The entry is keyed on peer address, message id and the dispatch's local port. */
1409 bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1411 resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1412 dispatch_log(disp, LVL(90),
1413 "search for response in bucket %d: %s",
1414 bucket, (resp == NULL ? "not found" : "found"));
/* If an event is already out to the client, this one is queued instead of sent. */
1418 queue_response = resp->item_out;
1419 rev = allocate_event(disp);
1424 * At this point, rev contains the event we want to fill in, and
1425 * resp contains the information on the place to send it to.
1426 * Send the event off.
/* NOTE(review): presumably hands the tcpmsg buffer over to rev->buffer -- confirm in dns/tcpmsg.h. */
1428 dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1430 rev->result = ISC_R_SUCCESS;
1432 rev->addr = tcpmsg->address;
1433 if (queue_response) {
1434 ISC_LIST_APPEND(resp->items, rev, ev_link);
1436 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1437 resp->action, resp->arg, resp, NULL, NULL);
1438 request_log(disp, resp, LVL(90),
1439 "[b] Sent event %p buffer %p len %d to task %p",
1440 rev, rev->buffer.base, rev->buffer.length,
1442 resp->item_out = ISC_TRUE;
1443 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1449 * Restart recv() to get the next packet.
/* The result is deliberately discarded; NULL selects the dispatch's own socket. */
1452 (void)startrecv(disp, NULL);
1454 UNLOCK(&disp->lock);
1456 isc_event_free(&ev_in);
1460 * disp must be locked.
1463 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1465 isc_region_t region;
1466 isc_socket_t *socket;
1468 if (disp->shutting_down == 1)
1469 return (ISC_R_SUCCESS);
1471 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1472 return (ISC_R_SUCCESS);
1474 if (disp->recv_pending != 0 && dispsock == NULL)
1475 return (ISC_R_SUCCESS);
1477 if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1478 return (ISC_R_NOMEMORY);
1480 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1482 return (ISC_R_SUCCESS);
1484 if (dispsock != NULL)
1485 socket = dispsock->socket;
1487 socket = disp->socket;
1488 INSIST(socket != NULL);
1490 switch (disp->socktype) {
1492 * UDP reads are always maximal.
1494 case isc_sockettype_udp:
1495 region.length = disp->mgr->buffersize;
1496 region.base = allocate_udp_buffer(disp);
1497 if (region.base == NULL)
1498 return (ISC_R_NOMEMORY);
1499 if (dispsock != NULL) {
1500 res = isc_socket_recv(socket, ®ion, 1,
1501 dispsock->task, udp_exrecv,
1503 if (res != ISC_R_SUCCESS) {
1504 free_buffer(disp, region.base, region.length);
1508 res = isc_socket_recv(socket, ®ion, 1,
1509 disp->task[0], udp_shrecv, disp);
1510 if (res != ISC_R_SUCCESS) {
1511 free_buffer(disp, region.base, region.length);
1512 disp->shutdown_why = res;
1513 disp->shutting_down = 1;
1515 return (ISC_R_SUCCESS); /* recover by cancel */
1517 INSIST(disp->recv_pending == 0);
1518 disp->recv_pending = 1;
1522 case isc_sockettype_tcp:
1523 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1525 if (res != ISC_R_SUCCESS) {
1526 disp->shutdown_why = res;
1527 disp->shutting_down = 1;
1529 return (ISC_R_SUCCESS); /* recover by cancel */
1531 INSIST(disp->recv_pending == 0);
1532 disp->recv_pending = 1;
1539 return (ISC_R_SUCCESS);
1543 * Mgr must be locked when calling this function.
/*
 * destroy_mgr_ok() -- check whether a dispatch manager may be destroyed.
 *
 * Logs the inputs to the decision, then tests in turn: that the manager is
 * shutting down, that no dispatch remains on mgr->list, and that the epool,
 * rpool and dpool memory pools have no outstanding allocations.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably any failed test returns ISC_FALSE and passing all of them
 * returns ISC_TRUE -- confirm against the full file.
 */
1545 static isc_boolean_t
1546 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1547 mgr_log(mgr, LVL(90),
1548 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1549 "epool=%d, rpool=%d, dpool=%d",
1550 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1551 isc_mempool_getallocated(mgr->epool),
1552 isc_mempool_getallocated(mgr->rpool),
1553 isc_mempool_getallocated(mgr->dpool));
1554 if (!MGR_IS_SHUTTINGDOWN(mgr))
1556 if (!ISC_LIST_EMPTY(mgr->list))
1558 if (isc_mempool_getallocated(mgr->epool) != 0)
1560 if (isc_mempool_getallocated(mgr->rpool) != 0)
1562 if (isc_mempool_getallocated(mgr->dpool) != 0)
1569 * Mgr must be unlocked when calling this function.
/*
 * destroy_mgr() -- release every resource owned by a dispatch manager.
 *
 * Destroys the manager's locks and memory pools, detaches the entropy
 * source and blackhole ACL when set, destroys the qid table when present,
 * frees the v4/v6 port arrays, and finally frees the manager structure and
 * detaches from its memory context.
 *
 * NOTE(review): the lines that load 'mgr' and 'mctx' from *mgrp are not
 * visible in this excerpt.
 */
1572 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1574 dns_dispatchmgr_t *mgr;
1583 DESTROYLOCK(&mgr->lock);
1586 DESTROYLOCK(&mgr->arc4_lock);
1588 isc_mempool_destroy(&mgr->epool);
1589 isc_mempool_destroy(&mgr->rpool);
1590 isc_mempool_destroy(&mgr->dpool);
1591 isc_mempool_destroy(&mgr->bpool);
1592 isc_mempool_destroy(&mgr->spool);
/* The pools above were associated with pool_lock, so it is destroyed after them. */
1594 DESTROYLOCK(&mgr->pool_lock);
1596 if (mgr->entropy != NULL)
1597 isc_entropy_detach(&mgr->entropy);
1598 if (mgr->qid != NULL)
1599 qid_destroy(mctx, &mgr->qid);
1601 DESTROYLOCK(&mgr->buffer_lock);
1603 if (mgr->blackhole != NULL)
1604 dns_acl_detach(&mgr->blackhole);
/* The sizes given to isc_mem_put() come from the stored port counts. */
1606 if (mgr->v4ports != NULL) {
1607 isc_mem_put(mctx, mgr->v4ports,
1608 mgr->nv4ports * sizeof(in_port_t));
1610 if (mgr->v6ports != NULL) {
1611 isc_mem_put(mctx, mgr->v6ports,
1612 mgr->nv6ports * sizeof(in_port_t));
1614 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1615 isc_mem_detach(&mctx);
/*
 * open_socket() -- obtain a UDP socket bound to 'local'.
 *
 * A socket is either created with the protocol family of 'local' or an
 * existing one is opened, then (unless ISC_ALLOW_MAPPED is defined) set to
 * IPv6-only and bound to 'local' with the given bind 'options'.
 * Returns ISC_R_SUCCESS once the bind has succeeded.
 *
 * NOTE(review): the conditions choosing between isc_socket_create() and
 * isc_socket_open(), and between detach and close on bind failure, are not
 * visible in this excerpt; presumably they depend on whether *sockp was
 * already set by the caller -- confirm against the full file.
 */
1619 open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1620 unsigned int options, isc_socket_t **sockp)
1623 isc_result_t result;
1627 result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1628 isc_sockettype_udp, &sock);
1629 if (result != ISC_R_SUCCESS)
1632 result = isc_socket_open(sock);
1633 if (result != ISC_R_SUCCESS)
1637 #ifndef ISC_ALLOW_MAPPED
1638 isc_socket_ipv6only(sock, ISC_TRUE);
1640 result = isc_socket_bind(sock, local, options);
1641 if (result != ISC_R_SUCCESS) {
1643 isc_socket_detach(&sock);
1645 isc_socket_close(sock);
1650 return (ISC_R_SUCCESS);
1654 * Create a temporary port list to set the initial default set of dispatch
1655 * ports: [1024, 65535]. This is almost meaningless as the application will
1656 * normally set the ports explicitly, but is provided to fill some minor corner
/*
 * create_default_portset() -- create a portset in *portsetp and add the
 * port range 1024..65535 to it.  Returns ISC_R_SUCCESS on success.
 * NOTE(review): the failure return is not visible here; presumably the
 * isc_portset_create() result is passed back to the caller.
 */
1660 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1661 isc_result_t result;
1663 result = isc_portset_create(mctx, portsetp);
1664 if (result != ISC_R_SUCCESS)
1666 isc_portset_addrange(*portsetp, 1024, 65535);
1668 return (ISC_R_SUCCESS);
/*
 * dns_dispatchmgr_create() -- allocate and initialise a dispatch manager.
 *
 * Requires a non-NULL 'mctx' and a 'mgrp' that points to a NULL pointer.
 * 'entropy' may be NULL; when given it is attached and used to seed the
 * manager's ARC4 context.
 *
 * Steps: allocate the manager from 'mctx' and attach to it; initialise the
 * lock, arc4_lock, buffer_lock and pool_lock mutexes; create the event
 * (epool), response entry (rpool) and dispatch (dpool) memory pools; install
 * the default port sets for IPv4 and IPv6; attach the entropy source and
 * initialise ARC4.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY when the manager or a pool cannot
 * be allocated.  The block after the success return releases resources in
 * reverse order of acquisition.
 * NOTE(review): the cleanup labels and the final return are not visible in
 * this excerpt.
 */
1676 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1677 dns_dispatchmgr_t **mgrp)
1679 dns_dispatchmgr_t *mgr;
1680 isc_result_t result;
1681 isc_portset_t *v4portset = NULL;
1682 isc_portset_t *v6portset = NULL;
1684 REQUIRE(mctx != NULL);
1685 REQUIRE(mgrp != NULL && *mgrp == NULL);
1687 mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1689 return (ISC_R_NOMEMORY);
1692 isc_mem_attach(mctx, &mgr->mctx);
1694 mgr->blackhole = NULL;
1696 result = isc_mutex_init(&mgr->lock);
1697 if (result != ISC_R_SUCCESS)
1700 result = isc_mutex_init(&mgr->arc4_lock);
1701 if (result != ISC_R_SUCCESS)
1704 result = isc_mutex_init(&mgr->buffer_lock);
1705 if (result != ISC_R_SUCCESS)
1706 goto kill_arc4_lock;
1708 result = isc_mutex_init(&mgr->pool_lock);
1709 if (result != ISC_R_SUCCESS)
1710 goto kill_buffer_lock;
/* Any pool creation failure is reported to the caller as ISC_R_NOMEMORY. */
1713 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1714 &mgr->epool) != ISC_R_SUCCESS) {
1715 result = ISC_R_NOMEMORY;
1716 goto kill_pool_lock;
1720 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1721 &mgr->rpool) != ISC_R_SUCCESS) {
1722 result = ISC_R_NOMEMORY;
1727 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
1728 &mgr->dpool) != ISC_R_SUCCESS) {
1729 result = ISC_R_NOMEMORY;
/* All three pools share pool_lock and keep up to 1024 free items. */
1733 isc_mempool_setname(mgr->epool, "dispmgr_epool");
1734 isc_mempool_setfreemax(mgr->epool, 1024);
1735 isc_mempool_associatelock(mgr->epool, &mgr->pool_lock);
1737 isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
1738 isc_mempool_setfreemax(mgr->rpool, 1024);
1739 isc_mempool_associatelock(mgr->rpool, &mgr->pool_lock);
1741 isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
1742 isc_mempool_setfreemax(mgr->dpool, 1024);
1743 isc_mempool_associatelock(mgr->dpool, &mgr->pool_lock);
1746 mgr->buffersize = 0;
1747 mgr->maxbuffers = 0;
1750 mgr->entropy = NULL;
1753 ISC_LIST_INIT(mgr->list);
1754 mgr->v4ports = NULL;
1755 mgr->v6ports = NULL;
/* The magic is set before dns_dispatchmgr_setavailports(), which REQUIREs a valid manager. */
1758 mgr->magic = DNS_DISPATCHMGR_MAGIC;
1760 result = create_default_portset(mctx, &v4portset);
1761 if (result == ISC_R_SUCCESS) {
1762 result = create_default_portset(mctx, &v6portset);
1763 if (result == ISC_R_SUCCESS) {
1764 result = dns_dispatchmgr_setavailports(mgr,
/* The temporary portsets are destroyed whether or not the calls above succeeded. */
1769 if (v4portset != NULL)
1770 isc_portset_destroy(mctx, &v4portset);
1771 if (v6portset != NULL)
1772 isc_portset_destroy(mctx, &v6portset);
1773 if (result != ISC_R_SUCCESS)
1776 if (entropy != NULL)
1777 isc_entropy_attach(entropy, &mgr->entropy);
1779 dispatch_arc4init(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
1782 return (ISC_R_SUCCESS);
/* Error unwinding: pools, then locks, then the manager itself. */
1785 isc_mempool_destroy(&mgr->dpool);
1787 isc_mempool_destroy(&mgr->rpool);
1789 isc_mempool_destroy(&mgr->epool);
1791 DESTROYLOCK(&mgr->pool_lock);
1793 DESTROYLOCK(&mgr->buffer_lock);
1795 DESTROYLOCK(&mgr->arc4_lock);
1797 DESTROYLOCK(&mgr->lock);
1799 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1800 isc_mem_detach(&mctx);
/*
 * dns_dispatchmgr_setblackhole() -- replace the manager's blackhole ACL.
 * Any ACL currently attached is detached first, then 'blackhole' is
 * attached in its place.  'mgr' must be a valid dispatch manager.
 */
1806 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1807 REQUIRE(VALID_DISPATCHMGR(mgr));
1808 if (mgr->blackhole != NULL)
1809 dns_acl_detach(&mgr->blackhole);
1810 dns_acl_attach(blackhole, &mgr->blackhole);
/*
 * dns_dispatchmgr_getblackhole() -- return the manager's blackhole ACL
 * pointer (possibly NULL).  The stored pointer is returned as is; no
 * attach is performed here.
 */
1814 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
1815 REQUIRE(VALID_DISPATCHMGR(mgr));
1816 return (mgr->blackhole);
/*
 * dns_dispatchmgr_setblackportlist() -- deprecated interface kept for
 * compatibility; use dns_dispatchmgr_setavailports() instead.  In the lines
 * visible here it only validates 'mgr'.
 * NOTE(review): 'portlist' appears to be ignored -- confirm against the
 * full file.
 */
1820 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
1821 dns_portlist_t *portlist)
1823 REQUIRE(VALID_DISPATCHMGR(mgr));
1826 /* This function is deprecated: use dns_dispatchmgr_setavailports(). */
/*
 * dns_dispatchmgr_getblackportlist() -- deprecated interface; validates
 * 'mgr' and always returns NULL.
 */
1831 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
1832 REQUIRE(VALID_DISPATCHMGR(mgr));
1833 return (NULL); /* this function is deprecated */
/*
 * dns_dispatchmgr_setavailports() -- install the sets of UDP ports the
 * manager may use for IPv4 and IPv6.
 *
 * Each portset is flattened into an array of in_port_t holding exactly the
 * ports that are set in it.  The new arrays replace mgr->v4ports and
 * mgr->v6ports, and any previous arrays are freed.  An empty portset does
 * not allocate an array.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY if an array cannot be allocated
 * (a v4 array already allocated is released first).
 *
 * NOTE(review): the initialisation of v4ports/v6ports/p/i4/i6, the array
 * stores and any locking are not visible in this excerpt.
 */
1837 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
1838 isc_portset_t *v6portset)
1840 in_port_t *v4ports, *v6ports, p;
1841 unsigned int nv4ports, nv6ports, i4, i6;
1843 REQUIRE(VALID_DISPATCHMGR(mgr));
1845 nv4ports = isc_portset_nports(v4portset);
1846 nv6ports = isc_portset_nports(v6portset);
1849 if (nv4ports != 0) {
1850 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
1851 if (v4ports == NULL)
1852 return (ISC_R_NOMEMORY);
1855 if (nv6ports != 0) {
1856 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
1857 if (v6ports == NULL) {
1858 if (v4ports != NULL) {
1859 isc_mem_put(mgr->mctx, v4ports,
1861 isc_portset_nports(v4portset));
1863 return (ISC_R_NOMEMORY);
/* Walk every port number and collect the ones set in each portset. */
1871 if (isc_portset_isset(v4portset, p)) {
1872 INSIST(i4 < nv4ports);
1875 if (isc_portset_isset(v6portset, p)) {
1876 INSIST(i6 < nv6ports);
/* The post-increment test lets the body run for p == 65535 before the loop ends. */
1879 } while (p++ < 65535);
1880 INSIST(i4 == nv4ports && i6 == nv6ports);
/* Swap in the new arrays, freeing the old ones with their recorded sizes. */
1883 if (mgr->v4ports != NULL) {
1884 isc_mem_put(mgr->mctx, mgr->v4ports,
1885 mgr->nv4ports * sizeof(in_port_t));
1887 mgr->v4ports = v4ports;
1888 mgr->nv4ports = nv4ports;
1890 if (mgr->v6ports != NULL) {
1891 isc_mem_put(mgr->mctx, mgr->v6ports,
1892 mgr->nv6ports * sizeof(in_port_t));
1894 mgr->v6ports = v6ports;
1895 mgr->nv6ports = nv6ports;
1898 return (ISC_R_SUCCESS);
/*
 * dns_dispatchmgr_setudp() -- create or adjust the manager-wide resources
 * used by UDP dispatches.
 *
 * Requires: 512 <= buffersize < 64K, maxbuffers > 0, buckets < 2097169 and
 * increment > buckets.
 *
 * Under buffer_lock:
 *  - if the buffer pool already exists only its allocation limit and
 *    mgr->maxbuffers are updated; otherwise a pool of 'buffersize' items is
 *    created;
 *  - if the socket pool already exists its limit is set to
 *    DNS_DISPATCH_POOLSOCKS * 2 and the function returns; otherwise the
 *    pool is created with a limit of 'maxrequests';
 *  - on first-time setup the manager's qid table (with socket table) is
 *    allocated and buffersize/maxbuffers are recorded.
 *
 * The trailing block destroys the pools and releases the lock on failure.
 * NOTE(review): the cleanup label, the gotos into it and the failure
 * returns are not visible in this excerpt; check that no path reaches the
 * trailing UNLOCK after already unlocking.
 */
1902 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
1903 unsigned int buffersize, unsigned int maxbuffers,
1904 unsigned int maxrequests, unsigned int buckets,
1905 unsigned int increment)
1907 isc_result_t result;
1909 REQUIRE(VALID_DISPATCHMGR(mgr));
1910 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
1911 REQUIRE(maxbuffers > 0);
1912 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
1913 REQUIRE(increment > buckets);
1916 * Keep some number of items around. This should be a config
1917 * option. For now, keep 8, but later keep at least two even
1918 * if the caller wants less. This allows us to ensure certain
1919 * things, like an event can be "freed" and the next allocation
1920 * will always succeed.
1922 * Note that if limits are placed on anything here, we use one
1923 * event internally, so the actual limit should be "wanted + 1."
1931 LOCK(&mgr->buffer_lock);
1933 /* Create or adjust buffer pool */
1934 if (mgr->bpool != NULL) {
1935 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1936 mgr->maxbuffers = maxbuffers;
1938 result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
1939 if (result != ISC_R_SUCCESS) {
1940 UNLOCK(&mgr->buffer_lock);
1943 isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
1944 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1945 isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock);
1948 /* Create or adjust socket pool */
1949 if (mgr->spool != NULL) {
1950 isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
1951 UNLOCK(&mgr->buffer_lock);
1952 return (ISC_R_SUCCESS);
1954 result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
1956 if (result != ISC_R_SUCCESS) {
1957 UNLOCK(&mgr->buffer_lock);
1960 isc_mempool_setname(mgr->spool, "dispmgr_spool");
1961 isc_mempool_setmaxalloc(mgr->spool, maxrequests);
1962 isc_mempool_associatelock(mgr->spool, &mgr->pool_lock);
/* ISC_TRUE requests a socket table in addition to the response table. */
1964 result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
1965 if (result != ISC_R_SUCCESS)
1968 mgr->buffersize = buffersize;
1969 mgr->maxbuffers = maxbuffers;
1970 UNLOCK(&mgr->buffer_lock);
1971 return (ISC_R_SUCCESS);
/* Failure path: undo pool creation while still holding buffer_lock. */
1974 isc_mempool_destroy(&mgr->bpool);
1975 if (mgr->spool != NULL)
1976 isc_mempool_destroy(&mgr->spool);
1977 UNLOCK(&mgr->buffer_lock);
/*
 * dns_dispatchmgr_destroy() -- begin shutdown of a dispatch manager.
 *
 * Marks the manager as shutting down, asks destroy_mgr_ok() whether it can
 * be destroyed immediately and logs the answer.
 * NOTE(review): the locking, the clearing of *mgrp and the call that
 * actually destroys the manager when 'killit' is true are not visible in
 * this excerpt.
 */
1982 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
1983 dns_dispatchmgr_t *mgr;
1984 isc_boolean_t killit;
1986 REQUIRE(mgrp != NULL);
1987 REQUIRE(VALID_DISPATCHMGR(*mgrp));
1993 mgr->state |= MGR_SHUTTINGDOWN;
1995 killit = destroy_mgr_ok(mgr);
1998 mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
/*
 * Three-way comparison of two port numbers, in the form bsearch() expects.
 *
 * 'key' and 'ent' each point to an in_port_t.  Returns -1 when the key is
 * smaller than the entry, 0 when they are equal and 1 when it is larger.
 */
static int
port_cmp(const void *key, const void *ent) {
	const in_port_t *wanted = key;
	const in_port_t *candidate = ent;

	/* Each relational test yields 0 or 1, so the difference is -1, 0 or 1. */
	return ((*wanted > *candidate) - (*wanted < *candidate));
}
/*
 * portavailable() -- test whether a port is in the manager's configured set
 * of usable ports.
 *
 * The port is taken from 'sockaddrp'; a local sockaddr filled in from
 * 'sock' with isc_socket_getsockname() is used in its place on the path
 * marked below.  At least one of 'sock' and 'sockaddrp' must be non-NULL.
 * The v4 or v6 port array is selected by the address family and searched
 * with bsearch().
 *
 * Returns ISC_TRUE when the port is found.
 * NOTE(review): the condition guarding the getsockname() path, any locking
 * and the final return are not visible in this excerpt.
 */
2017 static isc_boolean_t
2018 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2019 isc_sockaddr_t *sockaddrp)
2021 isc_sockaddr_t sockaddr;
2022 isc_result_t result;
2023 in_port_t *ports, port;
2024 unsigned int nports;
2025 isc_boolean_t available = ISC_FALSE;
2027 REQUIRE(sock != NULL || sockaddrp != NULL);
/* NOTE(review): presumably taken only when the caller passed no address. */
2031 sockaddrp = &sockaddr;
2032 result = isc_socket_getsockname(sock, sockaddrp);
2033 if (result != ISC_R_SUCCESS)
2037 if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2038 ports = mgr->v4ports;
2039 nports = mgr->nv4ports;
2041 ports = mgr->v6ports;
2042 nports = mgr->nv6ports;
2047 port = isc_sockaddr_getport(sockaddrp);
/* bsearch() needs a sorted array; NOTE(review): confirm where the arrays are filled. */
2048 if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2049 available = ISC_TRUE;
2056 #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
/*
 * local_addr_match() -- test whether dispatch 'disp' can serve requests for
 * local address 'addr'.
 *
 * The checks, in order:
 *  1. when both 'addr' and the dispatch's configured address use port 0,
 *     the port the socket actually holds must be one of the manager's
 *     available ports;
 *  2. an exact <address,port> match with disp->local;
 *  3. otherwise, when 'addr' names a specific port, the addresses must be
 *     equal and the socket's real bound address must equal 'addr'.
 *
 * 'disp' must have a socket.
 * NOTE(review): the return statements of the early exits are not visible in
 * this excerpt.
 */
2058 static isc_boolean_t
2059 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2060 isc_sockaddr_t sockaddr;
2061 isc_result_t result;
2063 REQUIRE(disp->socket != NULL);
2069 * Don't match wildcard ports unless the port is available in the
2070 * current configuration.
2072 if (isc_sockaddr_getport(addr) == 0 &&
2073 isc_sockaddr_getport(&disp->local) == 0 &&
2074 !portavailable(disp->mgr, disp->socket, NULL)) {
2079 * Check if we match the binding <address,port>.
2080 * Wildcard ports match/fail here.
2082 if (isc_sockaddr_equal(&disp->local, addr))
2084 if (isc_sockaddr_getport(addr) == 0)
2088 * Check if we match a bound wildcard port <address,port>.
2090 if (!isc_sockaddr_eqaddr(&disp->local, addr))
/* Compare against the address and port the socket is really bound to. */
2092 result = isc_socket_getsockname(disp->socket, &sockaddr);
2093 if (result != ISC_R_SUCCESS)
2096 return (isc_sockaddr_equal(&sockaddr, addr));
2100 * Requires mgr be locked.
2102 * No dispatcher can be locked by this thread when calling this function.
2106 * If a matching dispatcher is found, it is locked after this function
2107 * returns, and must be unlocked by the caller.
/*
 * dispatch_find() -- search the manager's dispatch list for a dispatch that
 * is not shutting down, whose attributes match 'attributes' under 'mask',
 * and whose local address matches 'local'.
 *
 * Private and exclusive dispatches are never returned: both bits are
 * cleared in 'attributes' and forced into 'mask'.
 *
 * Returns ISC_R_SUCCESS when a dispatch is found and ISC_R_NOTFOUND
 * otherwise.
 * NOTE(review): the LOCK() call, the loop exit and the store to *dispp are
 * not visible in this excerpt.
 */
2110 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2111 unsigned int attributes, unsigned int mask,
2112 dns_dispatch_t **dispp)
2114 dns_dispatch_t *disp;
2115 isc_result_t result;
2118 * Make certain that we will not match a private or exclusive dispatch.
2120 attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2121 mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2123 disp = ISC_LIST_HEAD(mgr->list);
2124 while (disp != NULL) {
2126 if ((disp->shutting_down == 0)
2127 && ATTRMATCH(disp->attributes, attributes, mask)
2128 && local_addr_match(disp, local))
/* Not a match: release this dispatch and move to the next one. */
2130 UNLOCK(&disp->lock);
2131 disp = ISC_LIST_NEXT(disp, link);
2135 result = ISC_R_NOTFOUND;
2140 result = ISC_R_SUCCESS;
2147 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2148 unsigned int increment, dns_qid_t **qidp,
2149 isc_boolean_t needsocktable)
2153 isc_result_t result;
2155 REQUIRE(VALID_DISPATCHMGR(mgr));
2156 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2157 REQUIRE(increment > buckets);
2158 REQUIRE(qidp != NULL && *qidp == NULL);
2160 qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2162 return (ISC_R_NOMEMORY);
2164 qid->qid_table = isc_mem_get(mgr->mctx,
2165 buckets * sizeof(dns_displist_t));
2166 if (qid->qid_table == NULL) {
2167 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2168 return (ISC_R_NOMEMORY);
2171 qid->sock_table = NULL;
2172 if (needsocktable) {
2173 qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2174 sizeof(dispsocketlist_t));
2175 if (qid->sock_table == NULL) {
2176 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2177 isc_mem_put(mgr->mctx, qid->qid_table,
2178 buckets * sizeof(dns_displist_t));
2179 return (ISC_R_NOMEMORY);
2183 result = isc_mutex_init(&qid->lock);
2184 if (result != ISC_R_SUCCESS) {
2185 if (qid->sock_table != NULL) {
2186 isc_mem_put(mgr->mctx, qid->sock_table,
2187 buckets * sizeof(dispsocketlist_t));
2189 isc_mem_put(mgr->mctx, qid->qid_table,
2190 buckets * sizeof(dns_displist_t));
2191 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2195 for (i = 0; i < buckets; i++) {
2196 ISC_LIST_INIT(qid->qid_table[i]);
2197 if (qid->sock_table != NULL)
2198 ISC_LIST_INIT(qid->sock_table[i]);
2201 qid->qid_nbuckets = buckets;
2202 qid->qid_increment = increment;
2203 qid->magic = QID_MAGIC;
2205 return (ISC_R_SUCCESS);
/*
 * qid_destroy() -- free a query-ID table allocated by qid_allocate().
 *
 * Frees the response table and, when present, the socket table (both sized
 * by qid_nbuckets), destroys the table's lock and frees the structure
 * itself.  'qidp' must be non-NULL and refer to a valid table.
 * NOTE(review): the lines that load 'qid' from *qidp, clear *qidp and
 * invalidate the magic number are not visible in this excerpt.
 */
2209 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2212 REQUIRE(qidp != NULL);
2215 REQUIRE(VALID_QID(qid));
2219 isc_mem_put(mctx, qid->qid_table,
2220 qid->qid_nbuckets * sizeof(dns_displist_t));
2221 if (qid->sock_table != NULL) {
2222 isc_mem_put(mctx, qid->sock_table,
2223 qid->qid_nbuckets * sizeof(dispsocketlist_t));
/* The members are freed first; the structure holding them goes last. */
2225 DESTROYLOCK(&qid->lock);
2226 isc_mem_put(mctx, qid, sizeof(*qid));
2230 * Allocate and set important limits.
/*
 * dispatch_allocate() -- get a dispatch from the manager's dpool and set
 * the fields common to TCP and UDP dispatches.
 *
 * The dispatch starts with no attributes, a zeroed local address, local
 * port 0, empty active/inactive socket lists, its own ARC4 context, an
 * initialised lock and a pre-allocated "failsafe" event.
 *
 * Requires a valid 'mgr' and a 'dispp' that points to a NULL pointer.
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY when the dispatch or the
 * failsafe event cannot be allocated.  The trailing block undoes the lock
 * initialisation and returns the dispatch to the pool on failure.
 * NOTE(review): the store to *dispp, the cleanup labels and the final
 * return are not visible in this excerpt.
 */
2233 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2234 dns_dispatch_t **dispp)
2236 dns_dispatch_t *disp;
2237 isc_result_t result;
2239 REQUIRE(VALID_DISPATCHMGR(mgr));
2240 REQUIRE(dispp != NULL && *dispp == NULL);
2243 * Set up the dispatcher, mostly. Don't bother setting some of
2244 * the options that are controlled by tcp vs. udp, etc.
2247 disp = isc_mempool_get(mgr->dpool);
2249 return (ISC_R_NOMEMORY);
2253 disp->maxrequests = maxrequests;
2254 disp->attributes = 0;
2255 ISC_LINK_INIT(disp, link);
2257 disp->recv_pending = 0;
/* The local address stays zeroed until the TCP/UDP specific creator fills it in. */
2258 memset(&disp->local, 0, sizeof(disp->local));
2259 disp->localport = 0;
2260 disp->shutting_down = 0;
2261 disp->shutdown_out = 0;
2262 disp->connected = 0;
2263 disp->tcpmsg_valid = 0;
2264 disp->shutdown_why = ISC_R_UNEXPECTED;
2266 disp->tcpbuffers = 0;
2268 ISC_LIST_INIT(disp->activesockets);
2269 ISC_LIST_INIT(disp->inactivesockets);
/* No lock is passed (NULL) for the per-dispatch ARC4 context. */
2271 dispatch_arc4init(&disp->arc4ctx, mgr->entropy, NULL);
2273 result = isc_mutex_init(&disp->lock);
2274 if (result != ISC_R_SUCCESS)
2277 disp->failsafe_ev = allocate_event(disp);
2278 if (disp->failsafe_ev == NULL) {
2279 result = ISC_R_NOMEMORY;
2283 disp->magic = DISPATCH_MAGIC;
2286 return (ISC_R_SUCCESS);
2292 DESTROYLOCK(&disp->lock);
2294 isc_mempool_put(mgr->dpool, disp);
2301 * MUST be unlocked, and not used by anything.
/*
 * dispatch_free() -- release a dispatch allocated by dispatch_allocate().
 *
 * Invalidates the TCP message state if it was set up, asserts that the
 * dispatch is idle (no TCP buffers, no requests, no pending receive, no
 * sockets on either list), returns the failsafe event to the epool,
 * destroys the qid table if the dispatch owns one, destroys the lock and
 * returns the dispatch to the dpool.
 * NOTE(review): the lines that load 'disp' and 'mgr' from *dispp and clear
 * *dispp are not visible in this excerpt.
 */
2304 dispatch_free(dns_dispatch_t **dispp)
2306 dns_dispatch_t *disp;
2307 dns_dispatchmgr_t *mgr;
2309 REQUIRE(VALID_DISPATCH(*dispp));
2314 REQUIRE(VALID_DISPATCHMGR(mgr));
2316 if (disp->tcpmsg_valid) {
2317 dns_tcpmsg_invalidate(&disp->tcpmsg);
2318 disp->tcpmsg_valid = 0;
/* The dispatch must be completely idle before it is freed. */
2321 INSIST(disp->tcpbuffers == 0);
2322 INSIST(disp->requests == 0);
2323 INSIST(disp->recv_pending == 0);
2324 INSIST(ISC_LIST_EMPTY(disp->activesockets));
2325 INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2327 isc_mempool_put(mgr->epool, disp->failsafe_ev);
2328 disp->failsafe_ev = NULL;
2330 if (disp->qid != NULL)
2331 qid_destroy(mgr->mctx, &disp->qid);
2333 DESTROYLOCK(&disp->lock);
2335 isc_mempool_put(mgr->dpool, disp);
/*
 * dns_dispatch_createtcp() -- create a dispatch on an existing TCP socket.
 *
 * Requires a valid 'mgr', a TCP 'sock', and 'attributes' with
 * DNS_DISPATCHATTR_TCP set and DNS_DISPATCHATTR_UDP clear.  The dispatch is
 * always made private (DNS_DISPATCHATTR_PRIVATE is forced on).
 *
 * The new dispatch gets its own qid table (without a socket table), a
 * reference to 'sock', one task, a control event and initialised TCP
 * message state, and is appended to the manager's dispatch list.
 *
 * Returns ISC_R_SUCCESS, ISC_R_NOMEMORY when the control event cannot be
 * allocated, or the failure code of the step that failed.  The trailing
 * block releases the task, the socket and the dispatch on failure.
 * NOTE(review): the manager locking, the store to *dispp and some cleanup
 * labels are not visible in this excerpt.
 */
2339 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2340 isc_taskmgr_t *taskmgr, unsigned int buffersize,
2341 unsigned int maxbuffers, unsigned int maxrequests,
2342 unsigned int buckets, unsigned int increment,
2343 unsigned int attributes, dns_dispatch_t **dispp)
2345 isc_result_t result;
2346 dns_dispatch_t *disp;
2351 REQUIRE(VALID_DISPATCHMGR(mgr));
2352 REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2353 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2354 REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2356 attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */
2361 * dispatch_allocate() checks mgr for us.
2362 * qid_allocate() checks buckets and increment for us.
2365 result = dispatch_allocate(mgr, maxrequests, &disp);
2366 if (result != ISC_R_SUCCESS) {
/* ISC_FALSE: a TCP dispatch does not need a per-query socket table. */
2371 result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2372 if (result != ISC_R_SUCCESS)
2373 goto deallocate_dispatch;
2375 disp->socktype = isc_sockettype_tcp;
2376 disp->socket = NULL;
2377 isc_socket_attach(sock, &disp->socket);
2380 disp->task[0] = NULL;
2381 result = isc_task_create(taskmgr, 0, &disp->task[0]);
2382 if (result != ISC_R_SUCCESS)
2385 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2386 DNS_EVENT_DISPATCHCONTROL,
2388 sizeof(isc_event_t));
2389 if (disp->ctlevent == NULL) {
2390 result = ISC_R_NOMEMORY;
2394 isc_task_setname(disp->task[0], "tcpdispatch", disp);
2396 dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2397 disp->tcpmsg_valid = 1;
2399 disp->attributes = attributes;
2402 * Append it to the dispatcher list.
2404 ISC_LIST_APPEND(mgr->list, disp, link);
2407 mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2408 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2412 return (ISC_R_SUCCESS);
/* Error unwinding in reverse order of acquisition. */
2418 isc_task_detach(&disp->task[0]);
2420 isc_socket_detach(&disp->socket);
2421 deallocate_dispatch:
2422 dispatch_free(&disp);
/*
 * dns_dispatch_getudp() -- return a UDP dispatch for 'localaddr', reusing a
 * matching one when possible and creating one otherwise.
 *
 * Requires valid 'mgr', 'sockmgr', 'localaddr' and 'taskmgr';
 * 512 <= buffersize < 64K; maxbuffers > 0; buckets < 2097169;
 * increment > buckets; *dispp NULL; DNS_DISPATCHATTR_TCP clear.  An
 * EXCLUSIVE dispatch additionally requires a wildcard (0) port.
 *
 * The manager's UDP pools are set up or adjusted first.  If dispatch_find()
 * yields a match, its request limit is raised to 'maxrequests' when that is
 * larger, and if the caller asks for NOLISTEN on a dispatch that is
 * listening, the attribute is set and any pending receive is cancelled.
 * Otherwise dispatch_createudp() builds a new dispatch.
 *
 * NOTE(review): the manager locking, the reference counting of a reused
 * dispatch, the stores to *dispp and the failure returns are not visible in
 * this excerpt.
 */
2430 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2431 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2432 unsigned int buffersize,
2433 unsigned int maxbuffers, unsigned int maxrequests,
2434 unsigned int buckets, unsigned int increment,
2435 unsigned int attributes, unsigned int mask,
2436 dns_dispatch_t **dispp)
2438 isc_result_t result;
2439 dns_dispatch_t *disp = NULL;
2441 REQUIRE(VALID_DISPATCHMGR(mgr));
2442 REQUIRE(sockmgr != NULL);
2443 REQUIRE(localaddr != NULL);
2444 REQUIRE(taskmgr != NULL);
2445 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2446 REQUIRE(maxbuffers > 0);
2447 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2448 REQUIRE(increment > buckets);
2449 REQUIRE(dispp != NULL && *dispp == NULL);
2450 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2452 result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2453 maxrequests, buckets, increment);
2454 if (result != ISC_R_SUCCESS)
/* Exclusive dispatches pick a port per query, so a fixed port is not allowed. */
2459 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2460 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2465 * First, see if we have a dispatcher that matches.
2468 result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2469 if (result == ISC_R_SUCCESS) {
/* A shared dispatch only ever has its request limit raised, never lowered. */
2472 if (disp->maxrequests < maxrequests)
2473 disp->maxrequests = maxrequests;
2475 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 &&
2476 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2478 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2479 if (disp->recv_pending != 0)
2480 isc_socket_cancel(disp->socket, disp->task[0],
2481 ISC_SOCKCANCEL_RECV);
2484 UNLOCK(&disp->lock);
2489 return (ISC_R_SUCCESS);
2496 result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2497 maxrequests, attributes, &disp);
2498 if (result != ISC_R_SUCCESS) {
2505 return (ISC_R_SUCCESS);
2509 * mgr should be locked.
2512 #ifndef DNS_DISPATCH_HELD
2513 #define DNS_DISPATCH_HELD 20U
2517 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2518 isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2519 isc_socket_t **sockp)
2522 isc_socket_t *held[DNS_DISPATCH_HELD];
2523 isc_sockaddr_t localaddr_bound;
2524 isc_socket_t *sock = NULL;
2525 isc_result_t result = ISC_R_SUCCESS;
2526 isc_boolean_t anyport;
2528 INSIST(sockp != NULL && *sockp == NULL);
2530 localaddr_bound = *localaddr;
2531 anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
2534 unsigned int nports;
2538 * If no port is specified, we first try to pick up a random
2539 * port by ourselves.
2541 if (isc_sockaddr_pf(&disp->local) == AF_INET) {
2542 nports = disp->mgr->nv4ports;
2543 ports = disp->mgr->v4ports;
2545 nports = disp->mgr->nv6ports;
2546 ports = disp->mgr->v6ports;
2549 return (ISC_R_ADDRNOTAVAIL);
2551 for (i = 0; i < 1024; i++) {
2554 prt = ports[dispatch_arc4uniformrandom(
2557 isc_sockaddr_setport(&localaddr_bound, prt);
2558 result = open_socket(sockmgr, &localaddr_bound,
2560 if (result == ISC_R_SUCCESS ||
2561 result != ISC_R_ADDRINUSE) {
2562 disp->localport = prt;
2569 * If this fails 1024 times, we then ask the kernel for
2574 memset(held, 0, sizeof(held));
2577 for (j = 0; j < 0xffffU; j++) {
2578 result = open_socket(sockmgr, localaddr, 0, &sock);
2579 if (result != ISC_R_SUCCESS)
2583 else if (portavailable(mgr, sock, NULL))
2585 if (held[i] != NULL)
2586 isc_socket_detach(&held[i]);
2589 if (i == DNS_DISPATCH_HELD)
2593 mgr_log(mgr, ISC_LOG_ERROR,
2594 "avoid-v%s-udp-ports: unable to allocate "
2595 "an available port",
2596 isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
2597 result = ISC_R_FAILURE;
2603 for (i = 0; i < DNS_DISPATCH_HELD; i++) {
2604 if (held[i] != NULL)
2605 isc_socket_detach(&held[i]);
/*
 * dispatch_createudp() -- build a new UDP dispatch for 'localaddr'.
 *
 * For a non-exclusive dispatch a socket is obtained right away with
 * get_udpsocket().  For an exclusive dispatch with a specific (non-wildcard)
 * source address, a socket is opened only to verify that the address is
 * usable and is detached again; the dispatch itself then has no socket.
 *
 * The dispatch then receives its tasks (MAX_INTERNAL_TASKS when exclusive),
 * a control event and its final attributes (TCP cleared, UDP set), and is
 * appended to the manager's dispatch list.
 *
 * Returns ISC_R_NOMEMORY when the control event cannot be allocated, or the
 * failure code of the step that failed.  The trailing block detaches the
 * tasks and socket and frees the dispatch on failure.
 * NOTE(review): the declaration of 'i', the non-exclusive task count, the
 * store to *dispp, the success return and some cleanup labels are not
 * visible in this excerpt.
 */
2612 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2613 isc_taskmgr_t *taskmgr,
2614 isc_sockaddr_t *localaddr,
2615 unsigned int maxrequests,
2616 unsigned int attributes,
2617 dns_dispatch_t **dispp)
2619 isc_result_t result;
2620 dns_dispatch_t *disp;
2621 isc_socket_t *sock = NULL;
2625 * dispatch_allocate() checks mgr for us.
2628 result = dispatch_allocate(mgr, maxrequests, &disp);
2629 if (result != ISC_R_SUCCESS)
2632 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
/* Note: disp->local has not been assigned yet when get_udpsocket() runs. */
2633 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock);
2634 if (result != ISC_R_SUCCESS)
2635 goto deallocate_dispatch;
2637 isc_sockaddr_t sa_any;
2640 * For dispatches using exclusive sockets with a specific
2641 * source address, we only check if the specified address is
2642 * available on the system. Query sockets will be created later
2645 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
2646 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
2647 result = open_socket(sockmgr, localaddr, 0, &sock);
2649 isc_socket_detach(&sock);
2650 if (result != ISC_R_SUCCESS)
2651 goto deallocate_dispatch;
2654 disp->socktype = isc_sockettype_udp;
2655 disp->socket = sock;
2656 disp->local = *localaddr;
2658 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
2659 disp->ntasks = MAX_INTERNAL_TASKS;
2662 for (i = 0; i < disp->ntasks; i++) {
2663 disp->task[i] = NULL;
2664 result = isc_task_create(taskmgr, 0, &disp->task[i]);
2665 if (result != ISC_R_SUCCESS) {
/* NOTE(review): presumably inside a loop that unwinds the tasks already created. */
2667 isc_task_destroy(&disp->task[i]);
2670 isc_task_setname(disp->task[i], "udpdispatch", disp);
2673 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2674 DNS_EVENT_DISPATCHCONTROL,
2676 sizeof(isc_event_t));
2677 if (disp->ctlevent == NULL) {
2678 result = ISC_R_NOMEMORY;
2682 attributes &= ~DNS_DISPATCHATTR_TCP;
2683 attributes |= DNS_DISPATCHATTR_UDP;
2684 disp->attributes = attributes;
2687 * Append it to the dispatcher list.
2689 ISC_LIST_APPEND(mgr->list, disp, link);
2691 mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
2692 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
2693 if (disp->socket != NULL)
2694 dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
/* Error unwinding: tasks, then the socket, then the dispatch itself. */
2703 for (i = 0; i < disp->ntasks; i++)
2704 isc_task_detach(&disp->task[i]);
2706 if (disp->socket != NULL)
2707 isc_socket_detach(&disp->socket);
2708 deallocate_dispatch:
2709 dispatch_free(&disp);
/*
 * dns_dispatch_attach() -- take an additional reference to 'disp' and
 * return it through 'dispp'.  Requires a valid dispatch and a 'dispp' that
 * points to a NULL pointer.
 * NOTE(review): the LOCK(), the reference count increment and the store to
 * *dispp are not visible in this excerpt; only the UNLOCK() is.
 */
2715 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
2716 REQUIRE(VALID_DISPATCH(disp));
2717 REQUIRE(dispp != NULL && *dispp == NULL);
2721 UNLOCK(&disp->lock);
2727 * It is important to lock the manager while we are deleting the dispatch,
2728 * since dns_dispatch_getudp will call dispatch_find, which returns to
2729 * the caller a dispatch but does not attach to it until later. _getudp
2730 * locks the manager, however, so locking it here will keep us from attaching
2731 * to a dispatcher that is in the process of going away.
/*
 * dns_dispatch_detach() -- drop a reference to a dispatch.
 *
 * When the reference count reaches zero, any pending receive on the
 * dispatch's own socket and on every active per-query socket is cancelled
 * and the dispatch is marked as shutting down.  destroy_disp_ok() then
 * decides whether the control event is sent to task[0].
 *
 * Requires 'dispp' to be non-NULL and to refer to a valid dispatch.
 * NOTE(review): the LOCK(), the reference count decrement, the clearing of
 * *dispp and the "if (killit)" guard are not visible in this excerpt.
 */
2734 dns_dispatch_detach(dns_dispatch_t **dispp) {
2735 dns_dispatch_t *disp;
2736 dispsocket_t *dispsock;
2737 isc_boolean_t killit;
2739 REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
2746 INSIST(disp->refcount > 0);
2749 if (disp->refcount == 0) {
2750 if (disp->recv_pending > 0)
2751 isc_socket_cancel(disp->socket, disp->task[0],
2752 ISC_SOCKCANCEL_RECV);
/* Per-query sockets each have their own task, so cancel them one by one. */
2753 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
2755 dispsock = ISC_LIST_NEXT(dispsock, link)) {
2756 isc_socket_cancel(dispsock->socket, dispsock->task,
2757 ISC_SOCKCANCEL_RECV);
2759 disp->shutting_down = 1;
2762 dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
2764 killit = destroy_disp_ok(disp);
2765 UNLOCK(&disp->lock);
2767 isc_task_send(disp->task[0], &disp->ctlevent);
/*
 * Create a response entry (dns_dispentry_t) on 'disp' and file it in the
 * QID table bucket computed by dns_hash() from 'dest', a randomly chosen
 * message ID and the local port.
 *
 * Parameters (as used by the visible code):
 *   disp    - valid dispatch that will own the entry.
 *   dest    - non-NULL destination address; part of the hash/search key.
 *   task    - non-NULL task; the entry attaches to it.
 *   action  - stored in the entry as res->action.
 *   arg     - NOTE(review): presumably stored as the callback argument;
 *             the assignment is not visible in this listing.
 *   idp     - must be non-NULL.  NOTE(review): presumably receives the
 *             chosen message ID -- confirm.
 *   resp    - must be non-NULL and point to NULL.  NOTE(review):
 *             presumably receives the new entry -- confirm.
 *   sockmgr - must be non-NULL when 'disp' has
 *             DNS_DISPATCHATTR_EXCLUSIVE set; passed to get_dispsocket().
 *
 * Return values visible in this function:
 *   ISC_R_SHUTTINGDOWN - the dispatch is shutting down.
 *   ISC_R_QUOTA        - disp->requests has reached disp->maxrequests.
 *   ISC_R_NOMORE       - returned after the 64-attempt ID search
 *                        (presumably only when no unused ID was found).
 *   ISC_R_NOMEMORY     - returned on the path following isc_mempool_get()
 *                        (presumably when it yields NULL).
 *   ISC_R_SUCCESS      - the entry was added.
 * NOTE(review): the failure paths after get_dispsocket() and startrecv()
 * presumably return 'result'; those return statements are not visible.
 */
2771 dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
2772 isc_task_t *task, isc_taskaction_t action, void *arg,
2773 dns_messageid_t *idp, dns_dispentry_t **resp,
2774 isc_socketmgr_t *sockmgr)
2776 dns_dispentry_t *res;
2777 unsigned int bucket;
2778 in_port_t localport = 0;
2783 dispsocket_t *dispsocket = NULL;
2784 isc_result_t result;
2786 REQUIRE(VALID_DISPATCH(disp));
2787 REQUIRE(task != NULL);
2788 REQUIRE(dest != NULL);
2789 REQUIRE(resp != NULL && *resp == NULL);
2790 REQUIRE(idp != NULL);
2791 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
2792 REQUIRE(sockmgr != NULL);
/* Refuse new entries once the dispatch is shutting down. */
2796 if (disp->shutting_down == 1) {
2797 UNLOCK(&disp->lock);
2798 return (ISC_R_SHUTTINGDOWN);
/* Enforce the per-dispatch limit on outstanding requests. */
2801 if (disp->requests >= disp->maxrequests) {
2802 UNLOCK(&disp->lock);
2803 return (ISC_R_QUOTA);
2806 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
2807 disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
2808 dispsocket_t *oldestsocket;
2809 dns_dispentry_t *oldestresp;
2810 dns_dispatchevent_t *rev;
2813 * Kill oldest outstanding query if the number of sockets
2814 * exceeds the quota to keep the room for new queries.
2816 oldestsocket = ISC_LIST_HEAD(disp->activesockets);
2817 oldestresp = oldestsocket->resp;
2818 if (oldestresp != NULL && !oldestresp->item_out) {
2819 rev = allocate_event(oldestresp->disp);
2821 rev->buffer.base = NULL;
2822 rev->result = ISC_R_CANCELED;
2823 rev->id = oldestresp->id;
2824 ISC_EVENT_INIT(rev, sizeof(*rev), 0,
2825 NULL, DNS_EVENT_DISPATCH,
2827 oldestresp->arg, oldestresp,
2829 oldestresp->item_out = ISC_TRUE;
2830 isc_task_send(oldestresp->task,
2831 ISC_EVENT_PTR(&rev));
2836 * Move this entry to the tail so that it won't (easily) be
2837 * examined before actually being canceled.
2839 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
2840 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
2843 qid = DNS_QID(disp);
2846 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2848 * Get a separate UDP socket with a random port number.
2850 result = get_dispsocket(disp, dest, sockmgr, qid, &dispsocket,
2852 if (result != ISC_R_SUCCESS) {
2854 UNLOCK(&disp->lock);
2858 localport = disp->localport;
2862 * Try somewhat hard to find an unique ID.
2864 id = (dns_messageid_t)dispatch_arc4random(DISP_ARC4CTX(disp));
2865 bucket = dns_hash(qid, dest, id, localport);
/* Up to 64 candidate IDs are probed, stepping by qid->qid_increment. */
2867 for (i = 0; i < 64; i++) {
2868 if (entry_search(qid, dest, id, localport, bucket) == NULL) {
2872 id += qid->qid_increment;
2874 bucket = dns_hash(qid, dest, id, localport);
2879 UNLOCK(&disp->lock);
2880 return (ISC_R_NOMORE);
/* Take the new entry from the manager's rpool memory pool. */
2883 res = isc_mempool_get(disp->mgr->rpool);
2886 UNLOCK(&disp->lock);
2887 if (dispsocket != NULL)
2888 destroy_dispsocket(disp, &dispsocket);
2889 return (ISC_R_NOMEMORY);
2895 isc_task_attach(task, &res->task);
2898 res->port = localport;
2899 res->bucket = bucket;
2901 res->action = action;
2903 res->dispsocket = dispsocket;
2904 if (dispsocket != NULL)
2905 dispsocket->resp = res;
2906 res->item_out = ISC_FALSE;
2907 ISC_LIST_INIT(res->items);
2908 ISC_LINK_INIT(res, link);
2909 res->magic = RESPONSE_MAGIC;
2910 ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
2913 request_log(disp, res, LVL(90),
2914 "attached to task %p", res->task);
/*
 * For UDP or connected dispatches call startrecv(); if that fails, undo
 * the registration: unlink the entry, destroy the per-query socket,
 * detach from the task and return the entry to the pool.
 */
2916 if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
2917 ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
2918 result = startrecv(disp, dispsocket);
2919 if (result != ISC_R_SUCCESS) {
2921 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
2924 if (dispsocket != NULL)
2925 destroy_dispsocket(disp, &dispsocket);
2930 UNLOCK(&disp->lock);
2931 isc_task_detach(&res->task);
2932 isc_mempool_put(disp->mgr->rpool, res);
/* Track the per-query socket on the dispatch's active list. */
2937 if (dispsocket != NULL)
2938 ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
2940 UNLOCK(&disp->lock);
/* An exclusive dispatch must have given the entry its own socket. */
2945 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
2946 INSIST(res->dispsocket != NULL);
2948 return (ISC_R_SUCCESS);
/*
 * Wrapper around dns_dispatch_addresponse2() for dispatches that do not
 * have DNS_DISPATCHATTR_EXCLUSIVE set (enforced by REQUIRE).  Forwards
 * 'disp', 'dest', 'task', 'action' and 'arg' and returns the result of
 * dns_dispatch_addresponse2().
 * NOTE(review): the trailing arguments of the call are not visible in
 * this listing; presumably 'idp', 'resp' and a NULL socket manager --
 * confirm.
 */
2952 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
2953 isc_task_t *task, isc_taskaction_t action, void *arg,
2954 dns_messageid_t *idp, dns_dispentry_t **resp)
2956 REQUIRE(VALID_DISPATCH(disp));
2957 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
2959 return (dns_dispatch_addresponse2(disp, dest, task, action, arg,
/*
 * Mark the dispatch as connected: sets DNS_DISPATCHATTR_CONNECTED in
 * disp->attributes and calls startrecv(disp, NULL), deliberately
 * discarding its result, before releasing disp->lock.
 *
 * Requires a valid 'disp'.
 * NOTE(review): the matching LOCK of disp->lock is not visible in this
 * listing.
 */
2964 dns_dispatch_starttcp(dns_dispatch_t *disp) {
2966 REQUIRE(VALID_DISPATCH(disp));
2968 dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
2971 disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
2972 (void)startrecv(disp, NULL);
2973 UNLOCK(&disp->lock);
/*
 * Remove the response entry '*resp' from its dispatch and release it.
 *
 * Parameters:
 *   resp      - non-NULL; '*resp' must be a valid response entry.
 *   sockevent - optional; when non-NULL, '*sockevent' must be non-NULL.
 *               NOTE(review): presumably the event handed in here is the
 *               one freed below as 'ev' -- the assignment is not visible.
 *
 * Visible steps, in order:
 *   - assert that the dispatch still has requests and references; if the
 *     reference count is found to be zero, cancel pending receives on the
 *     dispatch socket and on all active sockets and flag the dispatch as
 *     shutting down;
 *   - unlink the entry from its QID table bucket;
 *   - if no event is in hand but one has been posted (res->item_out),
 *     pull it back from the task queue with isc_task_unsend();
 *   - free the event and its buffer, detach from the task, cancel the
 *     receive on the entry's own socket (if any) and clear its back
 *     pointer;
 *   - free every event still queued on res->items and return the entry
 *     to the manager's rpool;
 *   - call startrecv() (NOTE(review): presumably only when not shutting
 *     down; the shutting-down branch body is not visible), evaluate
 *     destroy_disp_ok() and send the control event to disp->task[0]
 *     (presumably guarded by 'killit' -- confirm).
 */
2977 dns_dispatch_removeresponse(dns_dispentry_t **resp,
2978 dns_dispatchevent_t **sockevent)
2980 dns_dispatchmgr_t *mgr;
2981 dns_dispatch_t *disp;
2982 dns_dispentry_t *res;
2983 dispsocket_t *dispsock;
2984 dns_dispatchevent_t *ev;
2985 unsigned int bucket;
2986 isc_boolean_t killit;
2988 isc_eventlist_t events;
2991 REQUIRE(resp != NULL);
2992 REQUIRE(VALID_RESPONSE(*resp));
2998 REQUIRE(VALID_DISPATCH(disp));
3000 REQUIRE(VALID_DISPATCHMGR(mgr));
3002 qid = DNS_QID(disp);
3004 if (sockevent != NULL) {
3005 REQUIRE(*sockevent != NULL);
3014 INSIST(disp->requests > 0);
3016 INSIST(disp->refcount > 0);
/* No references remain: cancel outstanding receives and begin shutdown. */
3019 if (disp->refcount == 0) {
3020 if (disp->recv_pending > 0)
3021 isc_socket_cancel(disp->socket, disp->task[0],
3022 ISC_SOCKCANCEL_RECV);
3023 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3025 dispsock = ISC_LIST_NEXT(dispsock, link)) {
3026 isc_socket_cancel(dispsock->socket, dispsock->task,
3027 ISC_SOCKCANCEL_RECV);
3029 disp->shutting_down = 1;
3032 bucket = res->bucket;
3035 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3038 if (ev == NULL && res->item_out) {
3040 * We've posted our event, but the caller hasn't gotten it
3041 * yet. Take it back.
3043 ISC_LIST_INIT(events);
3044 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3047 * We had better have gotten it back.
3050 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
3054 REQUIRE(res->item_out == ISC_TRUE);
3055 res->item_out = ISC_FALSE;
3056 if (ev->buffer.base != NULL)
3057 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3058 free_event(disp, ev);
3061 request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3062 isc_task_detach(&res->task);
/* Stop the entry's own socket from delivering to this entry. */
3064 if (res->dispsocket != NULL) {
3065 isc_socket_cancel(res->dispsocket->socket,
3066 res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3067 res->dispsocket->resp = NULL;
3071 * Free any buffered requests as well
3073 ev = ISC_LIST_HEAD(res->items);
3074 while (ev != NULL) {
3075 ISC_LIST_UNLINK(res->items, ev, ev_link);
3076 if (ev->buffer.base != NULL)
3077 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3078 free_event(disp, ev);
3079 ev = ISC_LIST_HEAD(res->items);
3082 isc_mempool_put(disp->mgr->rpool, res);
3083 if (disp->shutting_down == 1)
3086 (void)startrecv(disp, NULL);
/* Decide on destruction while disp->lock is still held. */
3088 killit = destroy_disp_ok(disp);
3089 UNLOCK(&disp->lock);
3091 isc_task_send(disp->task[0], &disp->ctlevent);
/*
 * Deliver the dispatch's single failsafe shutdown event to one response
 * entry.
 *
 * Checks disp->shutdown_out first (NOTE(review): presumably returns
 * early when the failsafe event is already out -- the return is not
 * visible).  It then walks the QID table with linear_first() and
 * linear_next(), skipping entries that already have an event out
 * (resp->item_out).  The chosen entry receives disp->failsafe_ev,
 * initialised with the entry's action and argument, carrying
 * disp->shutdown_why as its result and an empty buffer.
 * disp->shutdown_out and resp->item_out are both set before the event is
 * sent to resp->task.
 *
 * NOTE(review): no locking is visible in this function; presumably the
 * caller holds disp->lock -- confirm.
 */
3095 do_cancel(dns_dispatch_t *disp) {
3096 dns_dispatchevent_t *ev;
3097 dns_dispentry_t *resp;
3100 if (disp->shutdown_out == 1)
3103 qid = DNS_QID(disp);
3106 * Search for the first response handler without packets outstanding
3107 * unless a specific hander is given.
3110 for (resp = linear_first(qid);
3111 resp != NULL && resp->item_out;
3113 resp = linear_next(qid, resp);
3116 * No one to send the cancel event to, so nothing to do.
3122 * Send the shutdown failsafe event to this resp.
3124 ev = disp->failsafe_ev;
3125 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3126 resp->action, resp->arg, resp, NULL, NULL);
3127 ev->result = disp->shutdown_why;
3128 ev->buffer.base = NULL;
3129 ev->buffer.length = 0;
3130 disp->shutdown_out = 1;
3131 request_log(disp, resp, LVL(10),
3132 "cancel: failsafe event %p -> task %p",
3134 resp->item_out = ISC_TRUE;
3135 isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
/*
 * Return the socket stored in the dispatch (disp->socket).
 *
 * Requires a valid 'disp'.  No reference is taken and no lock is
 * acquired in the visible code.
 */
3141 dns_dispatch_getsocket(dns_dispatch_t *disp) {
3142 REQUIRE(VALID_DISPATCH(disp));
3144 return (disp->socket);
/*
 * Return the socket belonging to a response entry's dispsocket, when the
 * entry has one.
 *
 * Requires a valid 'resp'.
 * NOTE(review): the return for entries without a dispsocket is not
 * visible in this listing; presumably NULL -- confirm.
 */
3148 dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3149 REQUIRE(VALID_RESPONSE(resp));
3151 if (resp->dispsocket != NULL)
3152 return (resp->dispsocket->socket);
/*
 * Copy the dispatch's local address into '*addrp'.
 *
 * Requires a valid 'disp' and a non-NULL 'addrp'.
 *
 * Returns ISC_R_SUCCESS for UDP dispatches, after copying disp->local;
 * returns ISC_R_NOTIMPLEMENTED for any other socket type, leaving
 * '*addrp' untouched.
 */
3158 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3160 REQUIRE(VALID_DISPATCH(disp));
3161 REQUIRE(addrp != NULL);
3163 if (disp->socktype == isc_sockettype_udp) {
3164 *addrp = disp->local;
3165 return (ISC_R_SUCCESS);
3167 return (ISC_R_NOTIMPLEMENTED);
/*
 * Put the dispatch into the shutting-down state with reason
 * ISC_R_CANCELED.
 *
 * Requires a valid 'disp'.  If the dispatch is already shutting down the
 * lock is released without changing anything (NOTE(review): presumably
 * followed by an early return, which is not visible).  Otherwise
 * disp->shutdown_why and disp->shutting_down are set before disp->lock
 * is released.
 * NOTE(review): any call made between setting the flags and the final
 * UNLOCK (for example to do_cancel()) is not visible in this listing.
 */
3171 dns_dispatch_cancel(dns_dispatch_t *disp) {
3172 REQUIRE(VALID_DISPATCH(disp));
3176 if (disp->shutting_down == 1) {
3177 UNLOCK(&disp->lock);
3181 disp->shutdown_why = ISC_R_CANCELED;
3182 disp->shutting_down = 1;
3185 UNLOCK(&disp->lock);
/*
 * Return the dispatch's current attribute flags (disp->attributes).
 *
 * Requires a valid 'disp'.  The value is read without taking disp->lock,
 * as the comment in the body explains.
 */
3191 dns_dispatch_getattributes(dns_dispatch_t *disp) {
3192 REQUIRE(VALID_DISPATCH(disp));
3195 * We don't bother locking disp here; it's the caller's responsibility
3196 * to use only non volatile flags.
3198 return (disp->attributes);
/*
 * Replace the attribute bits selected by 'mask' with the corresponding
 * bits of 'attributes'.
 *
 * Requirements enforced here:
 *   - 'disp' is valid;
 *   - 'attributes' does not contain DNS_DISPATCHATTR_EXCLUSIVE;
 *   - either 'disp' is not exclusive or 'attributes' does not contain
 *     DNS_DISPATCHATTR_NOLISTEN.
 *
 * When 'mask' covers DNS_DISPATCHATTR_NOLISTEN, clearing that flag on a
 * dispatch that has it set calls startrecv(disp, NULL), ignoring the
 * result.  On the opposite branch the flag is set and a pending receive
 * on disp->socket is cancelled.  NOTE(review): that branch presumably
 * fires when the flag was previously clear; part of its condition is not
 * visible in this listing.
 *
 * Finally the masked bits are cleared and re-set from 'attributes', and
 * disp->lock is released (the matching LOCK is not visible).
 */
3202 dns_dispatch_changeattributes(dns_dispatch_t *disp,
3203 unsigned int attributes, unsigned int mask)
3205 REQUIRE(VALID_DISPATCH(disp));
3206 /* Exclusive attribute can only be set on creation */
3207 REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3208 /* Also, a dispatch with randomport specified cannot start listening */
3209 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3210 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3213 * Should check for valid attributes here!
3218 if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3219 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3220 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3221 disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3222 (void)startrecv(disp, NULL);
3223 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3225 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3226 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3227 if (disp->recv_pending != 0)
3228 isc_socket_cancel(disp->socket, disp->task[0],
3229 ISC_SOCKCANCEL_RECV);
/* Apply the remaining masked bits. */
3233 disp->attributes &= ~mask;
3234 disp->attributes |= (attributes & mask);
3235 UNLOCK(&disp->lock);
/*
 * Feed a copy of a received socket event into the dispatch.
 *
 * Requires a valid 'disp' that has DNS_DISPATCHATTR_NOLISTEN set, and a
 * non-NULL 'event', which is treated as an isc_socketevent_t.  The
 * received length (sevent->n) is asserted not to exceed the manager's
 * buffer size.
 *
 * A new event of type DNS_EVENT_IMPORTRECVDONE is allocated with
 * udp_shrecv as its action and 'disp' as its argument, and a buffer is
 * obtained from allocate_udp_buffer().  The first sevent->n bytes of the
 * original region are copied into that buffer, the remaining event
 * fields (result, address, timestamp, pktinfo, attributes) are
 * duplicated, and the new event is sent to disp->task[0].
 *
 * NOTE(review): on allocation failure the function presumably returns
 * without sending anything; only the release of the new event on buffer
 * failure is visible here.  'event' itself is not freed in the visible
 * code.
 */
3239 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3241 isc_socketevent_t *sevent, *newsevent;
3243 REQUIRE(VALID_DISPATCH(disp));
3244 REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3245 REQUIRE(event != NULL);
3247 sevent = (isc_socketevent_t *)event;
/* Guarantees that the memcpy() below fits in the buffer. */
3249 INSIST(sevent->n <= disp->mgr->buffersize);
3250 newsevent = (isc_socketevent_t *)
3251 isc_event_allocate(disp->mgr->mctx, NULL,
3252 DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3253 disp, sizeof(isc_socketevent_t));
3254 if (newsevent == NULL)
3257 buf = allocate_udp_buffer(disp);
3259 isc_event_free(ISC_EVENT_PTR(&newsevent));
3262 memcpy(buf, sevent->region.base, sevent->n);
3263 newsevent->region.base = buf;
3264 newsevent->region.length = disp->mgr->buffersize;
3265 newsevent->n = sevent->n;
3266 newsevent->result = sevent->result;
3267 newsevent->address = sevent->address;
3268 newsevent->timestamp = sevent->timestamp;
3269 newsevent->pktinfo = sevent->pktinfo;
3270 newsevent->attributes = sevent->attributes;
3272 isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
/*
 * Debugging aid: walk the manager's dispatch list (mgr->list) and print,
 * for each dispatch, its pointer and its local address as formatted by
 * isc_sockaddr_format(), to standard output.
 *
 * NOTE(review): the declaration of the 'foo' buffer is not visible in
 * this listing, and no locking of the manager is visible either --
 * confirm whether callers must hold the manager lock.
 */
3277 dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
3278 dns_dispatch_t *disp;
3281 disp = ISC_LIST_HEAD(mgr->list);
3282 while (disp != NULL) {
3283 isc_sockaddr_format(&disp->local, foo, sizeof(foo));
3284 printf("\tdispatch %p, addr %s\n", disp, foo);
3285 disp = ISC_LIST_NEXT(disp, link);