2 * Copyright (C) 2004-2009, 2011-2013 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: dispatch.c,v 1.175 2011/11/29 01:03:47 marka Exp $ */
25 #include <sys/types.h>
29 #include <isc/entropy.h>
31 #include <isc/mutex.h>
32 #include <isc/portset.h>
33 #include <isc/print.h>
34 #include <isc/random.h>
35 #include <isc/socket.h>
36 #include <isc/stats.h>
37 #include <isc/string.h>
43 #include <dns/dispatch.h>
44 #include <dns/events.h>
46 #include <dns/message.h>
47 #include <dns/portlist.h>
48 #include <dns/stats.h>
49 #include <dns/tcpmsg.h>
50 #include <dns/types.h>
52 typedef ISC_LIST(dns_dispentry_t) dns_displist_t;
54 typedef struct dispsocket dispsocket_t;
55 typedef ISC_LIST(dispsocket_t) dispsocketlist_t;
57 typedef struct dispportentry dispportentry_t;
58 typedef ISC_LIST(dispportentry_t) dispportlist_t;
60 /* ARC4 Random generator state */
61 typedef struct arc4ctx {
66 isc_entropy_t *entropy; /*%< entropy source for ARC4 */
70 typedef struct dns_qid {
72 unsigned int qid_nbuckets; /*%< hash table size */
73 unsigned int qid_increment; /*%< id increment on collision */
75 dns_displist_t *qid_table; /*%< the table itself */
76 dispsocketlist_t *sock_table; /*%< socket table */
/*
 * Dispatch manager state: the port list and entropy source, the list of
 * dispatches, the ARC4 context used for QIDs, UDP buffer accounting, the
 * memory pools (each paired with its own mutex), and the arrays of
 * available IPv4 and IPv6 ports.
 */
79 struct dns_dispatchmgr {
84 dns_portlist_t *portlist;
86 isc_entropy_t *entropy; /*%< entropy source */
88 /* Locked by "lock". */
91 ISC_LIST(dns_dispatch_t) list;
93 /* Locked by arc4_lock. */
94 isc_mutex_t arc4_lock;
95 arc4ctx_t arc4ctx; /*%< ARC4 context for QID */
97 /* locked by buffer lock */
99 isc_mutex_t buffer_lock;
100 unsigned int buffers; /*%< allocated buffers */
101 unsigned int buffersize; /*%< size of each buffer */
102 unsigned int maxbuffers; /*%< max buffers */
104 /* Locked internally. */
105 isc_mutex_t depool_lock;
106 isc_mempool_t *depool; /*%< pool for dispatch events */
107 isc_mutex_t rpool_lock;
108 isc_mempool_t *rpool; /*%< pool for replies */
109 isc_mutex_t dpool_lock;
110 isc_mempool_t *dpool; /*%< dispatch allocations */
111 isc_mutex_t bpool_lock;
112 isc_mempool_t *bpool; /*%< pool for buffers */
113 isc_mutex_t spool_lock;
114 isc_mempool_t *spool; /*%< pool for dispsocks */
117 * Locked by qid->lock if qid exists; otherwise, can be used without
119 * Memory footprint considerations: this is a simple implementation of
120 * available ports, i.e., an ordered array of the actual port numbers.
121 * This will require about 256KB of memory in the worst case (128KB for
122 * each of IPv4 and IPv6). We could reduce it by representing it as a
123 * more sophisticated way such as a list (or array) of ranges that are
124 * searched to identify a specific port. Our decision here is the saved
125 * memory isn't worth the implementation complexity, considering the
126 * fact that the whole BIND9 process (which is mainly named) already
127 * requires a pretty large memory footprint. We may, however, have to
128 * revisit the decision when we want to use it as a separate module for
129 * an environment where memory requirement is severer.
131 in_port_t *v4ports; /*%< available ports for IPv4 */
132 unsigned int nv4ports; /*%< # of available ports for IPv4 */
133 in_port_t *v6ports; /*%< available ports for IPv6 */
134 unsigned int nv6ports; /*%< # of available ports for IPv6 */
137 #define MGR_SHUTTINGDOWN 0x00000001U
138 #define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0)
140 #define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
142 struct dns_dispentry {
144 dns_dispatch_t *disp;
150 isc_taskaction_t action;
152 isc_boolean_t item_out;
153 dispsocket_t *dispsocket;
154 ISC_LIST(dns_dispatchevent_t) items;
155 ISC_LINK(dns_dispentry_t) link;
159 * Maximum number of dispatch sockets that can be pooled for reuse. The
160 * appropriate value may vary, but experiments have shown that a busy caching
161 * server may need more than 1000 sockets open concurrently. The maximum allowable
162 * number of dispatch sockets (per manager) will be set to twice this
165 #ifndef DNS_DISPATCH_POOLSOCKS
166 #define DNS_DISPATCH_POOLSOCKS 2048
170 * Quota to control the number of dispatch sockets. If a dispatch has more
171 * than the quota of sockets, new queries will purge oldest ones, so that
172 * a massive number of outstanding queries won't prevent subsequent queries
173 * (especially if the older ones take longer time and result in timeout).
175 #ifndef DNS_DISPATCH_SOCKSQUOTA
176 #define DNS_DISPATCH_SOCKSQUOTA 3072
181 isc_socket_t *socket;
182 dns_dispatch_t *disp;
184 in_port_t localport; /* XXX: should be removed later */
185 dispportentry_t *portentry;
186 dns_dispentry_t *resp;
188 ISC_LINK(dispsocket_t) link;
190 ISC_LINK(dispsocket_t) blink;
194 * A port table entry. We remember every port we first open in a table with a
195 * reference counter so that we can 'reuse' the same port (with different
196 * destination addresses) using the SO_REUSEADDR socket option.
198 struct dispportentry {
201 ISC_LINK(struct dispportentry) link;
204 #ifndef DNS_DISPATCH_PORTTABLESIZE
205 #define DNS_DISPATCH_PORTTABLESIZE 1024
208 #define INVALID_BUCKET (0xffffdead)
211 * Number of tasks for each dispatch that use separate sockets for different
212 * transactions. This must be a power of 2 as it will divide 32 bit numbers
213 * to get a uniformly random task selection. See get_dispsocket().
215 #define MAX_INTERNAL_TASKS 64
217 struct dns_dispatch {
219 unsigned int magic; /*%< magic */
220 dns_dispatchmgr_t *mgr; /*%< dispatch manager */
223 * internal task buckets. We use multiple tasks to distribute various
224 * socket events well when using separate dispatch sockets. We use the
225 * 1st task (task[0]) for internal control events.
227 isc_task_t *task[MAX_INTERNAL_TASKS];
228 isc_socket_t *socket; /*%< isc socket attached to */
229 isc_sockaddr_t local; /*%< local address */
230 in_port_t localport; /*%< local UDP port */
231 unsigned int maxrequests; /*%< max requests */
232 isc_event_t *ctlevent;
234 isc_mutex_t sepool_lock;
235 isc_mempool_t *sepool; /*%< pool for socket events */
237 /*% Locked by mgr->lock. */
238 ISC_LINK(dns_dispatch_t) link;
240 /* Locked by "lock". */
241 isc_mutex_t lock; /*%< locks all below */
242 isc_sockettype_t socktype;
243 unsigned int attributes;
244 unsigned int refcount; /*%< number of users */
245 dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */
246 unsigned int shutting_down : 1,
250 recv_pending : 1; /*%< is a recv() pending? */
251 isc_result_t shutdown_why;
252 ISC_LIST(dispsocket_t) activesockets;
253 ISC_LIST(dispsocket_t) inactivesockets;
254 unsigned int nsockets;
255 unsigned int requests; /*%< how many requests we have */
256 unsigned int tcpbuffers; /*%< allocated buffers */
257 dns_tcpmsg_t tcpmsg; /*%< for tcp streams */
259 arc4ctx_t arc4ctx; /*%< for QID/UDP port num */
260 dispportlist_t *port_table; /*%< hold ports 'owned' by us */
261 isc_mempool_t *portpool; /*%< port table entries */
264 #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ')
265 #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC)
267 #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p')
268 #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
270 #define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c')
271 #define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
273 #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p')
274 #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
276 #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
277 #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
/*
 * Select the QID table for 'disp': a TCP dispatch uses its own (disp)->qid,
 * any other socket type uses the table owned by its manager.
 *
 * The whole expansion is parenthesized so that the macro can be embedded in
 * a larger expression (e.g. "DNS_QID(d) == q") without the surrounding
 * operators being absorbed into the false branch of the conditional.
 */
#define DNS_QID(disp) (((disp)->socktype == isc_sockettype_tcp) ? \
		       (disp)->qid : (disp)->mgr->qid)
/*
 * Select the ARC4 context for 'disp': a UDP dispatch uses its own context,
 * any other socket type uses the one owned by its manager.  Evaluates to a
 * pointer to the chosen context.
 *
 * The whole expansion is parenthesized so that operators applied to the
 * macro (e.g. "*DISP_ARC4CTX(d)") act on the selected pointer rather than
 * on the socket-type comparison.
 */
#define DISP_ARC4CTX(disp) (((disp)->socktype == isc_sockettype_udp) ? \
			    (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
285 * Locking a query port buffer is a bit tricky. We access the buffer without
286 * locking until qid is created. Technically, there is a possibility of race
287 * between the creation of qid and access to the port buffer; in practice,
288 * however, this should be safe because qid isn't created until the first
289 * dispatch is created and there should be no contending situation until then.
/*
 * Lock / unlock the query port buffer.  The lock used is (mgr)->qid->lock,
 * so both macros do nothing while (mgr)->qid is still NULL.
 *
 * Each macro is wrapped in do { } while (0) so it behaves as exactly one
 * statement: a bare "if (...) LOCK(...)" expansion would capture a following
 * "else" when the macro is used unbraced inside an if/else.  Invoke with a
 * trailing semicolon.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)
297 static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
298 dns_messageid_t, in_port_t, unsigned int);
299 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
300 static void destroy_disp(isc_task_t *task, isc_event_t *event);
301 static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
302 static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
303 static void udp_exrecv(isc_task_t *, isc_event_t *);
304 static void udp_shrecv(isc_task_t *, isc_event_t *);
305 static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
306 static void tcp_recv(isc_task_t *, isc_event_t *);
307 static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
308 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
310 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
311 static void *allocate_udp_buffer(dns_dispatch_t *disp);
312 static inline void free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
313 static inline dns_dispatchevent_t *allocate_devent(dns_dispatch_t *disp);
314 static void do_cancel(dns_dispatch_t *disp);
315 static dns_dispentry_t *linear_first(dns_qid_t *disp);
316 static dns_dispentry_t *linear_next(dns_qid_t *disp,
317 dns_dispentry_t *resp);
318 static void dispatch_free(dns_dispatch_t **dispp);
319 static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
320 dns_dispatch_t *disp,
321 isc_socketmgr_t *sockmgr,
322 isc_sockaddr_t *localaddr,
323 isc_socket_t **sockp,
324 isc_socket_t *dup_socket);
325 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
326 isc_socketmgr_t *sockmgr,
327 isc_taskmgr_t *taskmgr,
328 isc_sockaddr_t *localaddr,
329 unsigned int maxrequests,
330 unsigned int attributes,
331 dns_dispatch_t **dispp,
332 isc_socket_t *dup_socket);
333 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
334 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
335 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
336 unsigned int increment, dns_qid_t **qidp,
337 isc_boolean_t needaddrtable);
338 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
339 static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
340 unsigned int options, isc_socket_t **sockp,
341 isc_socket_t *dup_socket);
342 static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
343 isc_sockaddr_t *sockaddrp);
345 #define LVL(x) ISC_LOG_DEBUG(x)
348 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
349 ISC_FORMAT_PRINTF(3, 4);
352 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
356 if (! isc_log_wouldlog(dns_lctx, level))
360 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
363 isc_log_write(dns_lctx,
364 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
365 level, "dispatchmgr %p: %s", mgr, msgbuf);
369 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
370 if (mgr->stats != NULL)
371 isc_stats_increment(mgr->stats, counter);
375 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
376 ISC_FORMAT_PRINTF(3, 4);
379 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
383 if (! isc_log_wouldlog(dns_lctx, level))
387 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
390 isc_log_write(dns_lctx,
391 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
392 level, "dispatch %p: %s", disp, msgbuf);
396 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
397 int level, const char *fmt, ...)
398 ISC_FORMAT_PRINTF(4, 5);
401 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
402 int level, const char *fmt, ...)
408 if (! isc_log_wouldlog(dns_lctx, level))
412 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
415 if (VALID_RESPONSE(resp)) {
416 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
417 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
418 DNS_LOGMODULE_DISPATCH, level,
419 "dispatch %p response %p %s: %s", disp, resp,
422 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
423 DNS_LOGMODULE_DISPATCH, level,
424 "dispatch %p req/resp %p: %s", disp, resp,
430 * ARC4 random number generator derived from OpenBSD.
431 * Only dispatch_random() and dispatch_uniformrandom() are expected
432 * to be called from general dispatch routines; the rest of them are subroutines
435 * The original copyright follows:
436 * Copyright (c) 1996, David Mazieres <dm@uun.org>
437 * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
439 * Permission to use, copy, modify, and distribute this software for any
440 * purpose with or without fee is hereby granted, provided that the above
441 * copyright notice and this permission notice appear in all copies.
443 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
444 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
445 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
446 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
447 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
448 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
449 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
453 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
457 for (n = 0; n < 256; n++)
462 actx->entropy = entropy; /* don't have to attach */
467 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
472 for (n = 0; n < 256; n++) {
473 actx->i = (actx->i + 1);
474 si = actx->s[actx->i];
475 actx->j = (actx->j + si + dat[n % datlen]);
476 actx->s[actx->i] = actx->s[actx->j];
477 actx->s[actx->j] = si;
482 static inline isc_uint8_t
483 dispatch_arc4get8(arc4ctx_t *actx) {
486 actx->i = (actx->i + 1);
487 si = actx->s[actx->i];
488 actx->j = (actx->j + si);
489 sj = actx->s[actx->j];
490 actx->s[actx->i] = sj;
491 actx->s[actx->j] = si;
493 return (actx->s[(si + sj) & 0xff]);
496 static inline isc_uint16_t
497 dispatch_arc4get16(arc4ctx_t *actx) {
500 val = dispatch_arc4get8(actx) << 8;
501 val |= dispatch_arc4get8(actx);
507 dispatch_arc4stir(arc4ctx_t *actx) {
510 unsigned char rnd[128];
511 isc_uint32_t rnd32[32];
515 if (actx->entropy != NULL) {
517 * We accept any quality of random data to avoid blocking.
519 result = isc_entropy_getdata(actx->entropy, rnd.rnd,
520 sizeof(rnd), NULL, 0);
521 RUNTIME_CHECK(result == ISC_R_SUCCESS);
523 for (i = 0; i < 32; i++)
524 isc_random_get(&rnd.rnd32[i]);
526 dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
529 * Discard early keystream, as per recommendations in:
530 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
532 for (i = 0; i < 256; i++)
533 (void)dispatch_arc4get8(actx);
536 * Derived from OpenBSD's implementation. The rationale is not clear,
537 * but should be conservative enough in safety, and reasonably large
540 actx->count = 1600000;
544 dispatch_random(arc4ctx_t *actx) {
547 if (actx->lock != NULL)
550 actx->count -= sizeof(isc_uint16_t);
551 if (actx->count <= 0)
552 dispatch_arc4stir(actx);
553 result = dispatch_arc4get16(actx);
555 if (actx->lock != NULL)
562 * For general purpose library, we don't have to be too strict about the
563 * quality of random values. Performance doesn't matter much, either.
564 * So we simply use the isc_random module to keep the library as small as
569 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
580 dispatch_random(arc4ctx_t *actx) {
591 dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
/*
 * Draws 16-bit values from dispatch_random(actx) and returns
 * 'r % upper_bound'.  'min' is the cut-off that makes the accepted range
 * [min, 0xffff] a multiple of upper_bound; the retry loop that applies it
 * is only partly visible in this view.
 */
598 * Ensure the range of random numbers [min, 0xffff] be a multiple of
599 * upper_bound and contain at least a half of the 16 bit range.
602 if (upper_bound > 0x8000)
603 min = 1 + ~upper_bound; /* 0x10000 - upper_bound (in 16-bit arithmetic; assumes min is a 16-bit unsigned) */
605 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
608 * This could theoretically loop forever but each retry has
609 * p > 0.5 (worst case, usually far better) of selecting a
610 * number inside the range we need, so it should rarely need
614 r = dispatch_random(actx);
619 return (r % upper_bound);
623 * Return a hash of the destination and message id.
626 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
631 ret = isc_sockaddr_hash(dest, ISC_TRUE);
632 ret ^= (id << 16) | port;
633 ret %= qid->qid_nbuckets;
635 INSIST(ret < qid->qid_nbuckets);
641 * Find the first entry in 'qid'. Returns NULL if there are no entries.
643 static dns_dispentry_t *
644 linear_first(dns_qid_t *qid) {
645 dns_dispentry_t *ret;
650 while (bucket < qid->qid_nbuckets) {
651 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
661 * Find the next entry after 'resp' in 'qid'. Return NULL if there are
664 static dns_dispentry_t *
665 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
666 dns_dispentry_t *ret;
669 ret = ISC_LIST_NEXT(resp, link);
673 bucket = resp->bucket;
675 while (bucket < qid->qid_nbuckets) {
676 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
686 * The dispatch must be locked.
689 destroy_disp_ok(dns_dispatch_t *disp)
691 if (disp->refcount != 0)
694 if (disp->recv_pending != 0)
697 if (!ISC_LIST_EMPTY(disp->activesockets))
700 if (disp->shutting_down == 0)
707 * Called when refcount reaches 0 (and safe to destroy).
709 * The dispatcher must not be locked.
710 * The manager must be locked.
713 destroy_disp(isc_task_t *task, isc_event_t *event) {
714 dns_dispatch_t *disp;
715 dns_dispatchmgr_t *mgr;
716 isc_boolean_t killmgr;
717 dispsocket_t *dispsocket;
720 INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
724 disp = event->ev_arg;
728 ISC_LIST_UNLINK(mgr->list, disp, link);
730 dispatch_log(disp, LVL(90),
731 "shutting down; detaching from sock %p, task %p",
732 disp->socket, disp->task[0]); /* XXXX */
734 if (disp->sepool != NULL) {
735 isc_mempool_destroy(&disp->sepool);
736 (void)isc_mutex_destroy(&disp->sepool_lock);
739 if (disp->socket != NULL)
740 isc_socket_detach(&disp->socket);
741 while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
742 ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
743 destroy_dispsocket(disp, &dispsocket);
745 for (i = 0; i < disp->ntasks; i++)
746 isc_task_detach(&disp->task[i]);
747 isc_event_free(&event);
749 dispatch_free(&disp);
751 killmgr = destroy_mgr_ok(mgr);
758 * Manipulate port table per dispatch: find an entry for a given port number,
759 * create a new entry, and decrement a given entry with possible clean-up.
761 static dispportentry_t *
762 port_search(dns_dispatch_t *disp, in_port_t port) {
763 dispportentry_t *portentry;
765 REQUIRE(disp->port_table != NULL);
767 portentry = ISC_LIST_HEAD(disp->port_table[port %
768 DNS_DISPATCH_PORTTABLESIZE]);
769 while (portentry != NULL) {
770 if (portentry->port == port)
772 portentry = ISC_LIST_NEXT(portentry, link);
778 static dispportentry_t *
779 new_portentry(dns_dispatch_t *disp, in_port_t port) {
780 dispportentry_t *portentry;
782 REQUIRE(disp->port_table != NULL);
784 portentry = isc_mempool_get(disp->portpool);
785 if (portentry == NULL)
788 portentry->port = port;
790 ISC_LINK_INIT(portentry, link);
791 ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
798 * The caller must not hold the qid->lock.
801 deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
802 dispportentry_t *portentry = *portentryp;
803 isc_boolean_t unlink = ISC_FALSE;
806 REQUIRE(disp->port_table != NULL);
807 REQUIRE(portentry != NULL && portentry->refs > 0);
812 unlink = ISC_TF(portentry->refs == 0);
816 ISC_LIST_UNLINK(disp->port_table[portentry->port %
817 DNS_DISPATCH_PORTTABLESIZE],
819 isc_mempool_put(disp->portpool, portentry);
826 * Find a dispsocket for socket address 'dest', and port number 'port'.
827 * Return NULL if no such entry exists.
829 static dispsocket_t *
830 socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
833 dispsocket_t *dispsock;
835 REQUIRE(bucket < qid->qid_nbuckets);
837 dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
839 while (dispsock != NULL) {
840 if (dispsock->portentry != NULL &&
841 dispsock->portentry->port == port &&
842 isc_sockaddr_equal(dest, &dispsock->host))
844 dispsock = ISC_LIST_NEXT(dispsock, blink);
851 * Make a new socket for a single dispatch with a random port number.
852 * The caller must hold the disp->lock
855 get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
856 isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp,
861 dns_dispatchmgr_t *mgr = disp->mgr;
862 isc_socket_t *sock = NULL;
863 isc_result_t result = ISC_R_FAILURE;
865 isc_sockaddr_t localaddr;
866 unsigned int bucket = 0;
867 dispsocket_t *dispsock;
870 unsigned int bindoptions;
871 dispportentry_t *portentry = NULL;
874 if (isc_sockaddr_pf(&disp->local) == AF_INET) {
875 nports = disp->mgr->nv4ports;
876 ports = disp->mgr->v4ports;
878 nports = disp->mgr->nv6ports;
879 ports = disp->mgr->v6ports;
882 return (ISC_R_ADDRNOTAVAIL);
884 dispsock = ISC_LIST_HEAD(disp->inactivesockets);
885 if (dispsock != NULL) {
886 ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
887 sock = dispsock->socket;
888 dispsock->socket = NULL;
890 dispsock = isc_mempool_get(mgr->spool);
891 if (dispsock == NULL)
892 return (ISC_R_NOMEMORY);
895 dispsock->socket = NULL;
896 dispsock->disp = disp;
897 dispsock->resp = NULL;
898 dispsock->portentry = NULL;
900 dispsock->task = NULL;
901 isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
902 ISC_LINK_INIT(dispsock, link);
903 ISC_LINK_INIT(dispsock, blink);
904 dispsock->magic = DISPSOCK_MAGIC;
908 * Pick up a random UDP port and open a new socket with it. Avoid
909 * choosing ports that share the same destination because it will be
910 * very likely to fail in bind(2) or connect(2).
912 localaddr = disp->local;
915 for (i = 0; i < 64; i++) {
916 port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
918 isc_sockaddr_setport(&localaddr, port);
921 bucket = dns_hash(qid, dest, 0, port);
922 if (socket_search(qid, dest, port, bucket) != NULL) {
928 portentry = port_search(disp, port);
930 if (portentry != NULL)
931 bindoptions |= ISC_SOCKET_REUSEADDRESS;
932 result = open_socket(sockmgr, &localaddr, bindoptions, &sock,
934 if (result == ISC_R_SUCCESS) {
935 if (portentry == NULL) {
936 portentry = new_portentry(disp, port);
937 if (portentry == NULL) {
938 result = ISC_R_NOMEMORY;
944 } else if (result == ISC_R_NOPERM) {
945 char buf[ISC_SOCKADDR_FORMATSIZE];
946 isc_sockaddr_format(&localaddr, buf, sizeof(buf));
947 dispatch_log(disp, ISC_LOG_WARNING,
948 "open_socket(%s) -> %s: continuing",
949 buf, isc_result_totext(result));
950 } else if (result != ISC_R_ADDRINUSE)
954 if (result == ISC_R_SUCCESS) {
955 dispsock->socket = sock;
956 dispsock->host = *dest;
957 dispsock->portentry = portentry;
958 dispsock->bucket = bucket;
960 ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
962 *dispsockp = dispsock;
966 * We could keep it in the inactive list, but since this should
967 * be an exceptional case and might be resource shortage, we'd
971 isc_socket_detach(&sock);
972 destroy_dispsocket(disp, &dispsock);
979 * Destroy a dedicated dispatch socket.
982 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
983 dispsocket_t *dispsock;
987 * The dispatch must be locked.
990 REQUIRE(dispsockp != NULL && *dispsockp != NULL);
991 dispsock = *dispsockp;
992 REQUIRE(!ISC_LINK_LINKED(dispsock, link));
996 if (dispsock->portentry != NULL)
997 deref_portentry(disp, &dispsock->portentry);
998 if (dispsock->socket != NULL)
999 isc_socket_detach(&dispsock->socket);
1000 if (ISC_LINK_LINKED(dispsock, blink)) {
1001 qid = DNS_QID(disp);
1003 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1007 if (dispsock->task != NULL)
1008 isc_task_detach(&dispsock->task);
1009 isc_mempool_put(disp->mgr->spool, dispsock);
1015 * Deactivate a dedicated dispatch socket. Move it to the inactive list for
1016 * future reuse unless the total number of sockets exceeds the maximum.
1019 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1020 isc_result_t result;
1024 * The dispatch must be locked.
1026 ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
1027 if (dispsock->resp != NULL) {
1028 INSIST(dispsock->resp->dispsocket == dispsock);
1029 dispsock->resp->dispsocket = NULL;
1032 INSIST(dispsock->portentry != NULL);
1033 deref_portentry(disp, &dispsock->portentry);
1036 if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
1037 destroy_dispsocket(disp, &dispsock);
1039 result = isc_socket_close(dispsock->socket);
1041 qid = DNS_QID(disp);
1043 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1047 if (result == ISC_R_SUCCESS)
1048 ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
1051 * If the underlying system does not allow this
1052 * optimization, destroy this temporary structure (and
1053 * create a new one for a new transaction).
1055 INSIST(result == ISC_R_NOTIMPLEMENTED);
1056 destroy_dispsocket(disp, &dispsock);
1060 /* This kind of optimization isn't necessary for normal use */
1064 destroy_dispsocket(disp, &dispsock);
1069 * Find an entry for query ID 'id', socket address 'dest', and port number
1071 * Return NULL if no such entry exists.
1073 static dns_dispentry_t *
1074 entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
1075 in_port_t port, unsigned int bucket)
1077 dns_dispentry_t *res;
1079 REQUIRE(bucket < qid->qid_nbuckets);
1081 res = ISC_LIST_HEAD(qid->qid_table[bucket]);
1083 while (res != NULL) {
1084 if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
1085 res->port == port) {
1088 res = ISC_LIST_NEXT(res, link);
1095 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
1096 isc_mempool_t *bpool;
1097 INSIST(buf != NULL && len != 0);
1100 switch (disp->socktype) {
1101 case isc_sockettype_tcp:
1102 INSIST(disp->tcpbuffers > 0);
1104 isc_mem_put(disp->mgr->mctx, buf, len);
1106 case isc_sockettype_udp:
1107 LOCK(&disp->mgr->buffer_lock);
1108 INSIST(disp->mgr->buffers > 0);
1109 INSIST(len == disp->mgr->buffersize);
1110 disp->mgr->buffers--;
1111 bpool = disp->mgr->bpool;
1112 UNLOCK(&disp->mgr->buffer_lock);
1113 isc_mempool_put(bpool, buf);
1122 allocate_udp_buffer(dns_dispatch_t *disp) {
1123 isc_mempool_t *bpool;
1126 LOCK(&disp->mgr->buffer_lock);
1127 bpool = disp->mgr->bpool;
1128 disp->mgr->buffers++;
1129 UNLOCK(&disp->mgr->buffer_lock);
1131 temp = isc_mempool_get(bpool);
1134 LOCK(&disp->mgr->buffer_lock);
1135 disp->mgr->buffers--;
1136 UNLOCK(&disp->mgr->buffer_lock);
1143 free_sevent(isc_event_t *ev) {
1144 isc_mempool_t *pool = ev->ev_destroy_arg;
1145 isc_socketevent_t *sev = (isc_socketevent_t *) ev;
1146 isc_mempool_put(pool, sev);
1149 static inline isc_socketevent_t *
1150 allocate_sevent(dns_dispatch_t *disp, isc_socket_t *socket,
1151 isc_eventtype_t type, isc_taskaction_t action, const void *arg)
1153 isc_socketevent_t *ev;
1156 ev = isc_mempool_get(disp->sepool);
1159 DE_CONST(arg, deconst_arg);
1160 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type,
1161 action, deconst_arg, socket,
1162 free_sevent, disp->sepool);
1163 ev->result = ISC_R_UNSET;
1164 ISC_LINK_INIT(ev, ev_link);
1165 ISC_LIST_INIT(ev->bufferlist);
1166 ev->region.base = NULL;
1176 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
1177 if (disp->failsafe_ev == ev) {
1178 INSIST(disp->shutdown_out == 1);
1179 disp->shutdown_out = 0;
1184 isc_mempool_put(disp->mgr->depool, ev);
1187 static inline dns_dispatchevent_t *
1188 allocate_devent(dns_dispatch_t *disp) {
1189 dns_dispatchevent_t *ev;
1191 ev = isc_mempool_get(disp->mgr->depool);
1194 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
1195 NULL, NULL, NULL, NULL, NULL);
1201 udp_exrecv(isc_task_t *task, isc_event_t *ev) {
1202 dispsocket_t *dispsock = ev->ev_arg;
1206 REQUIRE(VALID_DISPSOCK(dispsock));
1207 udp_recv(ev, dispsock->disp, dispsock);
1211 udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1212 dns_dispatch_t *disp = ev->ev_arg;
1216 REQUIRE(VALID_DISPATCH(disp));
1217 udp_recv(ev, disp, NULL);
1223 * If I/O result == CANCELED or error, free the buffer.
1225 * If query, free the buffer, restart.
1228 * Allocate event, fill in details.
1229 * If cannot allocate, free buffer, restart.
1230 * find target. If not found, free buffer, restart.
1231 * if event queue is not empty, queue. else, send.
1235 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1236 isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1239 isc_buffer_t source;
1241 dns_dispentry_t *resp = NULL;
1242 dns_dispatchevent_t *rev;
1243 unsigned int bucket;
1244 isc_boolean_t killit;
1245 isc_boolean_t queue_response;
1246 dns_dispatchmgr_t *mgr;
1248 isc_netaddr_t netaddr;
1251 isc_boolean_t qidlocked = ISC_FALSE;
1258 dispatch_log(disp, LVL(90),
1259 "got packet: requests %d, buffers %d, recvs %d",
1260 disp->requests, disp->mgr->buffers, disp->recv_pending);
1262 if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1264 * Unless the receive event was imported from a listening
1265 * interface, in which case the event type is
1266 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1268 INSIST(disp->recv_pending != 0);
1269 disp->recv_pending = 0;
1272 if (dispsock != NULL &&
1273 (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1275 * dispsock->resp can be NULL if this transaction was canceled
1276 * just after receiving a response. Since this socket is
1277 * exclusively used and there should be at most one receive
1278 * event, the canceled event should have had no effect. So
1279 * we can (and should) deactivate the socket right now.
1281 deactivate_dispsocket(disp, dispsock);
1285 if (disp->shutting_down) {
1287 * This dispatcher is shutting down.
1289 free_buffer(disp, ev->region.base, ev->region.length);
1291 isc_event_free(&ev_in);
1294 killit = destroy_disp_ok(disp);
1295 UNLOCK(&disp->lock);
1297 isc_task_send(disp->task[0], &disp->ctlevent);
1302 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1303 if (dispsock != NULL) {
1304 resp = dispsock->resp;
1306 if (ev->result != ISC_R_SUCCESS) {
1308 * This is most likely a network error on a
1309 * connected socket. It makes no sense to
1310 * check the address or parse the packet, but it
1311 * will help to return the error to the caller.
1316 free_buffer(disp, ev->region.base, ev->region.length);
1318 UNLOCK(&disp->lock);
1319 isc_event_free(&ev_in);
1322 } else if (ev->result != ISC_R_SUCCESS) {
1323 free_buffer(disp, ev->region.base, ev->region.length);
1325 if (ev->result != ISC_R_CANCELED)
1326 dispatch_log(disp, ISC_LOG_ERROR,
1327 "odd socket result in udp_recv(): %s",
1328 isc_result_totext(ev->result));
1330 UNLOCK(&disp->lock);
1331 isc_event_free(&ev_in);
1336 * If this is from a blackholed address, drop it.
1338 isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1339 if (disp->mgr->blackhole != NULL &&
1340 dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1341 NULL, &match, NULL) == ISC_R_SUCCESS &&
1344 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1345 char netaddrstr[ISC_NETADDR_FORMATSIZE];
1346 isc_netaddr_format(&netaddr, netaddrstr,
1347 sizeof(netaddrstr));
1348 dispatch_log(disp, LVL(10),
1349 "blackholed packet from %s",
1352 free_buffer(disp, ev->region.base, ev->region.length);
1357 * Peek into the buffer to see what we can see.
1359 isc_buffer_init(&source, ev->region.base, ev->region.length);
1360 isc_buffer_add(&source, ev->n);
1361 dres = dns_message_peekheader(&source, &id, &flags);
1362 if (dres != ISC_R_SUCCESS) {
1363 free_buffer(disp, ev->region.base, ev->region.length);
1364 dispatch_log(disp, LVL(10), "got garbage packet");
1368 dispatch_log(disp, LVL(92),
1369 "got valid DNS message header, /QR %c, id %u",
1370 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1373 * Look at flags. If query, drop it. If response,
1374 * look to see where it goes.
1376 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1378 free_buffer(disp, ev->region.base, ev->region.length);
1383 * Search for the corresponding response. If we are using an exclusive
1384 * socket, we've already identified it and we can skip the search; but
1385 * the ID and the address must match the expected ones.
1388 bucket = dns_hash(qid, &ev->address, id, disp->localport);
1390 qidlocked = ISC_TRUE;
1391 resp = entry_search(qid, &ev->address, id, disp->localport,
1393 dispatch_log(disp, LVL(90),
1394 "search for response in bucket %d: %s",
1395 bucket, (resp == NULL ? "not found" : "found"));
1398 inc_stats(mgr, dns_resstatscounter_mismatch);
1399 free_buffer(disp, ev->region.base, ev->region.length);
1402 } else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1404 dispatch_log(disp, LVL(90),
1405 "response to an exclusive socket doesn't match");
1406 inc_stats(mgr, dns_resstatscounter_mismatch);
1407 free_buffer(disp, ev->region.base, ev->region.length);
1412 * Now that we have the original dispatch the query was sent
1413 * from check that the address and port the response was
1414 * sent to make sense.
1416 if (disp != resp->disp) {
1421 * Check that the socket types and ports match.
1423 if (disp->socktype != resp->disp->socktype ||
1424 isc_sockaddr_getport(&disp->local) !=
1425 isc_sockaddr_getport(&resp->disp->local)) {
1426 free_buffer(disp, ev->region.base, ev->region.length);
1431 * If both dispatches are bound to an address then fail as
1432 * the addresses can't be equal (enforced by the IP stack).
1434 * Note under Linux a packet can be sent out via IPv4 socket
1435 * and the response be received via an IPv6 socket.
1437 * Requests sent out via IPv6 should always come back in
1440 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1441 isc_sockaddr_pf(&disp->local) != PF_INET6) {
1442 free_buffer(disp, ev->region.base, ev->region.length);
1445 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1446 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1447 if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1448 !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1449 free_buffer(disp, ev->region.base, ev->region.length);
1455 queue_response = resp->item_out;
1456 rev = allocate_devent(resp->disp);
1458 free_buffer(disp, ev->region.base, ev->region.length);
1463 * At this point, rev contains the event we want to fill in, and
1464 * resp contains the information on the place to send it to.
1465 * Send the event off.
1467 isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1468 isc_buffer_add(&rev->buffer, ev->n);
1469 rev->result = ev->result;
1471 rev->addr = ev->address;
1472 rev->pktinfo = ev->pktinfo;
1473 rev->attributes = ev->attributes;
1474 if (queue_response) {
1475 ISC_LIST_APPEND(resp->items, rev, ev_link);
1477 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1479 resp->action, resp->arg, resp, NULL, NULL);
1480 request_log(disp, resp, LVL(90),
1481 "[a] Sent event %p buffer %p len %d to task %p",
1482 rev, rev->buffer.base, rev->buffer.length,
1484 resp->item_out = ISC_TRUE;
1485 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1492 * Restart recv() to get the next packet.
1495 result = startrecv(disp, dispsock);
1496 if (result != ISC_R_SUCCESS && dispsock != NULL) {
1498 * XXX: weird. There seems to be no recovery process other than
1499 * deactivate this socket anyway (since we cannot start
1500 * receiving, we won't be able to receive a cancel event
1503 deactivate_dispsocket(disp, dispsock);
1505 UNLOCK(&disp->lock);
1507 isc_event_free(&ev_in);
1513 * If I/O result == CANCELED, EOF, or error, notify everyone as the
1514 * various queues drain.
1516 * If query, restart.
1519 * Allocate event, fill in details.
1520 * If cannot allocate, restart.
1521 * find target. If not found, restart.
1522 * if event queue is not empty, queue. else, send.
/*
 * TCP receive-completion handler.
 *
 * The dispatch is taken from ev_in->ev_arg and the received message lives
 * in disp->tcpmsg.  The handler clears disp->recv_pending (it must have
 * been set), and forces the result to ISC_R_CANCELED when disp->refcount
 * is zero.
 *
 * Failure path (tcpmsg->result != ISC_R_SUCCESS): the cause is logged
 * (ISC_LOG_INFO for ISC_R_CONNECTIONRESET, ISC_LOG_ERROR in another
 * branch), ev_in is freed, the dispatch is flagged shutting_down with
 * shutdown_why = tcpmsg->result, destroy_disp_ok() is consulted and
 * disp->ctlevent is sent to disp->task[0].
 *
 * Success path: the DNS header is peeked for id/flags, the matching
 * entry is looked up with dns_hash()/entry_search() on (peer address,
 * id, disp->localport), the tcpmsg buffer is handed to a freshly
 * allocated dispatch event via dns_tcpmsg_keepbuffer(), and that event
 * is either appended to resp->items (a previous item is still out) or
 * sent to resp->task.  Finally the receive is restarted with
 * startrecv(disp, NULL), the dispatch lock is released and ev_in freed.
 *
 * NOTE(review): the branch bodies for a failed header peek, a message
 * with QR clear, an unmatched response and a failed allocate_devent()
 * are not visible in this listing; confirm them against the full source.
 */
1526 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1527 dns_dispatch_t *disp = ev_in->ev_arg;
1528 dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1532 dns_dispentry_t *resp;
1533 dns_dispatchevent_t *rev;
1534 unsigned int bucket;
1535 isc_boolean_t killit;
1536 isc_boolean_t queue_response;
1539 char buf[ISC_SOCKADDR_FORMATSIZE];
1543 REQUIRE(VALID_DISPATCH(disp));
1547 dispatch_log(disp, LVL(90),
1548 "got TCP packet: requests %d, buffers %d, recvs %d",
1549 disp->requests, disp->tcpbuffers, disp->recv_pending);
1553 INSIST(disp->recv_pending != 0);
1554 disp->recv_pending = 0;
1556 if (disp->refcount == 0) {
1558 * This dispatcher is shutting down. Force cancelation.
1560 tcpmsg->result = ISC_R_CANCELED;
1563 if (tcpmsg->result != ISC_R_SUCCESS) {
1564 switch (tcpmsg->result) {
1565 case ISC_R_CANCELED:
1569 dispatch_log(disp, LVL(90), "shutting down on EOF");
1573 case ISC_R_CONNECTIONRESET:
1574 level = ISC_LOG_INFO;
1578 level = ISC_LOG_ERROR;
1580 isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1581 dispatch_log(disp, level, "shutting down due to TCP "
1582 "receive error: %s: %s", buf,
1583 isc_result_totext(tcpmsg->result));
1589 * The event is statically allocated in the tcpmsg
1590 * structure, and destroy_disp() frees the tcpmsg, so we must
1591 * free the event *before* calling destroy_disp().
1593 isc_event_free(&ev_in);
1595 disp->shutting_down = 1;
1596 disp->shutdown_why = tcpmsg->result;
1599 * If the recv() was canceled pass the word on.
1601 killit = destroy_disp_ok(disp);
1602 UNLOCK(&disp->lock);
1604 isc_task_send(disp->task[0], &disp->ctlevent);
1608 dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1610 tcpmsg->buffer.length, tcpmsg->buffer.base);
1613 * Peek into the buffer to see what we can see.
1615 dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1616 if (dres != ISC_R_SUCCESS) {
1617 dispatch_log(disp, LVL(10), "got garbage packet");
1621 dispatch_log(disp, LVL(92),
1622 "got valid DNS message header, /QR %c, id %u",
1623 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1626 * Allocate an event to send to the query or response client, and
1627 * allocate a new buffer for our use.
1631 * Look at flags. If query, drop it. If response,
1632 * look to see where it goes.
1634 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1644 bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1646 resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1647 dispatch_log(disp, LVL(90),
1648 "search for response in bucket %d: %s",
1649 bucket, (resp == NULL ? "not found" : "found"));
1653 queue_response = resp->item_out;
1654 rev = allocate_devent(disp);
1659 * At this point, rev contains the event we want to fill in, and
1660 * resp contains the information on the place to send it to.
1661 * Send the event off.
1663 dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1665 rev->result = ISC_R_SUCCESS;
1667 rev->addr = tcpmsg->address;
1668 if (queue_response) {
1669 ISC_LIST_APPEND(resp->items, rev, ev_link);
1671 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1672 resp->action, resp->arg, resp, NULL, NULL);
1673 request_log(disp, resp, LVL(90),
1674 "[b] Sent event %p buffer %p len %d to task %p",
1675 rev, rev->buffer.base, rev->buffer.length,
1677 resp->item_out = ISC_TRUE;
1678 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1684 * Restart recv() to get the next packet.
1687 (void)startrecv(disp, NULL);
1689 UNLOCK(&disp->lock);
1691 isc_event_free(&ev_in);
1695 * disp must be locked.
/*
 * Start a receive on the dispatch.  When dispsock is non-NULL its socket
 * is used, otherwise disp->socket.
 *
 * Returns ISC_R_SUCCESS without starting anything when the dispatch is
 * shutting down, has DNS_DISPATCHATTR_NOLISTEN set, or already has a
 * receive pending and no dispsock was given.  Returns ISC_R_NOMEMORY
 * when mgr->buffers has reached mgr->maxbuffers, or when a UDP buffer
 * or socket event cannot be allocated.
 *
 * UDP: a buffer of mgr->buffersize bytes is allocated and handed to
 * isc_socket_recv2(); the buffer is released again on every failure
 * path shown.  TCP: dns_tcpmsg_readmessage() is used.
 *
 * Where the failure is recorded in shutdown_why/shutting_down the
 * function still returns ISC_R_SUCCESS ("recover by cancel").  After a
 * successful request on those paths recv_pending is asserted to be 0 and
 * set to 1.
 *
 * The second argument of both isc_socket_recv2() calls is the address of
 * the local isc_region_t; it had been corrupted to a registered-sign
 * character followed by "ion" (an HTML entity decoding artifact) and is
 * restored to "&region" here.
 *
 * NOTE(review): the else-branches and closing braces that separate the
 * dispsock and shared-socket UDP paths are not visible in this listing;
 * confirm the nesting against the full source.
 */
1698 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1700 isc_region_t region;
1701 isc_socket_t *socket;
1703 if (disp->shutting_down == 1)
1704 return (ISC_R_SUCCESS);
1706 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1707 return (ISC_R_SUCCESS);
1709 if (disp->recv_pending != 0 && dispsock == NULL)
1710 return (ISC_R_SUCCESS);
1712 if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1713 return (ISC_R_NOMEMORY);
1715 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1717 return (ISC_R_SUCCESS);
1719 if (dispsock != NULL)
1720 socket = dispsock->socket;
1722 socket = disp->socket;
1723 INSIST(socket != NULL);
1725 switch (disp->socktype) {
1727 * UDP reads are always maximal.
1729 case isc_sockettype_udp:
1730 region.length = disp->mgr->buffersize;
1731 region.base = allocate_udp_buffer(disp);
1732 if (region.base == NULL)
1733 return (ISC_R_NOMEMORY);
1734 if (dispsock != NULL) {
1735 isc_task_t *dt = dispsock->task;
1736 isc_socketevent_t *sev =
1737 allocate_sevent(disp, socket,
1738 ISC_SOCKEVENT_RECVDONE,
1739 udp_exrecv, dispsock);
1741 free_buffer(disp, region.base, region.length);
1742 return (ISC_R_NOMEMORY);
1745 res = isc_socket_recv2(socket, &region, 1, dt, sev, 0);
1746 if (res != ISC_R_SUCCESS) {
1747 free_buffer(disp, region.base, region.length);
1751 isc_task_t *dt = disp->task[0];
1752 isc_socketevent_t *sev =
1753 allocate_sevent(disp, socket,
1754 ISC_SOCKEVENT_RECVDONE,
1757 free_buffer(disp, region.base, region.length);
1758 return (ISC_R_NOMEMORY);
1761 res = isc_socket_recv2(socket, &region, 1, dt, sev, 0);
1762 if (res != ISC_R_SUCCESS) {
1763 free_buffer(disp, region.base, region.length);
1764 disp->shutdown_why = res;
1765 disp->shutting_down = 1;
1767 return (ISC_R_SUCCESS); /* recover by cancel */
1769 INSIST(disp->recv_pending == 0);
1770 disp->recv_pending = 1;
1774 case isc_sockettype_tcp:
1775 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1777 if (res != ISC_R_SUCCESS) {
1778 disp->shutdown_why = res;
1779 disp->shutting_down = 1;
1781 return (ISC_R_SUCCESS); /* recover by cancel */
1783 INSIST(disp->recv_pending == 0);
1784 disp->recv_pending = 1;
1791 return (ISC_R_SUCCESS);
1795 * Mgr must be locked when calling this function.
/*
 * Decide whether the dispatch manager may be destroyed.
 *
 * Logs the shutting-down flag, whether mgr->list is non-empty and the
 * number of outstanding allocations in depool, rpool and dpool, then
 * tests each of those five conditions in turn.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably each failing test returns ISC_FALSE and the function
 * returns ISC_TRUE otherwise -- confirm against the full source.
 */
1797 static isc_boolean_t
1798 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1799 mgr_log(mgr, LVL(90),
1800 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1801 "depool=%d, rpool=%d, dpool=%d",
1802 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1803 isc_mempool_getallocated(mgr->depool),
1804 isc_mempool_getallocated(mgr->rpool),
1805 isc_mempool_getallocated(mgr->dpool));
1806 if (!MGR_IS_SHUTTINGDOWN(mgr))
1808 if (!ISC_LIST_EMPTY(mgr->list))
1810 if (isc_mempool_getallocated(mgr->depool) != 0)
1812 if (isc_mempool_getallocated(mgr->rpool) != 0)
1814 if (isc_mempool_getallocated(mgr->dpool) != 0)
1821 * Mgr must be unlocked when calling this function.
/*
 * Release everything owned by a dispatch manager and free the manager.
 *
 * In the order shown: the manager and ARC4 locks are destroyed; the
 * depool, rpool and dpool memory pools are destroyed unconditionally,
 * bpool and spool only when they exist; the per-pool locks are destroyed;
 * the entropy source, qid table, buffer lock, blackhole ACL and
 * statistics object are released when present; the v4/v6 port arrays are
 * returned to the memory context using nv4ports/nv6ports for their sizes;
 * finally the manager structure itself is returned and the memory
 * context reference dropped.
 *
 * NOTE(review): the statements that load mgr and mctx from *mgrp are not
 * visible in this listing.
 */
1824 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1826 dns_dispatchmgr_t *mgr;
1835 DESTROYLOCK(&mgr->lock);
1838 DESTROYLOCK(&mgr->arc4_lock);
1840 isc_mempool_destroy(&mgr->depool);
1841 isc_mempool_destroy(&mgr->rpool);
1842 isc_mempool_destroy(&mgr->dpool);
1843 if (mgr->bpool != NULL)
1844 isc_mempool_destroy(&mgr->bpool);
1845 if (mgr->spool != NULL)
1846 isc_mempool_destroy(&mgr->spool);
1848 DESTROYLOCK(&mgr->spool_lock);
1849 DESTROYLOCK(&mgr->bpool_lock);
1850 DESTROYLOCK(&mgr->dpool_lock);
1851 DESTROYLOCK(&mgr->rpool_lock);
1852 DESTROYLOCK(&mgr->depool_lock);
1855 if (mgr->entropy != NULL)
1856 isc_entropy_detach(&mgr->entropy);
1858 if (mgr->qid != NULL)
1859 qid_destroy(mctx, &mgr->qid);
1861 DESTROYLOCK(&mgr->buffer_lock);
1863 if (mgr->blackhole != NULL)
1864 dns_acl_detach(&mgr->blackhole);
1866 if (mgr->stats != NULL)
1867 isc_stats_detach(&mgr->stats);
1869 if (mgr->v4ports != NULL) {
1870 isc_mem_put(mctx, mgr->v4ports,
1871 mgr->nv4ports * sizeof(in_port_t));
1873 if (mgr->v6ports != NULL) {
1874 isc_mem_put(mctx, mgr->v6ports,
1875 mgr->nv6ports * sizeof(in_port_t));
1877 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1878 isc_mem_detach(&mctx);
/*
 * Obtain a UDP socket for a dispatcher and bind it to "local".
 *
 * Three ways of obtaining the socket are visible: isc_socket_open() on
 * an already existing socket object, isc_socket_dup() of dup_socket when
 * one is supplied (that path names the socket "dispatcher" and returns
 * ISC_R_SUCCESS right away), and isc_socket_create() of a new UDP socket
 * in the protocol family of "local".  The new socket is also named
 * "dispatcher".  Unless ISC_ALLOW_MAPPED is defined,
 * isc_socket_ipv6only(sock, ISC_TRUE) is applied before
 * isc_socket_bind(sock, local, options) is called.
 *
 * NOTE(review): the condition selecting the isc_socket_open() path, the
 * error returns, and which of isc_socket_detach()/isc_socket_close() is
 * used on a bind failure in each case are not visible in this listing.
 */
1882 open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1883 unsigned int options, isc_socket_t **sockp,
1884 isc_socket_t *dup_socket)
1887 isc_result_t result;
1892 result = isc_socket_open(sock);
1893 if (result != ISC_R_SUCCESS)
1898 } else if (dup_socket != NULL) {
1899 result = isc_socket_dup(dup_socket, &sock);
1900 if (result != ISC_R_SUCCESS)
1903 isc_socket_setname(sock, "dispatcher", NULL);
1905 return (ISC_R_SUCCESS);
1907 result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1908 isc_sockettype_udp, &sock);
1909 if (result != ISC_R_SUCCESS)
1913 isc_socket_setname(sock, "dispatcher", NULL);
1915 #ifndef ISC_ALLOW_MAPPED
1916 isc_socket_ipv6only(sock, ISC_TRUE);
1918 result = isc_socket_bind(sock, local, options);
1919 if (result != ISC_R_SUCCESS) {
1921 isc_socket_detach(&sock);
1924 isc_socket_close(sock);
1933 return (ISC_R_SUCCESS);
1937 * Create a temporary port list to set the initial default set of dispatch
1938 * ports: [1024, 65535]. This is almost meaningless as the application will
1939 * normally set the ports explicitly, but is provided to fill some minor corner
/*
 * Create a port set in *portsetp with isc_portset_create() and add the
 * range 1024..65535 to it.  Returns ISC_R_SUCCESS once the range has been
 * added.
 *
 * NOTE(review): the statement executed when isc_portset_create() fails
 * is not visible in this listing; presumably the error is returned.
 */
1943 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1944 isc_result_t result;
1946 result = isc_portset_create(mctx, portsetp);
1947 if (result != ISC_R_SUCCESS)
1949 isc_portset_addrange(*portsetp, 1024, 65535);
1951 return (ISC_R_SUCCESS);
/*
 * Create a dispatch manager.
 *
 * Requires mctx != NULL, mgrp != NULL and *mgrp == NULL.
 *
 * Steps visible here:
 *  - the manager is allocated from mctx and a reference to mctx is
 *    stored in mgr->mctx;
 *  - eight mutexes are initialised (lock, arc4_lock, buffer_lock and the
 *    depool/rpool/dpool/bpool/spool locks); a failure jumps to the
 *    kill_* label that unwinds the locks initialised so far;
 *  - three memory pools are created: depool (dns_dispatchevent_t),
 *    rpool (dns_dispentry_t) and dpool (dns_dispatch_t).  Any creation
 *    failure is reported as ISC_R_NOMEMORY.  Each pool is named, limited
 *    to 32768 allocations and 32768 free items, tied to its lock and
 *    given a fill count of 256;
 *  - buffer limits, entropy, dispatch list and port arrays are reset
 *    and the magic number is set;
 *  - default v4 and v6 port sets are created, installed through
 *    dns_dispatchmgr_setavailports() and then destroyed;
 *  - the entropy source is attached when one was passed in, and the
 *    manager ARC4 context is initialised with the arc4 lock.
 *
 * The tail of the function is the unwind sequence: pools, then locks in
 * reverse order of creation, then the manager memory and the memory
 * context reference.
 *
 * NOTE(review): the labels of the unwind sequence, several goto targets
 * and the assignment to *mgrp are not visible in this listing.
 */
1959 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1960 dns_dispatchmgr_t **mgrp)
1962 dns_dispatchmgr_t *mgr;
1963 isc_result_t result;
1964 isc_portset_t *v4portset = NULL;
1965 isc_portset_t *v6portset = NULL;
1967 REQUIRE(mctx != NULL);
1968 REQUIRE(mgrp != NULL && *mgrp == NULL);
1970 mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1972 return (ISC_R_NOMEMORY);
1975 isc_mem_attach(mctx, &mgr->mctx);
1977 mgr->blackhole = NULL;
1980 result = isc_mutex_init(&mgr->lock);
1981 if (result != ISC_R_SUCCESS)
1984 result = isc_mutex_init(&mgr->arc4_lock);
1985 if (result != ISC_R_SUCCESS)
1988 result = isc_mutex_init(&mgr->buffer_lock);
1989 if (result != ISC_R_SUCCESS)
1990 goto kill_arc4_lock;
1992 result = isc_mutex_init(&mgr->depool_lock);
1993 if (result != ISC_R_SUCCESS)
1994 goto kill_buffer_lock;
1996 result = isc_mutex_init(&mgr->rpool_lock);
1997 if (result != ISC_R_SUCCESS)
1998 goto kill_depool_lock;
2000 result = isc_mutex_init(&mgr->dpool_lock);
2001 if (result != ISC_R_SUCCESS)
2002 goto kill_rpool_lock;
2004 result = isc_mutex_init(&mgr->bpool_lock);
2005 if (result != ISC_R_SUCCESS)
2006 goto kill_dpool_lock;
2008 result = isc_mutex_init(&mgr->spool_lock);
2009 if (result != ISC_R_SUCCESS)
2010 goto kill_bpool_lock;
2013 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
2014 &mgr->depool) != ISC_R_SUCCESS) {
2015 result = ISC_R_NOMEMORY;
2016 goto kill_spool_lock;
2020 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
2021 &mgr->rpool) != ISC_R_SUCCESS) {
2022 result = ISC_R_NOMEMORY;
2027 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
2028 &mgr->dpool) != ISC_R_SUCCESS) {
2029 result = ISC_R_NOMEMORY;
2033 isc_mempool_setname(mgr->depool, "dispmgr_depool");
2034 isc_mempool_setmaxalloc(mgr->depool, 32768);
2035 isc_mempool_setfreemax(mgr->depool, 32768);
2036 isc_mempool_associatelock(mgr->depool, &mgr->depool_lock);
2037 isc_mempool_setfillcount(mgr->depool, 256);
2039 isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
2040 isc_mempool_setmaxalloc(mgr->rpool, 32768);
2041 isc_mempool_setfreemax(mgr->rpool, 32768);
2042 isc_mempool_associatelock(mgr->rpool, &mgr->rpool_lock);
2043 isc_mempool_setfillcount(mgr->rpool, 256);
2045 isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
2046 isc_mempool_setmaxalloc(mgr->dpool, 32768);
2047 isc_mempool_setfreemax(mgr->dpool, 32768);
2048 isc_mempool_associatelock(mgr->dpool, &mgr->dpool_lock);
2049 isc_mempool_setfillcount(mgr->dpool, 256);
2052 mgr->buffersize = 0;
2053 mgr->maxbuffers = 0;
2056 mgr->entropy = NULL;
2059 ISC_LIST_INIT(mgr->list);
2060 mgr->v4ports = NULL;
2061 mgr->v6ports = NULL;
2064 mgr->magic = DNS_DISPATCHMGR_MAGIC;
2066 result = create_default_portset(mctx, &v4portset);
2067 if (result == ISC_R_SUCCESS) {
2068 result = create_default_portset(mctx, &v6portset);
2069 if (result == ISC_R_SUCCESS) {
2070 result = dns_dispatchmgr_setavailports(mgr,
2075 if (v4portset != NULL)
2076 isc_portset_destroy(mctx, &v4portset);
2077 if (v6portset != NULL)
2078 isc_portset_destroy(mctx, &v6portset);
2079 if (result != ISC_R_SUCCESS)
2083 if (entropy != NULL)
2084 isc_entropy_attach(entropy, &mgr->entropy);
2089 dispatch_initrandom(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
2092 return (ISC_R_SUCCESS);
2095 isc_mempool_destroy(&mgr->dpool);
2097 isc_mempool_destroy(&mgr->rpool);
2099 isc_mempool_destroy(&mgr->depool);
2101 DESTROYLOCK(&mgr->spool_lock);
2103 DESTROYLOCK(&mgr->bpool_lock);
2105 DESTROYLOCK(&mgr->dpool_lock);
2107 DESTROYLOCK(&mgr->rpool_lock);
2109 DESTROYLOCK(&mgr->depool_lock);
2111 DESTROYLOCK(&mgr->buffer_lock);
2113 DESTROYLOCK(&mgr->arc4_lock);
2115 DESTROYLOCK(&mgr->lock);
2117 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
2118 isc_mem_detach(&mctx);
/*
 * Replace the manager blackhole ACL: any ACL currently held is detached
 * first, then a reference to "blackhole" is attached in its place.
 * Requires a valid manager.
 */
2124 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
2125 REQUIRE(VALID_DISPATCHMGR(mgr));
2126 if (mgr->blackhole != NULL)
2127 dns_acl_detach(&mgr->blackhole);
2128 dns_acl_attach(blackhole, &mgr->blackhole);
/*
 * Return the blackhole ACL pointer stored in the manager (possibly NULL).
 * No additional reference is attached here.  Requires a valid manager.
 */
2132 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
2133 REQUIRE(VALID_DISPATCHMGR(mgr));
2134 return (mgr->blackhole);
/*
 * Deprecated setter kept for interface compatibility.  The only visible
 * action is validating the manager; no use of "portlist" appears in
 * this listing.
 */
2138 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
2139 dns_portlist_t *portlist)
2141 REQUIRE(VALID_DISPATCHMGR(mgr));
2144 /* This function is deprecated: use dns_dispatchmgr_setavailports(). */
/*
 * Deprecated getter: validates the manager and always returns NULL.
 */
2149 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
2150 REQUIRE(VALID_DISPATCHMGR(mgr));
2151 return (NULL); /* this function is deprecated */
/*
 * Install the sets of ports the manager may use for IPv4 and IPv6.
 *
 * The number of ports in each set is taken from isc_portset_nports().
 * For a non-empty set an array of in_port_t of that size is allocated
 * from mgr->mctx; if the v6 allocation fails, an already allocated v4
 * array is released and ISC_R_NOMEMORY is returned.  A do/while loop
 * then walks port numbers up to 65535 and tests each one against both
 * sets with isc_portset_isset(); the INSISTs check that no more entries
 * are produced than were counted and that both arrays end up exactly
 * full.  Finally any arrays previously held by the manager are released
 * and replaced by the new arrays and counts.
 *
 * NOTE(review): the initialisation of v4ports/v6ports/p/i4/i6, the
 * stores into the arrays and any locking around the replacement are not
 * visible in this listing.
 */
2155 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
2156 isc_portset_t *v6portset)
2158 in_port_t *v4ports, *v6ports, p;
2159 unsigned int nv4ports, nv6ports, i4, i6;
2161 REQUIRE(VALID_DISPATCHMGR(mgr));
2163 nv4ports = isc_portset_nports(v4portset);
2164 nv6ports = isc_portset_nports(v6portset);
2167 if (nv4ports != 0) {
2168 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
2169 if (v4ports == NULL)
2170 return (ISC_R_NOMEMORY);
2173 if (nv6ports != 0) {
2174 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
2175 if (v6ports == NULL) {
2176 if (v4ports != NULL) {
2177 isc_mem_put(mgr->mctx, v4ports,
2179 isc_portset_nports(v4portset));
2181 return (ISC_R_NOMEMORY);
2189 if (isc_portset_isset(v4portset, p)) {
2190 INSIST(i4 < nv4ports);
2193 if (isc_portset_isset(v6portset, p)) {
2194 INSIST(i6 < nv6ports);
2197 } while (p++ < 65535);
2198 INSIST(i4 == nv4ports && i6 == nv6ports);
2201 if (mgr->v4ports != NULL) {
2202 isc_mem_put(mgr->mctx, mgr->v4ports,
2203 mgr->nv4ports * sizeof(in_port_t));
2205 mgr->v4ports = v4ports;
2206 mgr->nv4ports = nv4ports;
2208 if (mgr->v6ports != NULL) {
2209 isc_mem_put(mgr->mctx, mgr->v6ports,
2210 mgr->nv6ports * sizeof(in_port_t));
2212 mgr->v6ports = v6ports;
2213 mgr->nv6ports = nv6ports;
2216 return (ISC_R_SUCCESS);
/*
 * Create, or adjust, the manager-wide UDP resources: the receive buffer
 * pool (bpool), the dispatch socket pool (spool) and the qid table.
 *
 * Requires a valid manager, 512 <= buffersize < 64K, maxbuffers > 0,
 * buckets < 2097169 and increment > buckets.
 *
 * All work is done under mgr->buffer_lock.
 *  - bpool exists: the buffer limit is only ever raised (see the comment
 *    in the body); otherwise a pool of buffersize-byte items is created
 *    and limited to maxbuffers.
 *  - spool exists: when maxrequests is below DNS_DISPATCH_POOLSOCKS * 2
 *    both of its limits are set to that value, and the function returns
 *    ISC_R_SUCCESS; otherwise a pool of dispsocket_t is created, limited
 *    to maxrequests, and the qid table is allocated.
 *  - on success buffersize and maxbuffers are recorded in the manager.
 *
 * The tail of the function is the failure cleanup: bpool is destroyed,
 * spool too when it exists, and the lock is released.
 *
 * NOTE(review): the else-branches, the error returns and the cleanup
 * label are not visible in this listing.
 */
2220 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
2221 unsigned int buffersize, unsigned int maxbuffers,
2222 unsigned int maxrequests, unsigned int buckets,
2223 unsigned int increment)
2225 isc_result_t result;
2227 REQUIRE(VALID_DISPATCHMGR(mgr));
2228 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2229 REQUIRE(maxbuffers > 0);
2230 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2231 REQUIRE(increment > buckets);
2234 * Keep some number of items around. This should be a config
2235 * option. For now, keep 8, but later keep at least two even
2236 * if the caller wants less. This allows us to ensure certain
2237 * things, like an event can be "freed" and the next allocation
2238 * will always succeed.
2240 * Note that if limits are placed on anything here, we use one
2241 * event internally, so the actual limit should be "wanted + 1."
2249 LOCK(&mgr->buffer_lock);
2251 /* Create or adjust buffer pool */
2252 if (mgr->bpool != NULL) {
2254 * We only increase the maxbuffers to avoid accidental buffer
2255 * shortage. Ideally we'd separate the manager-wide maximum
2256 * from per-dispatch limits and respect the latter within the
2257 * global limit. But at this moment that's deemed to be
2258 * overkill and isn't worth additional implementation
2261 if (maxbuffers > mgr->maxbuffers) {
2262 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2263 isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2264 mgr->maxbuffers = maxbuffers;
2267 result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
2268 if (result != ISC_R_SUCCESS) {
2269 UNLOCK(&mgr->buffer_lock);
2272 isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
2273 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2274 isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2275 isc_mempool_associatelock(mgr->bpool, &mgr->bpool_lock);
2276 isc_mempool_setfillcount(mgr->bpool, 256);
2279 /* Create or adjust socket pool */
2280 if (mgr->spool != NULL) {
/*
 * Both limits are adjusted together.  Without the braces only the
 * setmaxalloc call was guarded and setfreemax ran unconditionally.
 */
2281 if (maxrequests < DNS_DISPATCH_POOLSOCKS * 2) {
2282 isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
2283 isc_mempool_setfreemax(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
}
2284 UNLOCK(&mgr->buffer_lock);
2285 return (ISC_R_SUCCESS);
2287 result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
2289 if (result != ISC_R_SUCCESS) {
2290 UNLOCK(&mgr->buffer_lock);
2293 isc_mempool_setname(mgr->spool, "dispmgr_spool");
2294 isc_mempool_setmaxalloc(mgr->spool, maxrequests);
2295 isc_mempool_setfreemax(mgr->spool, maxrequests);
2296 isc_mempool_associatelock(mgr->spool, &mgr->spool_lock);
2297 isc_mempool_setfillcount(mgr->spool, 256);
2299 result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
2300 if (result != ISC_R_SUCCESS)
2303 mgr->buffersize = buffersize;
2304 mgr->maxbuffers = maxbuffers;
2305 UNLOCK(&mgr->buffer_lock);
2306 return (ISC_R_SUCCESS);
2309 isc_mempool_destroy(&mgr->bpool);
2310 if (mgr->spool != NULL)
2311 isc_mempool_destroy(&mgr->spool);
2312 UNLOCK(&mgr->buffer_lock);
/*
 * Request destruction of a dispatch manager.
 *
 * Requires mgrp != NULL and *mgrp to be a valid manager.  The manager is
 * marked MGR_SHUTTINGDOWN, destroy_mgr_ok() is asked whether it can be
 * torn down now, and the answer is logged.
 *
 * NOTE(review): the locking, the clearing of *mgrp and the call that
 * acts on "killit" are not visible in this listing; presumably
 * destroy_mgr() is invoked when killit is true -- confirm.
 */
2317 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
2318 dns_dispatchmgr_t *mgr;
2319 isc_boolean_t killit;
2321 REQUIRE(mgrp != NULL);
2322 REQUIRE(VALID_DISPATCHMGR(*mgrp));
2328 mgr->state |= MGR_SHUTTINGDOWN;
2330 killit = destroy_mgr_ok(mgr);
2333 mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
/*
 * Attach a statistics object to the manager.
 *
 * Requires a valid manager that has no dispatches on its list yet and
 * no statistics object already attached.
 */
2340 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2341 REQUIRE(VALID_DISPATCHMGR(mgr));
2342 REQUIRE(ISC_LIST_EMPTY(mgr->list));
2343 REQUIRE(mgr->stats == NULL);
2345 isc_stats_attach(stats, &mgr->stats);
/*
 * Comparison callback over two in_port_t values, read through the
 * generic "key" and "ent" pointers.  It is passed to bsearch() by
 * portavailable().
 *
 * NOTE(review): the comparison and return statements are not visible in
 * this listing; presumably it returns negative/zero/positive in the
 * usual bsearch() convention -- confirm.
 */
2349 port_cmp(const void *key, const void *ent) {
2350 in_port_t p1 = *(const in_port_t *)key;
2351 in_port_t p2 = *(const in_port_t *)ent;
/*
 * Check whether a port is in the manager list of usable ports.
 *
 * At least one of "sock" and "sockaddrp" must be given.  On the path
 * shown, the address is obtained from the socket with
 * isc_socket_getsockname() into a local isc_sockaddr_t.  The v4 port
 * array is searched when the address family is AF_INET, the v6 array
 * otherwise.  The lookup uses bsearch() with port_cmp, and "available"
 * (initially ISC_FALSE) is set to ISC_TRUE on a hit.
 *
 * NOTE(review): the condition guarding the getsockname() path, any
 * locking, and the return statement are not visible in this listing;
 * bsearch() also implies the port arrays are kept sorted -- confirm.
 */
2361 static isc_boolean_t
2362 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2363 isc_sockaddr_t *sockaddrp)
2365 isc_sockaddr_t sockaddr;
2366 isc_result_t result;
2367 in_port_t *ports, port;
2368 unsigned int nports;
2369 isc_boolean_t available = ISC_FALSE;
2371 REQUIRE(sock != NULL || sockaddrp != NULL);
2375 sockaddrp = &sockaddr;
2376 result = isc_socket_getsockname(sock, sockaddrp);
2377 if (result != ISC_R_SUCCESS)
2381 if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2382 ports = mgr->v4ports;
2383 nports = mgr->nv4ports;
2385 ports = mgr->v6ports;
2386 nports = mgr->nv6ports;
2391 port = isc_sockaddr_getport(sockaddrp);
2392 if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2393 available = ISC_TRUE;
/* True when _a1 and _a2 agree on every bit selected by _mask. */
2400 #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
/*
 * Decide whether a dispatch is bound to the requested local address.
 *
 * Requires disp->socket != NULL.  The tests appear in this order:
 *  1. both "addr" and disp->local have port 0 and the port the socket is
 *     actually bound to is not available according to portavailable();
 *  2. disp->local equals "addr" exactly;
 *  3. "addr" has port 0;
 *  4. the address parts of disp->local and "addr" differ;
 *  5. otherwise the bound name of the socket is fetched and the result
 *     is whether it equals "addr".
 *
 * NOTE(review): the values returned by tests 1-4 and on a
 * getsockname() failure are not visible in this listing.
 */
2402 static isc_boolean_t
2403 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2404 isc_sockaddr_t sockaddr;
2405 isc_result_t result;
2407 REQUIRE(disp->socket != NULL);
2413 * Don't match wildcard ports unless the port is available in the
2414 * current configuration.
2416 if (isc_sockaddr_getport(addr) == 0 &&
2417 isc_sockaddr_getport(&disp->local) == 0 &&
2418 !portavailable(disp->mgr, disp->socket, NULL)) {
2423 * Check if we match the binding <address,port>.
2424 * Wildcard ports match/fail here.
2426 if (isc_sockaddr_equal(&disp->local, addr))
2428 if (isc_sockaddr_getport(addr) == 0)
2432 * Check if we match a bound wildcard port <address,port>.
2434 if (!isc_sockaddr_eqaddr(&disp->local, addr))
2436 result = isc_socket_getsockname(disp->socket, &sockaddr);
2437 if (result != ISC_R_SUCCESS)
2440 return (isc_sockaddr_equal(&sockaddr, addr));
2444 * Requires mgr be locked.
2446 * No dispatcher can be locked by this thread when calling this function.
2450 * If a matching dispatcher is found, it is locked after this function
2451 * returns, and must be unlocked by the caller.
/*
 * Search the manager dispatch list for a dispatch matching "local" and
 * the requested attributes.
 *
 * DNS_DISPATCHATTR_PRIVATE and DNS_DISPATCHATTR_EXCLUSIVE are cleared
 * from "attributes" and added to "mask", so a dispatch carrying either
 * flag can never match.  A candidate matches when it is not shutting
 * down, ATTRMATCH() holds for its attributes under the mask, and
 * local_addr_match() accepts the address.  Candidates that do not match
 * are unlocked before moving to the next list entry.  The result is
 * ISC_R_NOTFOUND or ISC_R_SUCCESS.
 *
 * NOTE(review): the LOCK() of each candidate, the loop exit on a match,
 * the store to *dispp and the return are not visible in this listing.
 */
2454 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2455 unsigned int attributes, unsigned int mask,
2456 dns_dispatch_t **dispp)
2458 dns_dispatch_t *disp;
2459 isc_result_t result;
2462 * Make certain that we will not match a private or exclusive dispatch.
2464 attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2465 mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2467 disp = ISC_LIST_HEAD(mgr->list);
2468 while (disp != NULL) {
2470 if ((disp->shutting_down == 0)
2471 && ATTRMATCH(disp->attributes, attributes, mask)
2472 && local_addr_match(disp, local))
2474 UNLOCK(&disp->lock);
2475 disp = ISC_LIST_NEXT(disp, link);
2479 result = ISC_R_NOTFOUND;
2484 result = ISC_R_SUCCESS;
/*
 * Allocate and initialise a query-id table.
 *
 * Requires a valid manager, buckets < 2097169, increment > buckets,
 * qidp != NULL and *qidp == NULL.
 *
 * The structure and a table of "buckets" dns_displist_t heads are taken
 * from mgr->mctx; when needsocktable is true a parallel table of
 * dispsocketlist_t heads is allocated as well, otherwise sock_table
 * stays NULL.  Every allocation failure, and a failure to initialise the
 * lock, releases whatever was allocated before it.  On success every
 * bucket list is initialised, the bucket count and increment are
 * recorded and the magic number is set.
 *
 * NOTE(review): the store to *qidp and the value returned when the lock
 * initialisation fails are not visible in this listing.
 */
2491 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2492 unsigned int increment, dns_qid_t **qidp,
2493 isc_boolean_t needsocktable)
2497 isc_result_t result;
2499 REQUIRE(VALID_DISPATCHMGR(mgr));
2500 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2501 REQUIRE(increment > buckets);
2502 REQUIRE(qidp != NULL && *qidp == NULL);
2504 qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2506 return (ISC_R_NOMEMORY);
2508 qid->qid_table = isc_mem_get(mgr->mctx,
2509 buckets * sizeof(dns_displist_t));
2510 if (qid->qid_table == NULL) {
2511 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2512 return (ISC_R_NOMEMORY);
2515 qid->sock_table = NULL;
2516 if (needsocktable) {
2517 qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2518 sizeof(dispsocketlist_t));
2519 if (qid->sock_table == NULL) {
2520 isc_mem_put(mgr->mctx, qid->qid_table,
2521 buckets * sizeof(dns_displist_t));
2522 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2523 return (ISC_R_NOMEMORY);
2527 result = isc_mutex_init(&qid->lock);
2528 if (result != ISC_R_SUCCESS) {
2529 if (qid->sock_table != NULL) {
2530 isc_mem_put(mgr->mctx, qid->sock_table,
2531 buckets * sizeof(dispsocketlist_t));
2533 isc_mem_put(mgr->mctx, qid->qid_table,
2534 buckets * sizeof(dns_displist_t));
2535 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2539 for (i = 0; i < buckets; i++) {
2540 ISC_LIST_INIT(qid->qid_table[i]);
2541 if (qid->sock_table != NULL)
2542 ISC_LIST_INIT(qid->sock_table[i]);
2545 qid->qid_nbuckets = buckets;
2546 qid->qid_increment = increment;
2547 qid->magic = QID_MAGIC;
2549 return (ISC_R_SUCCESS);
/*
 * Free a query-id table.
 *
 * Requires qidp != NULL and a valid qid.  The bucket table, the socket
 * table when one was allocated, the lock and finally the structure
 * itself are released, using qid_nbuckets for the table sizes.
 *
 * NOTE(review): the load of qid from *qidp and the clearing of *qidp
 * and of the magic number are not visible in this listing.
 */
2553 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2556 REQUIRE(qidp != NULL);
2559 REQUIRE(VALID_QID(qid));
2563 isc_mem_put(mctx, qid->qid_table,
2564 qid->qid_nbuckets * sizeof(dns_displist_t));
2565 if (qid->sock_table != NULL) {
2566 isc_mem_put(mctx, qid->sock_table,
2567 qid->qid_nbuckets * sizeof(dispsocketlist_t));
2569 DESTROYLOCK(&qid->lock);
2570 isc_mem_put(mctx, qid, sizeof(*qid));
2574 * Allocate and set important limits.
/*
 * Allocate a dispatch from the manager dpool and give its
 * transport-independent fields their initial values.
 *
 * Requires a valid manager, dispp != NULL and *dispp == NULL.  Returns
 * ISC_R_NOMEMORY when the pool allocation fails or when the fail-safe
 * event cannot be allocated.
 *
 * Fields set here include maxrequests (from the argument), attributes,
 * the list link, recv_pending, the local address (zeroed), localport,
 * the shutdown/connected/tcpmsg flags, shutdown_why
 * (ISC_R_UNEXPECTED), tcpbuffers, the active and inactive socket lists,
 * the per-dispatch ARC4 context (initialised without a lock), and the
 * port table and port pool (both NULL).  The dispatch lock is then
 * initialised, the fail-safe event allocated and the magic number set.
 *
 * The tail of the function is the failure cleanup: the lock is
 * destroyed and the dispatch returned to the pool.
 *
 * NOTE(review): the cleanup labels, the store to *dispp and the
 * initialisation of fields on lines missing from this listing are not
 * visible here.
 */
2577 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2578 dns_dispatch_t **dispp)
2580 dns_dispatch_t *disp;
2581 isc_result_t result;
2583 REQUIRE(VALID_DISPATCHMGR(mgr));
2584 REQUIRE(dispp != NULL && *dispp == NULL);
2587 * Set up the dispatcher, mostly. Don't bother setting some of
2588 * the options that are controlled by tcp vs. udp, etc.
2591 disp = isc_mempool_get(mgr->dpool);
2593 return (ISC_R_NOMEMORY);
2597 disp->maxrequests = maxrequests;
2598 disp->attributes = 0;
2599 ISC_LINK_INIT(disp, link);
2601 disp->recv_pending = 0;
2602 memset(&disp->local, 0, sizeof(disp->local));
2603 disp->localport = 0;
2604 disp->shutting_down = 0;
2605 disp->shutdown_out = 0;
2606 disp->connected = 0;
2607 disp->tcpmsg_valid = 0;
2608 disp->shutdown_why = ISC_R_UNEXPECTED;
2610 disp->tcpbuffers = 0;
2612 ISC_LIST_INIT(disp->activesockets);
2613 ISC_LIST_INIT(disp->inactivesockets);
2615 dispatch_initrandom(&disp->arc4ctx, mgr->entropy, NULL);
2616 disp->port_table = NULL;
2617 disp->portpool = NULL;
2619 result = isc_mutex_init(&disp->lock);
2620 if (result != ISC_R_SUCCESS)
2623 disp->failsafe_ev = allocate_devent(disp);
2624 if (disp->failsafe_ev == NULL) {
2625 result = ISC_R_NOMEMORY;
2629 disp->magic = DISPATCH_MAGIC;
2632 return (ISC_R_SUCCESS);
2638 DESTROYLOCK(&disp->lock);
2640 isc_mempool_put(mgr->dpool, disp);
2647 * MUST be unlocked, and not used by anything.
/*
 * Release a dispatch that is no longer in use.
 *
 * Requires *dispp to be a valid dispatch whose manager is valid.  The
 * TCP message state is invalidated when it was set up.  The function
 * then insists that the dispatch is idle: no TCP buffers, no requests,
 * no pending receive, and empty active and inactive socket lists.  The
 * fail-safe event goes back to the manager depool; the qid table is
 * destroyed when present; the port table is checked to be empty in
 * every slot and freed; the port pool is destroyed when present; the
 * lock is destroyed and the dispatch is returned to the manager dpool.
 *
 * NOTE(review): the loads of disp and mgr, the clearing of *dispp and
 * of the magic number, and the declaration of the loop index are not
 * visible in this listing.
 */
2650 dispatch_free(dns_dispatch_t **dispp)
2652 dns_dispatch_t *disp;
2653 dns_dispatchmgr_t *mgr;
2656 REQUIRE(VALID_DISPATCH(*dispp));
2661 REQUIRE(VALID_DISPATCHMGR(mgr));
2663 if (disp->tcpmsg_valid) {
2664 dns_tcpmsg_invalidate(&disp->tcpmsg);
2665 disp->tcpmsg_valid = 0;
2668 INSIST(disp->tcpbuffers == 0);
2669 INSIST(disp->requests == 0);
2670 INSIST(disp->recv_pending == 0);
2671 INSIST(ISC_LIST_EMPTY(disp->activesockets));
2672 INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2674 isc_mempool_put(mgr->depool, disp->failsafe_ev);
2675 disp->failsafe_ev = NULL;
2677 if (disp->qid != NULL)
2678 qid_destroy(mgr->mctx, &disp->qid);
2680 if (disp->port_table != NULL) {
2681 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2682 INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2683 isc_mem_put(mgr->mctx, disp->port_table,
2684 sizeof(disp->port_table[0]) *
2685 DNS_DISPATCH_PORTTABLESIZE);
2688 if (disp->portpool != NULL)
2689 isc_mempool_destroy(&disp->portpool);
2692 DESTROYLOCK(&disp->lock);
2694 isc_mempool_put(mgr->dpool, disp);
/*
 * Create a dispatch on top of an existing TCP socket.
 *
 * Requires a valid manager, a socket of type isc_sockettype_tcp, and
 * attributes with DNS_DISPATCHATTR_TCP set and DNS_DISPATCHATTR_UDP
 * clear.  DNS_DISPATCHATTR_PRIVATE is always added to the attributes.
 *
 * The dispatch is obtained from dispatch_allocate() and given its own
 * qid table without a socket table.  A reference to "sock" is attached,
 * one task is created (named "tcpdispatch"), a control event of type
 * DNS_EVENT_DISPATCHCONTROL is allocated, the TCP message state is
 * initialised and marked valid, and the dispatch is appended to the
 * manager list.
 *
 * The tail of the function is the failure unwind: task, socket
 * reference, then dispatch_free().
 *
 * NOTE(review): no use of buffersize or maxbuffers is visible; the
 * manager locking, the store to *dispp, some unwind labels and the
 * error return are also missing from this listing.
 */
2698 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2699 isc_taskmgr_t *taskmgr, unsigned int buffersize,
2700 unsigned int maxbuffers, unsigned int maxrequests,
2701 unsigned int buckets, unsigned int increment,
2702 unsigned int attributes, dns_dispatch_t **dispp)
2704 isc_result_t result;
2705 dns_dispatch_t *disp;
2710 REQUIRE(VALID_DISPATCHMGR(mgr));
2711 REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2712 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2713 REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2715 attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */
2720 * dispatch_allocate() checks mgr for us.
2721 * qid_allocate() checks buckets and increment for us.
2724 result = dispatch_allocate(mgr, maxrequests, &disp);
2725 if (result != ISC_R_SUCCESS) {
2730 result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2731 if (result != ISC_R_SUCCESS)
2732 goto deallocate_dispatch;
2734 disp->socktype = isc_sockettype_tcp;
2735 disp->socket = NULL;
2736 isc_socket_attach(sock, &disp->socket);
2738 disp->sepool = NULL;
2741 disp->task[0] = NULL;
2742 result = isc_task_create(taskmgr, 0, &disp->task[0]);
2743 if (result != ISC_R_SUCCESS)
2746 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2747 DNS_EVENT_DISPATCHCONTROL,
2749 sizeof(isc_event_t));
2750 if (disp->ctlevent == NULL) {
2751 result = ISC_R_NOMEMORY;
2755 isc_task_setname(disp->task[0], "tcpdispatch", disp);
2757 dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2758 disp->tcpmsg_valid = 1;
2760 disp->attributes = attributes;
2763 * Append it to the dispatcher list.
2765 ISC_LIST_APPEND(mgr->list, disp, link);
2768 mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2769 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2773 return (ISC_R_SUCCESS);
2779 isc_task_detach(&disp->task[0]);
2781 isc_socket_detach(&disp->socket);
2782 deallocate_dispatch:
2783 dispatch_free(&disp);
/*
 * dns_dispatch_getudp_dup: obtain a UDP dispatcher for 'localaddr'.
 *
 * The manager's UDP buffer parameters are first applied through
 * dns_dispatchmgr_setudp().  When 'dup_dispatch' is NULL, dispatch_find()
 * is asked for an existing dispatcher matching 'localaddr', 'attributes'
 * and 'mask'; if it finds one, that dispatcher is adjusted (request limit,
 * NOLISTEN) and ISC_R_SUCCESS is returned.  Otherwise a dispatcher is
 * created with dispatch_createudp(); when 'dup_dispatch' is non-NULL its
 * socket is handed to that call.
 *
 * Requirements are those spelled out by the REQUIREs below; in addition an
 * EXCLUSIVE dispatcher must be requested with local port 0.
 *
 * NOTE(review): the manager locking, reference counting and the stores
 * through 'dispp' are on lines not visible in this listing.
 */
2791 dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2792 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2793 unsigned int buffersize,
2794 unsigned int maxbuffers, unsigned int maxrequests,
2795 unsigned int buckets, unsigned int increment,
2796 unsigned int attributes, unsigned int mask,
2797 dns_dispatch_t **dispp, dns_dispatch_t *dup_dispatch)
2799 isc_result_t result;
2800 dns_dispatch_t *disp = NULL;
2802 REQUIRE(VALID_DISPATCHMGR(mgr));
2803 REQUIRE(sockmgr != NULL);
2804 REQUIRE(localaddr != NULL);
2805 REQUIRE(taskmgr != NULL);
2806 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2807 REQUIRE(maxbuffers > 0);
2808 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2809 REQUIRE(increment > buckets);
2810 REQUIRE(dispp != NULL && *dispp == NULL);
2811 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2813 result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2814 maxrequests, buckets, increment);
2815 if (result != ISC_R_SUCCESS)
/* Exclusive dispatchers may not be bound to a caller-chosen port. */
2820 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2821 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2826 * See if we have a dispatcher that matches.
2828 if (dup_dispatch == NULL) {
2829 result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2830 if (result == ISC_R_SUCCESS) {
/* Only ever raise the shared dispatcher's request limit. */
2833 if (disp->maxrequests < maxrequests)
2834 disp->maxrequests = maxrequests;
/*
 * If this caller wants NOLISTEN and the dispatcher is still listening,
 * switch it to NOLISTEN and cancel any receive that is pending.
 */
2836 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0
2837 && (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2839 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2840 if (disp->recv_pending != 0)
2841 isc_socket_cancel(disp->socket,
2843 ISC_SOCKCANCEL_RECV);
2846 UNLOCK(&disp->lock);
2851 return (ISC_R_SUCCESS);
2859 result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2860 maxrequests, attributes, &disp,
2861 dup_dispatch == NULL
2863 : dup_dispatch->socket);
2865 if (result != ISC_R_SUCCESS) {
2873 return (ISC_R_SUCCESS);
/*
 * dns_dispatch_getudp: convenience wrapper around
 * dns_dispatch_getudp_dup().  All arguments are forwarded unchanged and
 * NULL is passed for the 'dup_dispatch' argument; the result of that call
 * is returned as-is.
 */
2877 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2878 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2879 unsigned int buffersize,
2880 unsigned int maxbuffers, unsigned int maxrequests,
2881 unsigned int buckets, unsigned int increment,
2882 unsigned int attributes, unsigned int mask,
2883 dns_dispatch_t **dispp)
2885 return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr,
2886 buffersize, maxbuffers, maxrequests,
2887 buckets, increment, attributes,
2888 mask, dispp, NULL));
2892 * mgr should be locked.
/*
 * Default value of DNS_DISPATCH_HELD, which sizes the 'held' socket array
 * in get_udpsocket().  It is only defined here when the build has not
 * already supplied a value.
 */
2895 #ifndef DNS_DISPATCH_HELD
2896 #define DNS_DISPATCH_HELD 20U
/*
 * get_udpsocket: open the UDP socket used by dispatcher 'disp' for
 * 'localaddr'.
 *
 * Visible strategy:
 *  - With a zero port in 'localaddr', ports are drawn at random from the
 *    manager's v4ports/v6ports array (chosen by address family) for up to
 *    1024 attempts; ISC_R_NOPERM and ISC_R_ADDRINUSE results are the ones
 *    treated as "try again".  The chosen port is recorded in
 *    disp->localport.  ISC_R_ADDRNOTAVAIL is returned from this phase
 *    (presumably when the port array is empty -- the condition line is not
 *    visible here).
 *  - With a non-zero port, the socket is opened with
 *    ISC_SOCKET_REUSEADDRESS.
 *  - A further loop makes up to 0xffff open attempts on 'localaddr' and
 *    tests each socket with portavailable(); the 'held' array
 *    (DNS_DISPATCH_HELD slots) is used while doing so.  If no acceptable
 *    port is found an error is logged and ISC_R_FAILURE is the result.
 *  - Every socket still parked in 'held' is detached before returning.
 *
 * 'sockp' must be non-NULL and point to NULL on entry.
 * NOTE(review): the store through 'sockp', the loop exits and the use of
 * 'dup_socket' are on lines not visible in this listing.
 */
2900 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2901 isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2902 isc_socket_t **sockp, isc_socket_t *dup_socket)
2905 isc_socket_t *held[DNS_DISPATCH_HELD];
2906 isc_sockaddr_t localaddr_bound;
2907 isc_socket_t *sock = NULL;
2908 isc_result_t result = ISC_R_SUCCESS;
2909 isc_boolean_t anyport;
2911 INSIST(sockp != NULL && *sockp == NULL);
/* Work on a copy so the caller's address is never modified. */
2913 localaddr_bound = *localaddr;
2914 anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
2917 unsigned int nports;
2921 * If no port is specified, we first try to pick up a random
2922 * port by ourselves.
2924 if (isc_sockaddr_pf(localaddr) == AF_INET) {
2925 nports = disp->mgr->nv4ports;
2926 ports = disp->mgr->v4ports;
2928 nports = disp->mgr->nv6ports;
2929 ports = disp->mgr->v6ports;
2932 return (ISC_R_ADDRNOTAVAIL);
2934 for (i = 0; i < 1024; i++) {
2937 prt = ports[dispatch_uniformrandom(
2940 isc_sockaddr_setport(&localaddr_bound, prt);
2941 result = open_socket(sockmgr, &localaddr_bound,
2944 * Continue if the port choosen is already in use
2945 * or the OS has reserved it.
2947 if (result == ISC_R_NOPERM ||
2948 result == ISC_R_ADDRINUSE)
2950 disp->localport = prt;
2956 * If this fails 1024 times, we then ask the kernel for
2960 /* Allow to reuse address for non-random ports. */
2961 result = open_socket(sockmgr, localaddr,
2962 ISC_SOCKET_REUSEADDRESS, &sock,
2965 if (result == ISC_R_SUCCESS)
/* Start with an empty set of parked sockets. */
2971 memset(held, 0, sizeof(held));
2974 for (j = 0; j < 0xffffU; j++) {
2975 result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
2976 if (result != ISC_R_SUCCESS)
2978 else if (portavailable(mgr, sock, NULL))
2980 if (held[i] != NULL)
2981 isc_socket_detach(&held[i]);
2984 if (i == DNS_DISPATCH_HELD)
2988 mgr_log(mgr, ISC_LOG_ERROR,
2989 "avoid-v%s-udp-ports: unable to allocate "
2990 "an available port",
2991 isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
2992 result = ISC_R_FAILURE;
/* Release every socket that is still parked in 'held'. */
2998 for (i = 0; i < DNS_DISPATCH_HELD; i++) {
2999 if (held[i] != NULL)
3000 isc_socket_detach(&held[i]);
3007 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
3008 isc_taskmgr_t *taskmgr,
3009 isc_sockaddr_t *localaddr,
3010 unsigned int maxrequests,
3011 unsigned int attributes,
3012 dns_dispatch_t **dispp,
3013 isc_socket_t *dup_socket)
3015 isc_result_t result;
3016 dns_dispatch_t *disp;
3017 isc_socket_t *sock = NULL;
3021 * dispatch_allocate() checks mgr for us.
3024 result = dispatch_allocate(mgr, maxrequests, &disp);
3025 if (result != ISC_R_SUCCESS)
3028 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
3029 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock,
3031 if (result != ISC_R_SUCCESS)
3032 goto deallocate_dispatch;
3034 if (isc_log_wouldlog(dns_lctx, 90)) {
3035 char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3037 isc_sockaddr_format(localaddr, addrbuf,
3038 ISC_SOCKADDR_FORMATSIZE);
3039 mgr_log(mgr, LVL(90), "dns_dispatch_createudp: Created"
3040 " UDP dispatch for %s with socket fd %d\n",
3041 addrbuf, isc_socket_getfd(sock));
3045 isc_sockaddr_t sa_any;
3048 * For dispatches using exclusive sockets with a specific
3049 * source address, we only check if the specified address is
3050 * available on the system. Query sockets will be created later
3053 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
3054 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
3055 result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
3057 isc_socket_detach(&sock);
3058 if (result != ISC_R_SUCCESS)
3059 goto deallocate_dispatch;
3062 disp->port_table = isc_mem_get(mgr->mctx,
3063 sizeof(disp->port_table[0]) *
3064 DNS_DISPATCH_PORTTABLESIZE);
3065 if (disp->port_table == NULL)
3066 goto deallocate_dispatch;
3067 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
3068 ISC_LIST_INIT(disp->port_table[i]);
3070 result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
3072 if (result != ISC_R_SUCCESS)
3073 goto deallocate_dispatch;
3074 isc_mempool_setname(disp->portpool, "disp_portpool");
3075 isc_mempool_setfreemax(disp->portpool, 128);
3077 disp->socktype = isc_sockettype_udp;
3078 disp->socket = sock;
3079 disp->local = *localaddr;
3081 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3082 disp->ntasks = MAX_INTERNAL_TASKS;
3085 for (i = 0; i < disp->ntasks; i++) {
3086 disp->task[i] = NULL;
3087 result = isc_task_create(taskmgr, 0, &disp->task[i]);
3088 if (result != ISC_R_SUCCESS) {
3090 isc_task_shutdown(disp->task[i]);
3091 isc_task_detach(&disp->task[i]);
3095 isc_task_setname(disp->task[i], "udpdispatch", disp);
3098 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
3099 DNS_EVENT_DISPATCHCONTROL,
3101 sizeof(isc_event_t));
3102 if (disp->ctlevent == NULL) {
3103 result = ISC_R_NOMEMORY;
3107 disp->sepool = NULL;
3108 if (isc_mempool_create(mgr->mctx, sizeof(isc_socketevent_t),
3109 &disp->sepool) != ISC_R_SUCCESS)
3111 result = ISC_R_NOMEMORY;
3115 result = isc_mutex_init(&disp->sepool_lock);
3116 if (result != ISC_R_SUCCESS)
3119 isc_mempool_setname(disp->sepool, "disp_sepool");
3120 isc_mempool_setmaxalloc(disp->sepool, 32768);
3121 isc_mempool_setfreemax(disp->sepool, 32768);
3122 isc_mempool_associatelock(disp->sepool, &disp->sepool_lock);
3123 isc_mempool_setfillcount(disp->sepool, 16);
3125 attributes &= ~DNS_DISPATCHATTR_TCP;
3126 attributes |= DNS_DISPATCHATTR_UDP;
3127 disp->attributes = attributes;
3130 * Append it to the dispatcher list.
3132 ISC_LIST_APPEND(mgr->list, disp, link);
3134 mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
3135 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
3136 if (disp->socket != NULL)
3137 dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
3147 isc_mempool_destroy(&disp->sepool);
3149 isc_event_free(&disp->ctlevent);
3151 for (i = 0; i < disp->ntasks; i++)
3152 isc_task_detach(&disp->task[i]);
3154 if (disp->socket != NULL)
3155 isc_socket_detach(&disp->socket);
3156 deallocate_dispatch:
3157 dispatch_free(&disp);
/*
 * dns_dispatch_attach: attach '*dispp' to dispatcher 'disp'.
 *
 * Requires a valid 'disp' and a non-NULL 'dispp' that points to NULL.
 * NOTE(review): the statements executed under disp->lock (presumably the
 * reference-count increment and the store through 'dispp') are not visible
 * in this listing.
 */
3163 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
3164 REQUIRE(VALID_DISPATCH(disp));
3165 REQUIRE(dispp != NULL && *dispp == NULL);
3169 UNLOCK(&disp->lock);
3175 * It is important to lock the manager while we are deleting the dispatch,
3176 * since dns_dispatch_getudp will call dispatch_find, which returns to
3177 * the caller a dispatch but does not attach to it until later. _getudp
3178 * locks the manager, however, so locking it here will keep us from attaching
3179 * to a dispatcher that is in the process of going away.
/*
 * dns_dispatch_detach: drop one reference to the dispatcher in '*dispp'.
 *
 * When the reference count reaches zero, any pending receive on the
 * dispatcher's own socket and on every socket in disp->activesockets is
 * cancelled and the dispatcher is marked as shutting down.
 * destroy_disp_ok() then decides whether the control event is sent to
 * task[0].
 * NOTE(review): the lock acquisition, the decrement itself, the clearing
 * of '*dispp' and the 'killit' test guarding isc_task_send() are on lines
 * not visible in this listing.
 */
3182 dns_dispatch_detach(dns_dispatch_t **dispp) {
3183 dns_dispatch_t *disp;
3184 dispsocket_t *dispsock;
3185 isc_boolean_t killit;
3187 REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
3194 INSIST(disp->refcount > 0);
3196 if (disp->refcount == 0) {
/* Last reference gone: stop all outstanding receives. */
3197 if (disp->recv_pending > 0)
3198 isc_socket_cancel(disp->socket, disp->task[0],
3199 ISC_SOCKCANCEL_RECV);
3200 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3202 dispsock = ISC_LIST_NEXT(dispsock, link)) {
3203 isc_socket_cancel(dispsock->socket, dispsock->task,
3204 ISC_SOCKCANCEL_RECV);
3206 disp->shutting_down = 1;
3209 dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
/* Decide under the lock, act on the decision after releasing it. */
3211 killit = destroy_disp_ok(disp);
3212 UNLOCK(&disp->lock);
3214 isc_task_send(disp->task[0], &disp->ctlevent);
/*
 * dns_dispatch_addresponse2: register interest in a response from 'dest'
 * on dispatcher 'disp'.
 *
 * A response entry is taken from the manager's rpool, given a message ID
 * that is not already in use for (dest, id, local port), linked into the
 * QID hash table and handed the caller's 'task', 'action' and 'arg'.
 * For EXCLUSIVE dispatchers a dedicated socket is obtained with
 * get_dispsocket() and 'sockmgr' must be non-NULL.
 *
 * Return values visible in this listing:
 *   ISC_R_SUCCESS       entry registered
 *   ISC_R_SHUTTINGDOWN  the dispatcher is shutting down
 *   ISC_R_QUOTA         disp->requests has reached disp->maxrequests
 *   ISC_R_NOMORE        returned after the ID search loop (64 attempts)
 *   ISC_R_NOMEMORY      returned on the rpool allocation failure path
 * NOTE(review): the stores through 'idp' and 'resp', the lock acquisition
 * and some failure returns are on lines not visible here.
 */
3218 dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3219 isc_task_t *task, isc_taskaction_t action, void *arg,
3220 dns_messageid_t *idp, dns_dispentry_t **resp,
3221 isc_socketmgr_t *sockmgr)
3223 dns_dispentry_t *res;
3224 unsigned int bucket;
3225 in_port_t localport = 0;
3230 dispsocket_t *dispsocket = NULL;
3231 isc_result_t result;
3233 REQUIRE(VALID_DISPATCH(disp));
3234 REQUIRE(task != NULL);
3235 REQUIRE(dest != NULL);
3236 REQUIRE(resp != NULL && *resp == NULL);
3237 REQUIRE(idp != NULL);
3238 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3239 REQUIRE(sockmgr != NULL);
3243 if (disp->shutting_down == 1) {
3244 UNLOCK(&disp->lock);
3245 return (ISC_R_SHUTTINGDOWN);
3248 if (disp->requests >= disp->maxrequests) {
3249 UNLOCK(&disp->lock);
3250 return (ISC_R_QUOTA);
3253 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
3254 disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
3255 dispsocket_t *oldestsocket;
3256 dns_dispentry_t *oldestresp;
3257 dns_dispatchevent_t *rev;
3260 * Kill oldest outstanding query if the number of sockets
3261 * exceeds the quota to keep the room for new queries.
3263 oldestsocket = ISC_LIST_HEAD(disp->activesockets);
3264 oldestresp = oldestsocket->resp;
3265 if (oldestresp != NULL && !oldestresp->item_out) {
3266 rev = allocate_devent(oldestresp->disp);
3268 rev->buffer.base = NULL;
3269 rev->result = ISC_R_CANCELED;
3270 rev->id = oldestresp->id;
3271 ISC_EVENT_INIT(rev, sizeof(*rev), 0,
3272 NULL, DNS_EVENT_DISPATCH,
3274 oldestresp->arg, oldestresp,
3276 oldestresp->item_out = ISC_TRUE;
3277 isc_task_send(oldestresp->task,
3278 ISC_EVENT_PTR(&rev));
3279 inc_stats(disp->mgr,
3280 dns_resstatscounter_dispabort);
3285 * Move this entry to the tail so that it won't (easily) be
3286 * examined before actually being canceled.
3288 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
3289 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
3292 qid = DNS_QID(disp);
3294 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3296 * Get a separate UDP socket with a random port number.
3298 result = get_dispsocket(disp, dest, sockmgr, &dispsocket,
3300 if (result != ISC_R_SUCCESS) {
3301 UNLOCK(&disp->lock);
3302 inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
3306 localport = disp->localport;
3310 * Try somewhat hard to find an unique ID.
3313 id = (dns_messageid_t)dispatch_random(DISP_ARC4CTX(disp));
3314 bucket = dns_hash(qid, dest, id, localport);
3316 for (i = 0; i < 64; i++) {
3317 if (entry_search(qid, dest, id, localport, bucket) == NULL) {
3321 id += qid->qid_increment;
3323 bucket = dns_hash(qid, dest, id, localport);
3328 UNLOCK(&disp->lock);
3329 return (ISC_R_NOMORE);
3332 res = isc_mempool_get(disp->mgr->rpool);
3334 UNLOCK(&disp->lock);
3335 if (dispsocket != NULL)
3336 destroy_dispsocket(disp, &dispsocket);
3337 return (ISC_R_NOMEMORY);
/* Populate the new entry; it holds its own reference on 'task'. */
3343 isc_task_attach(task, &res->task);
3346 res->port = localport;
3347 res->bucket = bucket;
3349 res->action = action;
3351 res->dispsocket = dispsocket;
3352 if (dispsocket != NULL)
3353 dispsocket->resp = res;
3354 res->item_out = ISC_FALSE;
3355 ISC_LIST_INIT(res->items);
3356 ISC_LINK_INIT(res, link);
3357 res->magic = RESPONSE_MAGIC;
3360 ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3363 request_log(disp, res, LVL(90),
3364 "attached to task %p", res->task);
/*
 * UDP dispatchers and already-connected ones start receiving right away;
 * if that fails the freshly added entry is backed out again.
 */
3366 if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3367 ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
3368 result = startrecv(disp, dispsocket);
3369 if (result != ISC_R_SUCCESS) {
3371 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3374 if (dispsocket != NULL)
3375 destroy_dispsocket(disp, &dispsocket);
3380 UNLOCK(&disp->lock);
3381 isc_task_detach(&res->task);
3382 isc_mempool_put(disp->mgr->rpool, res);
3387 if (dispsocket != NULL)
3388 ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3390 UNLOCK(&disp->lock);
3395 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3396 INSIST(res->dispsocket != NULL);
3398 return (ISC_R_SUCCESS);
/*
 * dns_dispatch_addresponse: variant of dns_dispatch_addresponse2() for
 * dispatchers that are not EXCLUSIVE (enforced by the REQUIRE below).
 * The arguments shown are forwarded to dns_dispatch_addresponse2() and its
 * result is returned.
 * NOTE(review): the tail of the forwarded argument list is not visible in
 * this listing.
 */
3402 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3403 isc_task_t *task, isc_taskaction_t action, void *arg,
3404 dns_messageid_t *idp, dns_dispentry_t **resp)
3406 REQUIRE(VALID_DISPATCH(disp));
3407 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3409 return (dns_dispatch_addresponse2(disp, dest, task, action, arg,
/*
 * dns_dispatch_starttcp: mark dispatcher 'disp' as CONNECTED and start
 * receiving on it.  The result of startrecv() is deliberately ignored.
 */
3414 dns_dispatch_starttcp(dns_dispatch_t *disp) {
3416 REQUIRE(VALID_DISPATCH(disp));
3418 dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3421 disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
3422 (void)startrecv(disp, NULL);
3423 UNLOCK(&disp->lock);
/*
 * dns_dispatch_removeresponse: unregister the response entry '*resp' and
 * release everything attached to it.
 *
 * Visible steps:
 *  - if the dispatcher's reference count has reached zero, pending
 *    receives on its socket and on all active sockets are cancelled and it
 *    is marked as shutting down;
 *  - the entry is unlinked from its QID hash bucket;
 *  - an event that was posted to the entry's task but not yet delivered is
 *    pulled back with isc_task_unsend();
 *  - the event in hand and all events queued on res->items are freed
 *    together with their buffers;
 *  - the entry's task reference is dropped, its per-query socket (if any)
 *    has its receive cancelled and its back pointer cleared, and the entry
 *    goes back to the manager's rpool;
 *  - finally destroy_disp_ok() decides whether the control event is sent
 *    to task[0].
 *
 * 'sockevent', when non-NULL, must point to a non-NULL event.
 * NOTE(review): the lock acquisition, counter decrements, clearing of
 * '*resp' and the 'killit' test are on lines not visible in this listing.
 */
3427 dns_dispatch_removeresponse(dns_dispentry_t **resp,
3428 dns_dispatchevent_t **sockevent)
3430 dns_dispatchmgr_t *mgr;
3431 dns_dispatch_t *disp;
3432 dns_dispentry_t *res;
3433 dispsocket_t *dispsock;
3434 dns_dispatchevent_t *ev;
3435 unsigned int bucket;
3436 isc_boolean_t killit;
3438 isc_eventlist_t events;
3441 REQUIRE(resp != NULL);
3442 REQUIRE(VALID_RESPONSE(*resp));
3448 REQUIRE(VALID_DISPATCH(disp));
3450 REQUIRE(VALID_DISPATCHMGR(mgr));
3452 qid = DNS_QID(disp);
3454 if (sockevent != NULL) {
3455 REQUIRE(*sockevent != NULL);
3464 INSIST(disp->requests > 0);
3466 INSIST(disp->refcount > 0);
3468 if (disp->refcount == 0) {
3469 if (disp->recv_pending > 0)
3470 isc_socket_cancel(disp->socket, disp->task[0],
3471 ISC_SOCKCANCEL_RECV);
3472 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3474 dispsock = ISC_LIST_NEXT(dispsock, link)) {
3475 isc_socket_cancel(dispsock->socket, dispsock->task,
3476 ISC_SOCKCANCEL_RECV);
3478 disp->shutting_down = 1;
3481 bucket = res->bucket;
3484 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3487 if (ev == NULL && res->item_out) {
3489 * We've posted our event, but the caller hasn't gotten it
3490 * yet. Take it back.
3492 ISC_LIST_INIT(events);
3493 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3496 * We had better have gotten it back.
3499 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
3503 REQUIRE(res->item_out == ISC_TRUE);
3504 res->item_out = ISC_FALSE;
3505 if (ev->buffer.base != NULL)
3506 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3507 free_devent(disp, ev);
3510 request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3511 isc_task_detach(&res->task);
3513 if (res->dispsocket != NULL) {
3514 isc_socket_cancel(res->dispsocket->socket,
3515 res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3516 res->dispsocket->resp = NULL;
3520 * Free any buffered requests as well
3522 ev = ISC_LIST_HEAD(res->items);
3523 while (ev != NULL) {
3524 ISC_LIST_UNLINK(res->items, ev, ev_link);
3525 if (ev->buffer.base != NULL)
3526 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3527 free_devent(disp, ev);
3528 ev = ISC_LIST_HEAD(res->items);
3531 isc_mempool_put(disp->mgr->rpool, res);
3532 if (disp->shutting_down == 1)
3535 (void)startrecv(disp, NULL);
3537 killit = destroy_disp_ok(disp);
3538 UNLOCK(&disp->lock);
3540 isc_task_send(disp->task[0], &disp->ctlevent);
/*
 * do_cancel: deliver the dispatcher's single "failsafe" shutdown event to
 * one registered response entry.
 *
 * Nothing is done if the failsafe event has already been sent
 * (disp->shutdown_out == 1).  Otherwise the QID table is walked with
 * linear_first()/linear_next(), skipping entries that already have an
 * event outstanding (item_out).  The entry the walk stops at receives
 * disp->failsafe_ev, carrying disp->shutdown_why as its result and an
 * empty buffer; the entry is then flagged item_out and the dispatcher
 * shutdown_out.
 * NOTE(review): locking around the QID table and the early-return
 * statements are on lines not visible in this listing.
 */
3544 do_cancel(dns_dispatch_t *disp) {
3545 dns_dispatchevent_t *ev;
3546 dns_dispentry_t *resp;
3549 if (disp->shutdown_out == 1)
3552 qid = DNS_QID(disp);
3555 * Search for the first response handler without packets outstanding
3556 * unless a specific hander is given.
3559 for (resp = linear_first(qid);
3560 resp != NULL && resp->item_out;
3562 resp = linear_next(qid, resp);
3565 * No one to send the cancel event to, so nothing to do.
3571 * Send the shutdown failsafe event to this resp.
3573 ev = disp->failsafe_ev;
3574 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3575 resp->action, resp->arg, resp, NULL, NULL);
3576 ev->result = disp->shutdown_why;
3577 ev->buffer.base = NULL;
3578 ev->buffer.length = 0;
3579 disp->shutdown_out = 1;
3580 request_log(disp, resp, LVL(10),
3581 "cancel: failsafe event %p -> task %p",
3583 resp->item_out = ISC_TRUE;
3584 isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
/*
 * dns_dispatch_getsocket: return the socket stored in 'disp' (disp->socket).
 * No reference is taken and no lock is acquired here.
 */
3590 dns_dispatch_getsocket(dns_dispatch_t *disp) {
3591 REQUIRE(VALID_DISPATCH(disp));
3593 return (disp->socket);
/*
 * dns_dispatch_getentrysocket: return the per-query socket of response
 * entry 'resp' when the entry has a dispsocket.
 * NOTE(review): the value returned when resp->dispsocket is NULL is on a
 * line not visible in this listing.
 */
3597 dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3598 REQUIRE(VALID_RESPONSE(resp));
3600 if (resp->dispsocket != NULL)
3601 return (resp->dispsocket->socket);
/*
 * dns_dispatch_getlocaladdress: copy the dispatcher's local address into
 * '*addrp'.
 *
 * Only UDP dispatchers are supported: for them disp->local is copied and
 * ISC_R_SUCCESS returned; for any other socket type '*addrp' is left
 * untouched and ISC_R_NOTIMPLEMENTED is returned.
 */
3607 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3609 REQUIRE(VALID_DISPATCH(disp));
3610 REQUIRE(addrp != NULL);
3612 if (disp->socktype == isc_sockettype_udp) {
3613 *addrp = disp->local;
3614 return (ISC_R_SUCCESS);
3616 return (ISC_R_NOTIMPLEMENTED);
/*
 * dns_dispatch_cancel: put dispatcher 'disp' into the shutting-down state
 * with ISC_R_CANCELED recorded as the reason.  If it is already shutting
 * down the lock is simply released again.
 * NOTE(review): the lock acquisition and whatever is done between setting
 * the flag and unlocking are on lines not visible in this listing.
 */
3620 dns_dispatch_cancel(dns_dispatch_t *disp) {
3621 REQUIRE(VALID_DISPATCH(disp));
3625 if (disp->shutting_down == 1) {
3626 UNLOCK(&disp->lock);
3630 disp->shutdown_why = ISC_R_CANCELED;
3631 disp->shutting_down = 1;
3634 UNLOCK(&disp->lock);
/*
 * dns_dispatch_getattributes: return the current attribute bits of 'disp'.
 * The value is read without taking disp->lock.
 */
3640 dns_dispatch_getattributes(dns_dispatch_t *disp) {
3641 REQUIRE(VALID_DISPATCH(disp));
3644 * We don't bother locking disp here; it's the caller's responsibility
3645 * to use only non volatile flags.
3647 return (disp->attributes);
/*
 * dns_dispatch_changeattributes: replace the attribute bits selected by
 * 'mask' with the corresponding bits of 'attributes'.
 *
 * When the NOLISTEN bit is part of 'mask' and actually changes:
 *   - clearing it restarts receiving via startrecv() (result ignored);
 *   - setting it cancels a pending receive on the dispatcher's socket.
 *
 * Restrictions (REQUIREs below): the EXCLUSIVE bit may not be passed in
 * 'attributes', and NOLISTEN may not be passed for an EXCLUSIVE dispatcher.
 */
3651 dns_dispatch_changeattributes(dns_dispatch_t *disp,
3652 unsigned int attributes, unsigned int mask)
3654 REQUIRE(VALID_DISPATCH(disp));
3655 /* Exclusive attribute can only be set on creation */
3656 REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3657 /* Also, a dispatch with randomport specified cannot start listening */
3658 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3659 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3662 * Should check for valid attributes here!
3667 if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3668 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3669 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3670 disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3671 (void)startrecv(disp, NULL);
3672 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3674 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3675 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3676 if (disp->recv_pending != 0)
3677 isc_socket_cancel(disp->socket, disp->task[0],
3678 ISC_SOCKCANCEL_RECV);
/* Generic update: only the bits named in 'mask' are replaced. */
3682 disp->attributes &= ~mask;
3683 disp->attributes |= (attributes & mask);
3684 UNLOCK(&disp->lock);
/*
 * dns_dispatch_importrecv: feed a receive event obtained elsewhere into
 * dispatcher 'disp'.
 *
 * The dispatcher must have NOLISTEN set.  A new socket event of type
 * DNS_EVENT_IMPORTRECVDONE (action udp_shrecv, argument 'disp') is
 * allocated, the received bytes are copied into a buffer from
 * allocate_udp_buffer(), the remaining event fields are copied from
 * 'event', and the new event is sent to task[0].  The incoming payload
 * must not exceed the manager's buffer size (INSIST below).
 * If the buffer cannot be obtained, the freshly allocated event is freed
 * again.
 * NOTE(review): the failure returns are on lines not visible here.
 */
3688 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3690 isc_socketevent_t *sevent, *newsevent;
3692 REQUIRE(VALID_DISPATCH(disp));
3693 REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3694 REQUIRE(event != NULL);
3696 sevent = (isc_socketevent_t *)event;
3698 INSIST(sevent->n <= disp->mgr->buffersize);
3699 newsevent = (isc_socketevent_t *)
3700 isc_event_allocate(disp->mgr->mctx, NULL,
3701 DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3702 disp, sizeof(isc_socketevent_t));
3703 if (newsevent == NULL)
3706 buf = allocate_udp_buffer(disp);
3708 isc_event_free(ISC_EVENT_PTR(&newsevent));
/* Copy only the bytes received; region.length is the manager's buffer size. */
3711 memcpy(buf, sevent->region.base, sevent->n);
3712 newsevent->region.base = buf;
3713 newsevent->region.length = disp->mgr->buffersize;
3714 newsevent->n = sevent->n;
3715 newsevent->result = sevent->result;
3716 newsevent->address = sevent->address;
3717 newsevent->timestamp = sevent->timestamp;
3718 newsevent->pktinfo = sevent->pktinfo;
3719 newsevent->attributes = sevent->attributes;
3721 isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
/*
 * dns_dispatchset_get: pick a dispatcher from the set 'dset', using
 * dset->cur as the index of the one to hand out.
 *
 * A NULL 'dset' or a set with ndisp == 0 is treated as "not configured".
 * NOTE(review): the lock acquisition, the advance/wrap of dset->cur and
 * the return statements are on lines not visible in this listing; the
 * visible test 'dset->cur == dset->ndisp' suggests a round-robin wrap.
 */
3725 dns_dispatchset_get(dns_dispatchset_t *dset) {
3726 dns_dispatch_t *disp;
3728 /* check that dispatch set is configured */
3729 if (dset == NULL || dset->ndisp == 0)
3733 disp = dset->dispatches[dset->cur];
3735 if (dset->cur == dset->ndisp)
3737 UNLOCK(&dset->lock);
/*
 * dns_dispatchset_create: build a set of 'n' UDP dispatchers based on
 * 'source'.
 *
 * Slot 0 of the set is an additional reference to 'source' itself; slots
 * 1..n-1 are new dispatchers created with dispatch_createudp() using
 * source->maxrequests.  The set keeps its own reference on 'mctx'.
 *
 * Requires a valid UDP 'source' and a non-NULL 'dsetp' pointing to NULL.
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY on allocation failure; on
 * failure the dispatchers created so far are detached and the partially
 * built set is released.
 * NOTE(review): the store through 'dsetp', the assignment of dset->ndisp
 * and the cleanup labels are on lines not visible in this listing.
 */
3743 dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr,
3744 isc_taskmgr_t *taskmgr, dns_dispatch_t *source,
3745 dns_dispatchset_t **dsetp, int n)
3747 isc_result_t result;
3748 dns_dispatchset_t *dset;
3749 dns_dispatchmgr_t *mgr;
3752 REQUIRE(VALID_DISPATCH(source));
3753 REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0);
3754 REQUIRE(dsetp != NULL && *dsetp == NULL);
3758 dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t));
3760 return (ISC_R_NOMEMORY);
3761 memset(dset, 0, sizeof(*dset));
3763 result = isc_mutex_init(&dset->lock);
3764 if (result != ISC_R_SUCCESS)
3767 dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n);
3769 result = ISC_R_NOMEMORY;
3773 isc_mem_attach(mctx, &dset->mctx);
/* The source dispatcher itself occupies the first slot. */
3777 dset->dispatches[0] = NULL;
3778 dns_dispatch_attach(source, &dset->dispatches[0]);
3781 for (i = 1; i < n; i++) {
3782 dset->dispatches[i] = NULL;
3783 result = dispatch_createudp(mgr, sockmgr, taskmgr,
3785 source->maxrequests,
3787 &dset->dispatches[i],
3789 if (result != ISC_R_SUCCESS)
3796 return (ISC_R_SUCCESS);
/* Failure unwinding: only slots [0, i) hold dispatchers at this point. */
3801 for (j = 0; j < i; j++)
3802 dns_dispatch_detach(&(dset->dispatches[j]));
3803 isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n);
3804 if (dset->mctx == mctx)
3805 isc_mem_detach(&dset->mctx);
3808 DESTROYLOCK(&dset->lock);
3811 isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t));
/*
 * dns_dispatchset_cancelall: for every dispatcher in 'dset', cancel all
 * socket operations (ISC_SOCKCANCEL_ALL) issued on its socket for 'task'.
 * 'dset' must not be NULL.
 */
3816 dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) {
3819 REQUIRE(dset != NULL);
3821 for (i = 0; i < dset->ndisp; i++) {
3823 sock = dns_dispatch_getsocket(dset->dispatches[i]);
3824 isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL);
/*
 * dns_dispatchset_destroy: tear down the dispatch set in '*dsetp'.
 *
 * Every dispatcher in the set is detached, the pointer array is returned
 * to the set's memory context, the lock is destroyed, and the set
 * structure itself is freed while dropping the set's reference on that
 * memory context.
 * NOTE(review): the load from and clearing of '*dsetp' are on lines not
 * visible in this listing.
 */
3829 dns_dispatchset_destroy(dns_dispatchset_t **dsetp) {
3830 dns_dispatchset_t *dset;
3833 REQUIRE(dsetp != NULL && *dsetp != NULL);
3836 for (i = 0; i < dset->ndisp; i++)
3837 dns_dispatch_detach(&(dset->dispatches[i]));
3838 isc_mem_put(dset->mctx, dset->dispatches,
3839 sizeof(dns_dispatch_t *) * dset->ndisp);
3840 DESTROYLOCK(&dset->lock);
3841 isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t));
/*
 * dns_dispatchmgr_dump: debugging aid that walks mgr->list and prints, for
 * each dispatcher, its pointer and its formatted local address to stdout.
 * NOTE(review): the declaration of 'foo' and the end of the function are
 * not visible in this listing.
 */
3848 dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
3849 dns_dispatch_t *disp;
3852 disp = ISC_LIST_HEAD(mgr->list);
3853 while (disp != NULL) {
3854 isc_sockaddr_format(&disp->local, foo, sizeof(foo));
3855 printf("\tdispatch %p, addr %s\n", disp, foo);
3856 disp = ISC_LIST_NEXT(disp, link);