2 * Copyright (C) 2004-2009, 2011-2015 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: dispatch.c,v 1.175 2011/11/29 01:03:47 marka Exp $ */
25 #include <sys/types.h>
29 #include <isc/entropy.h>
31 #include <isc/mutex.h>
32 #include <isc/portset.h>
33 #include <isc/print.h>
34 #include <isc/random.h>
35 #include <isc/socket.h>
36 #include <isc/stats.h>
37 #include <isc/string.h>
43 #include <dns/dispatch.h>
44 #include <dns/events.h>
46 #include <dns/message.h>
47 #include <dns/portlist.h>
48 #include <dns/stats.h>
49 #include <dns/tcpmsg.h>
50 #include <dns/types.h>
52 typedef ISC_LIST(dns_dispentry_t) dns_displist_t;
54 typedef struct dispsocket dispsocket_t;
55 typedef ISC_LIST(dispsocket_t) dispsocketlist_t;
57 typedef struct dispportentry dispportentry_t;
58 typedef ISC_LIST(dispportentry_t) dispportlist_t;
60 /* ARC4 Random generator state */
61 typedef struct arc4ctx {
66 isc_entropy_t *entropy; /*%< entropy source for ARC4 */
70 typedef struct dns_qid {
72 unsigned int qid_nbuckets; /*%< hash table size */
73 unsigned int qid_increment; /*%< id increment on collision */
75 dns_displist_t *qid_table; /*%< the table itself */
76 dispsocketlist_t *sock_table; /*%< socket table */
79 struct dns_dispatchmgr {
84 dns_portlist_t *portlist;
86 isc_entropy_t *entropy; /*%< entropy source */
88 /* Locked by "lock". */
91 ISC_LIST(dns_dispatch_t) list;
93 /* Locked by arc4_lock. */
94 isc_mutex_t arc4_lock;
95 arc4ctx_t arc4ctx; /*%< ARC4 context for QID */
97 /* locked by buffer lock */
99 isc_mutex_t buffer_lock;
100 unsigned int buffers; /*%< allocated buffers */
101 unsigned int buffersize; /*%< size of each buffer */
102 unsigned int maxbuffers; /*%< max buffers */
104 /* Locked internally. */
105 isc_mutex_t depool_lock;
106 isc_mempool_t *depool; /*%< pool for dispatch events */
107 isc_mutex_t rpool_lock;
108 isc_mempool_t *rpool; /*%< pool for replies */
109 isc_mutex_t dpool_lock;
110 isc_mempool_t *dpool; /*%< dispatch allocations */
111 isc_mutex_t bpool_lock;
112 isc_mempool_t *bpool; /*%< pool for buffers */
113 isc_mutex_t spool_lock;
114 isc_mempool_t *spool; /*%< pool for dispsocks */
117 * Locked by qid->lock if qid exists; otherwise, can be used without
119 * Memory footprint considerations: this is a simple implementation of
120 * available ports, i.e., an ordered array of the actual port numbers.
121 * This will require about 256KB of memory in the worst case (128KB for
122 * each of IPv4 and IPv6). We could reduce it by representing it in a
123 * more sophisticated way such as a list (or array) of ranges that are
124 * searched to identify a specific port. Our decision here is the saved
125 * memory isn't worth the implementation complexity, considering the
126 * fact that the whole BIND9 process (which is mainly named) already
127 * requires a pretty large memory footprint. We may, however, have to
128 * revisit the decision when we want to use it as a separate module for
129 * an environment where the memory requirement is more severe.
131 in_port_t *v4ports; /*%< available ports for IPv4 */
132 unsigned int nv4ports; /*%< # of available ports for IPv4 */
133 in_port_t *v6ports; /*%< available ports for IPv6 */
134 unsigned int nv6ports; /*%< # of available ports for IPv6 */
137 #define MGR_SHUTTINGDOWN 0x00000001U
138 #define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0)
140 #define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
142 struct dns_dispentry {
144 dns_dispatch_t *disp;
150 isc_taskaction_t action;
152 isc_boolean_t item_out;
153 dispsocket_t *dispsocket;
154 ISC_LIST(dns_dispatchevent_t) items;
155 ISC_LINK(dns_dispentry_t) link;
159 * Maximum number of dispatch sockets that can be pooled for reuse. The
160 * appropriate value may vary, but experiments have shown a busy caching server
161 * may need more than 1000 sockets concurrently opened. The maximum allowable
162 * number of dispatch sockets (per manager) will be set to the double of this
165 #ifndef DNS_DISPATCH_POOLSOCKS
166 #define DNS_DISPATCH_POOLSOCKS 2048
170 * Quota to control the number of dispatch sockets. If a dispatch has more
171 * than the quota of sockets, new queries will purge oldest ones, so that
172 * a massive number of outstanding queries won't prevent subsequent queries
173 * (especially if the older ones take longer time and result in timeout).
175 #ifndef DNS_DISPATCH_SOCKSQUOTA
176 #define DNS_DISPATCH_SOCKSQUOTA 3072
181 isc_socket_t *socket;
182 dns_dispatch_t *disp;
184 in_port_t localport; /* XXX: should be removed later */
185 dispportentry_t *portentry;
186 dns_dispentry_t *resp;
188 ISC_LINK(dispsocket_t) link;
190 ISC_LINK(dispsocket_t) blink;
194 * A port table entry. We remember every port we first open in a table with a
195 * reference counter so that we can 'reuse' the same port (with different
196 * destination addresses) using the SO_REUSEADDR socket option.
198 struct dispportentry {
201 ISC_LINK(struct dispportentry) link;
204 #ifndef DNS_DISPATCH_PORTTABLESIZE
205 #define DNS_DISPATCH_PORTTABLESIZE 1024
208 #define INVALID_BUCKET (0xffffdead)
211 * Number of tasks for each dispatch that use separate sockets for different
212 * transactions. This must be a power of 2 as it will divide 32 bit numbers
213 * to get a uniformly random task selection. See get_dispsocket().
215 #define MAX_INTERNAL_TASKS 64
217 struct dns_dispatch {
219 unsigned int magic; /*%< magic */
220 dns_dispatchmgr_t *mgr; /*%< dispatch manager */
223 * internal task buckets. We use multiple tasks to distribute various
224 * socket events well when using separate dispatch sockets. We use the
225 * 1st task (task[0]) for internal control events.
227 isc_task_t *task[MAX_INTERNAL_TASKS];
228 isc_socket_t *socket; /*%< isc socket attached to */
229 isc_sockaddr_t local; /*%< local address */
230 in_port_t localport; /*%< local UDP port */
231 unsigned int maxrequests; /*%< max requests */
232 isc_event_t *ctlevent;
234 isc_mutex_t sepool_lock;
235 isc_mempool_t *sepool; /*%< pool for socket events */
237 /*% Locked by mgr->lock. */
238 ISC_LINK(dns_dispatch_t) link;
240 /* Locked by "lock". */
241 isc_mutex_t lock; /*%< locks all below */
242 isc_sockettype_t socktype;
243 unsigned int attributes;
244 unsigned int refcount; /*%< number of users */
245 dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */
246 unsigned int shutting_down : 1,
250 recv_pending : 1; /*%< is a recv() pending? */
251 isc_result_t shutdown_why;
252 ISC_LIST(dispsocket_t) activesockets;
253 ISC_LIST(dispsocket_t) inactivesockets;
254 unsigned int nsockets;
255 unsigned int requests; /*%< how many requests we have */
256 unsigned int tcpbuffers; /*%< allocated buffers */
257 dns_tcpmsg_t tcpmsg; /*%< for tcp streams */
259 arc4ctx_t arc4ctx; /*%< for QID/UDP port num */
260 dispportlist_t *port_table; /*%< hold ports 'owned' by us */
261 isc_mempool_t *portpool; /*%< port table entries */
264 #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ')
265 #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC)
267 #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p')
268 #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
270 #define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c')
271 #define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
273 #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p')
274 #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
276 #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
277 #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
/*
 * Select the QID table for a dispatch: a TCP dispatch uses its own
 * 'qid'; any other socket type uses the manager's 'qid'.
 *
 * The whole expansion is parenthesized so that operators adjacent to
 * the macro at the point of use cannot split the conditional.
 * Note that 'disp' is evaluated more than once.
 */
#define DNS_QID(disp) \
	(((disp)->socktype == isc_sockettype_tcp) ? \
	 (disp)->qid : (disp)->mgr->qid)

/*
 * Select the ARC4 context for a dispatch: a UDP dispatch uses its own
 * 'arc4ctx'; any other socket type uses the manager's.  The result is
 * a pointer to the chosen context.  'disp' is evaluated more than once.
 */
#define DISP_ARC4CTX(disp) \
	(((disp)->socktype == isc_sockettype_udp) ? \
	 (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
285 * Locking a query port buffer is a bit tricky. We access the buffer without
286 * locking until qid is created. Technically, there is a possibility of race
287 * between the creation of qid and access to the port buffer; in practice,
288 * however, this should be safe because qid isn't created until the first
289 * dispatch is created and there should be no contending situation until then.
/*
 * Take or release qid->lock around port-buffer access, but only when the
 * manager already has a qid; when 'qid' is NULL the macros do nothing.
 *
 * Each macro is wrapped in do { ... } while (0) so it behaves as a single
 * statement.  The bare 'if' form would capture the 'else' branch of an
 * enclosing if/else at the call site.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)
297 static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
298 dns_messageid_t, in_port_t, unsigned int);
299 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
300 static void destroy_disp(isc_task_t *task, isc_event_t *event);
301 static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
302 static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
303 static void udp_exrecv(isc_task_t *, isc_event_t *);
304 static void udp_shrecv(isc_task_t *, isc_event_t *);
305 static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
306 static void tcp_recv(isc_task_t *, isc_event_t *);
307 static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
308 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
310 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
311 static void *allocate_udp_buffer(dns_dispatch_t *disp);
312 static inline void free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
313 static inline dns_dispatchevent_t *allocate_devent(dns_dispatch_t *disp);
314 static void do_cancel(dns_dispatch_t *disp);
315 static dns_dispentry_t *linear_first(dns_qid_t *disp);
316 static dns_dispentry_t *linear_next(dns_qid_t *disp,
317 dns_dispentry_t *resp);
318 static void dispatch_free(dns_dispatch_t **dispp);
319 static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
320 dns_dispatch_t *disp,
321 isc_socketmgr_t *sockmgr,
322 isc_sockaddr_t *localaddr,
323 isc_socket_t **sockp,
324 isc_socket_t *dup_socket);
325 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
326 isc_socketmgr_t *sockmgr,
327 isc_taskmgr_t *taskmgr,
328 isc_sockaddr_t *localaddr,
329 unsigned int maxrequests,
330 unsigned int attributes,
331 dns_dispatch_t **dispp,
332 isc_socket_t *dup_socket);
333 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
334 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
335 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
336 unsigned int increment, dns_qid_t **qidp,
337 isc_boolean_t needaddrtable);
338 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
339 static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
340 unsigned int options, isc_socket_t **sockp,
341 isc_socket_t *dup_socket);
342 static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
343 isc_sockaddr_t *sockaddrp);
345 #define LVL(x) ISC_LOG_DEBUG(x)
348 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
349 ISC_FORMAT_PRINTF(3, 4);
352 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
356 if (! isc_log_wouldlog(dns_lctx, level))
360 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
363 isc_log_write(dns_lctx,
364 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
365 level, "dispatchmgr %p: %s", mgr, msgbuf);
369 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
370 if (mgr->stats != NULL)
371 isc_stats_increment(mgr->stats, counter);
375 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
376 ISC_FORMAT_PRINTF(3, 4);
379 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
383 if (! isc_log_wouldlog(dns_lctx, level))
387 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
390 isc_log_write(dns_lctx,
391 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
392 level, "dispatch %p: %s", disp, msgbuf);
396 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
397 int level, const char *fmt, ...)
398 ISC_FORMAT_PRINTF(4, 5);
401 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
402 int level, const char *fmt, ...)
408 if (! isc_log_wouldlog(dns_lctx, level))
412 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
415 if (VALID_RESPONSE(resp)) {
416 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
417 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
418 DNS_LOGMODULE_DISPATCH, level,
419 "dispatch %p response %p %s: %s", disp, resp,
422 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
423 DNS_LOGMODULE_DISPATCH, level,
424 "dispatch %p req/resp %p: %s", disp, resp,
430 * ARC4 random number generator derived from OpenBSD.
431 * Only dispatch_random() and dispatch_uniformrandom() are expected
432 * to be called from general dispatch routines; the rest of them are subroutines
435 * The original copyright follows:
436 * Copyright (c) 1996, David Mazieres <dm@uun.org>
437 * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
439 * Permission to use, copy, modify, and distribute this software for any
440 * purpose with or without fee is hereby granted, provided that the above
441 * copyright notice and this permission notice appear in all copies.
443 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
444 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
445 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
446 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
447 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
448 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
449 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
453 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
457 for (n = 0; n < 256; n++)
462 actx->entropy = entropy; /* don't have to attach */
467 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
472 for (n = 0; n < 256; n++) {
473 actx->i = (actx->i + 1);
474 si = actx->s[actx->i];
475 actx->j = (actx->j + si + dat[n % datlen]);
476 actx->s[actx->i] = actx->s[actx->j];
477 actx->s[actx->j] = si;
482 static inline isc_uint8_t
483 dispatch_arc4get8(arc4ctx_t *actx) {
486 actx->i = (actx->i + 1);
487 si = actx->s[actx->i];
488 actx->j = (actx->j + si);
489 sj = actx->s[actx->j];
490 actx->s[actx->i] = sj;
491 actx->s[actx->j] = si;
493 return (actx->s[(si + sj) & 0xff]);
496 static inline isc_uint16_t
497 dispatch_arc4get16(arc4ctx_t *actx) {
500 val = dispatch_arc4get8(actx) << 8;
501 val |= dispatch_arc4get8(actx);
507 dispatch_arc4stir(arc4ctx_t *actx) {
510 unsigned char rnd[128];
511 isc_uint32_t rnd32[32];
515 if (actx->entropy != NULL) {
517 * We accept any quality of random data to avoid blocking.
519 result = isc_entropy_getdata(actx->entropy, rnd.rnd,
520 sizeof(rnd), NULL, 0);
521 RUNTIME_CHECK(result == ISC_R_SUCCESS);
523 for (i = 0; i < 32; i++)
524 isc_random_get(&rnd.rnd32[i]);
526 dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
529 * Discard early keystream, as per recommendations in:
530 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
532 for (i = 0; i < 256; i++)
533 (void)dispatch_arc4get8(actx);
536 * Derived from OpenBSD's implementation. The rationale is not clear,
537 * but should be conservative enough in safety, and reasonably large
540 actx->count = 1600000;
544 dispatch_random(arc4ctx_t *actx) {
547 if (actx->lock != NULL)
550 actx->count -= sizeof(isc_uint16_t);
551 if (actx->count <= 0)
552 dispatch_arc4stir(actx);
553 result = dispatch_arc4get16(actx);
555 if (actx->lock != NULL)
562 * For general purpose library, we don't have to be too strict about the
563 * quality of random values. Performance doesn't matter much, either.
564 * So we simply use the isc_random module to keep the library as small as
569 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
580 dispatch_random(arc4ctx_t *actx) {
591 dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
598 * Ensure the range of random numbers [min, 0xffff] be a multiple of
599 * upper_bound and contain at least a half of the 16 bit range.
602 if (upper_bound > 0x8000)
603 min = 1 + ~upper_bound; /* 0x10000 - upper_bound */
605 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
608 * This could theoretically loop forever but each retry has
609 * p > 0.5 (worst case, usually far better) of selecting a
610 * number inside the range we need, so it should rarely need
614 r = dispatch_random(actx);
619 return (r % upper_bound);
623 * Return a hash of the destination and message id.
626 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
631 ret = isc_sockaddr_hash(dest, ISC_TRUE);
632 ret ^= (id << 16) | port;
633 ret %= qid->qid_nbuckets;
635 INSIST(ret < qid->qid_nbuckets);
641 * Find the first entry in 'qid'. Returns NULL if there are no entries.
643 static dns_dispentry_t *
644 linear_first(dns_qid_t *qid) {
645 dns_dispentry_t *ret;
650 while (bucket < qid->qid_nbuckets) {
651 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
661 * Find the next entry after 'resp' in 'qid'. Return NULL if there are
664 static dns_dispentry_t *
665 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
666 dns_dispentry_t *ret;
669 ret = ISC_LIST_NEXT(resp, link);
673 bucket = resp->bucket;
675 while (bucket < qid->qid_nbuckets) {
676 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
686 * The dispatch must be locked.
689 destroy_disp_ok(dns_dispatch_t *disp)
691 if (disp->refcount != 0)
694 if (disp->recv_pending != 0)
697 if (!ISC_LIST_EMPTY(disp->activesockets))
700 if (disp->shutting_down == 0)
707 * Called when refcount reaches 0 (and safe to destroy).
709 * The dispatcher must be locked.
710 * The manager must not be locked.
713 destroy_disp(isc_task_t *task, isc_event_t *event) {
714 dns_dispatch_t *disp;
715 dns_dispatchmgr_t *mgr;
716 isc_boolean_t killmgr;
717 dispsocket_t *dispsocket;
720 INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
724 disp = event->ev_arg;
728 ISC_LIST_UNLINK(mgr->list, disp, link);
730 dispatch_log(disp, LVL(90),
731 "shutting down; detaching from sock %p, task %p",
732 disp->socket, disp->task[0]); /* XXXX */
734 if (disp->sepool != NULL) {
735 isc_mempool_destroy(&disp->sepool);
736 (void)isc_mutex_destroy(&disp->sepool_lock);
739 if (disp->socket != NULL)
740 isc_socket_detach(&disp->socket);
741 while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
742 ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
743 destroy_dispsocket(disp, &dispsocket);
745 for (i = 0; i < disp->ntasks; i++)
746 isc_task_detach(&disp->task[i]);
747 isc_event_free(&event);
749 dispatch_free(&disp);
751 killmgr = destroy_mgr_ok(mgr);
758 * Manipulate port table per dispatch: find an entry for a given port number,
759 * create a new entry, and decrement a given entry with possible clean-up.
761 static dispportentry_t *
762 port_search(dns_dispatch_t *disp, in_port_t port) {
763 dispportentry_t *portentry;
765 REQUIRE(disp->port_table != NULL);
767 portentry = ISC_LIST_HEAD(disp->port_table[port %
768 DNS_DISPATCH_PORTTABLESIZE]);
769 while (portentry != NULL) {
770 if (portentry->port == port)
772 portentry = ISC_LIST_NEXT(portentry, link);
778 static dispportentry_t *
779 new_portentry(dns_dispatch_t *disp, in_port_t port) {
780 dispportentry_t *portentry;
783 REQUIRE(disp->port_table != NULL);
785 portentry = isc_mempool_get(disp->portpool);
786 if (portentry == NULL)
789 portentry->port = port;
791 ISC_LINK_INIT(portentry, link);
794 ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
802 * The caller must not hold the qid->lock.
805 deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
806 dispportentry_t *portentry = *portentryp;
809 REQUIRE(disp->port_table != NULL);
810 REQUIRE(portentry != NULL && portentry->refs > 0);
816 if (portentry->refs == 0) {
817 ISC_LIST_UNLINK(disp->port_table[portentry->port %
818 DNS_DISPATCH_PORTTABLESIZE],
820 isc_mempool_put(disp->portpool, portentry);
824 * Set '*portentryp' to NULL inside the lock so that
825 * dispsock->portentry does not change in socket_search.
833 * Find a dispsocket for socket address 'dest', and port number 'port'.
834 * Return NULL if no such entry exists. Requires qid->lock to be held.
836 static dispsocket_t *
837 socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
840 dispsocket_t *dispsock;
842 REQUIRE(VALID_QID(qid));
843 REQUIRE(bucket < qid->qid_nbuckets);
845 dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
847 while (dispsock != NULL) {
848 if (dispsock->portentry != NULL &&
849 dispsock->portentry->port == port &&
850 isc_sockaddr_equal(dest, &dispsock->host))
852 dispsock = ISC_LIST_NEXT(dispsock, blink);
859 * Make a new socket for a single dispatch with a random port number.
860 * The caller must hold the disp->lock
863 get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
864 isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp,
869 dns_dispatchmgr_t *mgr = disp->mgr;
870 isc_socket_t *sock = NULL;
871 isc_result_t result = ISC_R_FAILURE;
873 isc_sockaddr_t localaddr;
874 unsigned int bucket = 0;
875 dispsocket_t *dispsock;
878 unsigned int bindoptions;
879 dispportentry_t *portentry = NULL;
882 if (isc_sockaddr_pf(&disp->local) == AF_INET) {
883 nports = disp->mgr->nv4ports;
884 ports = disp->mgr->v4ports;
886 nports = disp->mgr->nv6ports;
887 ports = disp->mgr->v6ports;
890 return (ISC_R_ADDRNOTAVAIL);
892 dispsock = ISC_LIST_HEAD(disp->inactivesockets);
893 if (dispsock != NULL) {
894 ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
895 sock = dispsock->socket;
896 dispsock->socket = NULL;
898 dispsock = isc_mempool_get(mgr->spool);
899 if (dispsock == NULL)
900 return (ISC_R_NOMEMORY);
903 dispsock->socket = NULL;
904 dispsock->disp = disp;
905 dispsock->resp = NULL;
906 dispsock->portentry = NULL;
908 dispsock->task = NULL;
909 isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
910 ISC_LINK_INIT(dispsock, link);
911 ISC_LINK_INIT(dispsock, blink);
912 dispsock->magic = DISPSOCK_MAGIC;
916 * Pick up a random UDP port and open a new socket with it. Avoid
917 * choosing ports that share the same destination because it will be
918 * very likely to fail in bind(2) or connect(2).
920 localaddr = disp->local;
923 for (i = 0; i < 64; i++) {
924 port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
926 isc_sockaddr_setport(&localaddr, port);
929 bucket = dns_hash(qid, dest, 0, port);
930 if (socket_search(qid, dest, port, bucket) != NULL) {
936 portentry = port_search(disp, port);
938 if (portentry != NULL)
939 bindoptions |= ISC_SOCKET_REUSEADDRESS;
940 result = open_socket(sockmgr, &localaddr, bindoptions, &sock,
942 if (result == ISC_R_SUCCESS) {
943 if (portentry == NULL) {
944 portentry = new_portentry(disp, port);
945 if (portentry == NULL) {
946 result = ISC_R_NOMEMORY;
955 } else if (result == ISC_R_NOPERM) {
956 char buf[ISC_SOCKADDR_FORMATSIZE];
957 isc_sockaddr_format(&localaddr, buf, sizeof(buf));
958 dispatch_log(disp, ISC_LOG_WARNING,
959 "open_socket(%s) -> %s: continuing",
960 buf, isc_result_totext(result));
961 } else if (result != ISC_R_ADDRINUSE)
965 if (result == ISC_R_SUCCESS) {
966 dispsock->socket = sock;
967 dispsock->host = *dest;
968 dispsock->portentry = portentry;
969 dispsock->bucket = bucket;
971 ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
973 *dispsockp = dispsock;
977 * We could keep it in the inactive list, but since this should
978 * be an exceptional case and might be resource shortage, we'd
982 isc_socket_detach(&sock);
983 destroy_dispsocket(disp, &dispsock);
990 * Destroy a dedicated dispatch socket.
993 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
994 dispsocket_t *dispsock;
998 * The dispatch must be locked.
1001 REQUIRE(dispsockp != NULL && *dispsockp != NULL);
1002 dispsock = *dispsockp;
1003 REQUIRE(!ISC_LINK_LINKED(dispsock, link));
1006 dispsock->magic = 0;
1007 if (dispsock->portentry != NULL)
1008 deref_portentry(disp, &dispsock->portentry);
1009 if (dispsock->socket != NULL)
1010 isc_socket_detach(&dispsock->socket);
1011 if (ISC_LINK_LINKED(dispsock, blink)) {
1012 qid = DNS_QID(disp);
1014 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1018 if (dispsock->task != NULL)
1019 isc_task_detach(&dispsock->task);
1020 isc_mempool_put(disp->mgr->spool, dispsock);
1026 * Deactivate a dedicated dispatch socket. Move it to the inactive list for
1027 * future reuse unless the total number of sockets exceeds the maximum.
1030 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1031 isc_result_t result;
1035 * The dispatch must be locked.
1037 ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
1038 if (dispsock->resp != NULL) {
1039 INSIST(dispsock->resp->dispsocket == dispsock);
1040 dispsock->resp->dispsocket = NULL;
1043 INSIST(dispsock->portentry != NULL);
1044 deref_portentry(disp, &dispsock->portentry);
1047 if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
1048 destroy_dispsocket(disp, &dispsock);
1050 result = isc_socket_close(dispsock->socket);
1052 qid = DNS_QID(disp);
1054 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1058 if (result == ISC_R_SUCCESS)
1059 ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
1062 * If the underlying system does not allow this
1063 * optimization, destroy this temporary structure (and
1064 * create a new one for a new transaction).
1066 INSIST(result == ISC_R_NOTIMPLEMENTED);
1067 destroy_dispsocket(disp, &dispsock);
1071 /* This kind of optimization isn't necessary for normal use */
1075 destroy_dispsocket(disp, &dispsock);
1080 * Find an entry for query ID 'id', socket address 'dest', and port number
1082 * Return NULL if no such entry exists.
1084 static dns_dispentry_t *
1085 entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
1086 in_port_t port, unsigned int bucket)
1088 dns_dispentry_t *res;
1090 REQUIRE(VALID_QID(qid));
1091 REQUIRE(bucket < qid->qid_nbuckets);
1093 res = ISC_LIST_HEAD(qid->qid_table[bucket]);
1095 while (res != NULL) {
1096 if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
1097 res->port == port) {
1100 res = ISC_LIST_NEXT(res, link);
1107 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
1108 isc_mempool_t *bpool;
1109 INSIST(buf != NULL && len != 0);
1112 switch (disp->socktype) {
1113 case isc_sockettype_tcp:
1114 INSIST(disp->tcpbuffers > 0);
1116 isc_mem_put(disp->mgr->mctx, buf, len);
1118 case isc_sockettype_udp:
1119 LOCK(&disp->mgr->buffer_lock);
1120 INSIST(disp->mgr->buffers > 0);
1121 INSIST(len == disp->mgr->buffersize);
1122 disp->mgr->buffers--;
1123 bpool = disp->mgr->bpool;
1124 UNLOCK(&disp->mgr->buffer_lock);
1125 isc_mempool_put(bpool, buf);
1134 allocate_udp_buffer(dns_dispatch_t *disp) {
1135 isc_mempool_t *bpool;
1138 LOCK(&disp->mgr->buffer_lock);
1139 bpool = disp->mgr->bpool;
1140 disp->mgr->buffers++;
1141 UNLOCK(&disp->mgr->buffer_lock);
1143 temp = isc_mempool_get(bpool);
1146 LOCK(&disp->mgr->buffer_lock);
1147 disp->mgr->buffers--;
1148 UNLOCK(&disp->mgr->buffer_lock);
1155 free_sevent(isc_event_t *ev) {
1156 isc_mempool_t *pool = ev->ev_destroy_arg;
1157 isc_socketevent_t *sev = (isc_socketevent_t *) ev;
1158 isc_mempool_put(pool, sev);
1161 static inline isc_socketevent_t *
1162 allocate_sevent(dns_dispatch_t *disp, isc_socket_t *socket,
1163 isc_eventtype_t type, isc_taskaction_t action, const void *arg)
1165 isc_socketevent_t *ev;
1168 ev = isc_mempool_get(disp->sepool);
1171 DE_CONST(arg, deconst_arg);
1172 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type,
1173 action, deconst_arg, socket,
1174 free_sevent, disp->sepool);
1175 ev->result = ISC_R_UNSET;
1176 ISC_LINK_INIT(ev, ev_link);
1177 ISC_LIST_INIT(ev->bufferlist);
1178 ev->region.base = NULL;
1188 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
1189 if (disp->failsafe_ev == ev) {
1190 INSIST(disp->shutdown_out == 1);
1191 disp->shutdown_out = 0;
1196 isc_mempool_put(disp->mgr->depool, ev);
1199 static inline dns_dispatchevent_t *
1200 allocate_devent(dns_dispatch_t *disp) {
1201 dns_dispatchevent_t *ev;
1203 ev = isc_mempool_get(disp->mgr->depool);
1206 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
1207 NULL, NULL, NULL, NULL, NULL);
1213 udp_exrecv(isc_task_t *task, isc_event_t *ev) {
1214 dispsocket_t *dispsock = ev->ev_arg;
1218 REQUIRE(VALID_DISPSOCK(dispsock));
1219 udp_recv(ev, dispsock->disp, dispsock);
1223 udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1224 dns_dispatch_t *disp = ev->ev_arg;
1228 REQUIRE(VALID_DISPATCH(disp));
1229 udp_recv(ev, disp, NULL);
1235 * If I/O result == CANCELED or error, free the buffer.
1237 * If query, free the buffer, restart.
1240 * Allocate event, fill in details.
1241 * If cannot allocate, free buffer, restart.
1242 * find target. If not found, free buffer, restart.
1243 * if event queue is not empty, queue. else, send.
1247 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1248 isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1251 isc_buffer_t source;
1253 dns_dispentry_t *resp = NULL;
1254 dns_dispatchevent_t *rev;
1255 unsigned int bucket;
1256 isc_boolean_t killit;
1257 isc_boolean_t queue_response;
1258 dns_dispatchmgr_t *mgr;
1260 isc_netaddr_t netaddr;
1263 isc_boolean_t qidlocked = ISC_FALSE;
1270 dispatch_log(disp, LVL(90),
1271 "got packet: requests %d, buffers %d, recvs %d",
1272 disp->requests, disp->mgr->buffers, disp->recv_pending);
1274 if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1276 * Unless the receive event was imported from a listening
1277 * interface, in which case the event type is
1278 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1280 INSIST(disp->recv_pending != 0);
1281 disp->recv_pending = 0;
1284 if (dispsock != NULL &&
1285 (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1287 * dispsock->resp can be NULL if this transaction was canceled
1288 * just after receiving a response. Since this socket is
1289 * exclusively used and there should be at most one receive
1290 * event the canceled event should have had no effect. So
1291 * we can (and should) deactivate the socket right now.
1293 deactivate_dispsocket(disp, dispsock);
1297 if (disp->shutting_down) {
1299 * This dispatcher is shutting down.
1301 free_buffer(disp, ev->region.base, ev->region.length);
1303 isc_event_free(&ev_in);
1306 killit = destroy_disp_ok(disp);
1307 UNLOCK(&disp->lock);
1309 isc_task_send(disp->task[0], &disp->ctlevent);
1314 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1315 if (dispsock != NULL) {
1316 resp = dispsock->resp;
1318 if (ev->result != ISC_R_SUCCESS) {
1320 * This is most likely a network error on a
1321 * connected socket. It makes no sense to
1322 * check the address or parse the packet, but it
1323 * will help to return the error to the caller.
1328 free_buffer(disp, ev->region.base, ev->region.length);
1330 isc_event_free(&ev_in);
1331 UNLOCK(&disp->lock);
1334 } else if (ev->result != ISC_R_SUCCESS) {
1335 free_buffer(disp, ev->region.base, ev->region.length);
1337 if (ev->result != ISC_R_CANCELED)
1338 dispatch_log(disp, ISC_LOG_ERROR,
1339 "odd socket result in udp_recv(): %s",
1340 isc_result_totext(ev->result));
1342 isc_event_free(&ev_in);
1343 UNLOCK(&disp->lock);
1348 * If this is from a blackholed address, drop it.
1350 isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1351 if (disp->mgr->blackhole != NULL &&
1352 dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1353 NULL, &match, NULL) == ISC_R_SUCCESS &&
1356 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1357 char netaddrstr[ISC_NETADDR_FORMATSIZE];
1358 isc_netaddr_format(&netaddr, netaddrstr,
1359 sizeof(netaddrstr));
1360 dispatch_log(disp, LVL(10),
1361 "blackholed packet from %s",
1364 free_buffer(disp, ev->region.base, ev->region.length);
1369 * Peek into the buffer to see what we can see.
1371 isc_buffer_init(&source, ev->region.base, ev->region.length);
1372 isc_buffer_add(&source, ev->n);
1373 dres = dns_message_peekheader(&source, &id, &flags);
1374 if (dres != ISC_R_SUCCESS) {
1375 free_buffer(disp, ev->region.base, ev->region.length);
1376 dispatch_log(disp, LVL(10), "got garbage packet");
1380 dispatch_log(disp, LVL(92),
1381 "got valid DNS message header, /QR %c, id %u",
1382 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1385 * Look at flags. If query, drop it. If response,
1386 * look to see where it goes.
1388 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1390 free_buffer(disp, ev->region.base, ev->region.length);
1395 * Search for the corresponding response. If we are using an exclusive
1396 * socket, we've already identified it and we can skip the search; but
1397 * the ID and the address must match the expected ones.
1400 bucket = dns_hash(qid, &ev->address, id, disp->localport);
1402 qidlocked = ISC_TRUE;
1403 resp = entry_search(qid, &ev->address, id, disp->localport,
1405 dispatch_log(disp, LVL(90),
1406 "search for response in bucket %d: %s",
1407 bucket, (resp == NULL ? "not found" : "found"));
1410 inc_stats(mgr, dns_resstatscounter_mismatch);
1411 free_buffer(disp, ev->region.base, ev->region.length);
1414 } else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1416 dispatch_log(disp, LVL(90),
1417 "response to an exclusive socket doesn't match");
1418 inc_stats(mgr, dns_resstatscounter_mismatch);
1419 free_buffer(disp, ev->region.base, ev->region.length);
1424 * Now that we have the original dispatch the query was sent
1425 * from check that the address and port the response was
1426 * sent to make sense.
1428 if (disp != resp->disp) {
1433 * Check that the socket types and ports match.
1435 if (disp->socktype != resp->disp->socktype ||
1436 isc_sockaddr_getport(&disp->local) !=
1437 isc_sockaddr_getport(&resp->disp->local)) {
1438 free_buffer(disp, ev->region.base, ev->region.length);
1443 * If each dispatch is bound to a different address
1446 * Note under Linux a packet can be sent out via IPv4 socket
1447 * and the response be received via a IPv6 socket.
1449 * Requests sent out via IPv6 should always come back in
1452 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1453 isc_sockaddr_pf(&disp->local) != PF_INET6) {
1454 free_buffer(disp, ev->region.base, ev->region.length);
1457 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1458 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1459 if (!isc_sockaddr_eqaddr(&disp->local, &resp->disp->local) &&
1460 !isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1461 !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1462 free_buffer(disp, ev->region.base, ev->region.length);
1468 queue_response = resp->item_out;
1469 rev = allocate_devent(resp->disp);
1471 free_buffer(disp, ev->region.base, ev->region.length);
1476 * At this point, rev contains the event we want to fill in, and
1477 * resp contains the information on the place to send it to.
1478 * Send the event off.
1480 isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1481 isc_buffer_add(&rev->buffer, ev->n);
1482 rev->result = ev->result;
1484 rev->addr = ev->address;
1485 rev->pktinfo = ev->pktinfo;
1486 rev->attributes = ev->attributes;
1487 if (queue_response) {
1488 ISC_LIST_APPEND(resp->items, rev, ev_link);
1490 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1492 resp->action, resp->arg, resp, NULL, NULL);
1493 request_log(disp, resp, LVL(90),
1494 "[a] Sent event %p buffer %p len %d to task %p",
1495 rev, rev->buffer.base, rev->buffer.length,
1497 resp->item_out = ISC_TRUE;
1498 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1505 * Restart recv() to get the next packet.
1508 result = startrecv(disp, dispsock);
1509 if (result != ISC_R_SUCCESS && dispsock != NULL) {
1511 * XXX: weird. There seems to be no recovery process other than
1512 * deactivate this socket anyway (since we cannot start
1513 * receiving, we won't be able to receive a cancel event
1516 deactivate_dispsocket(disp, dispsock);
1518 isc_event_free(&ev_in);
1519 UNLOCK(&disp->lock);
1525 * If I/O result == CANCELED, EOF, or error, notify everyone as the
1526 * various queues drain.
1528 * If query, restart.
1531 * Allocate event, fill in details.
1532 * If cannot allocate, restart.
1533 * find target. If not found, restart.
1534 * if event queue is not empty, queue. else, send.
/*
 * tcp_recv: task event action run when a TCP message read on a dispatch
 * completes.
 *
 * task:  the task delivering the event (not referenced in the visible lines).
 * ev_in: the completion event; ev_in->ev_arg is the dns_dispatch_t, and the
 *        result, peer address and data are read from its embedded tcpmsg.
 *
 * Visible behaviour:
 *  - recv_pending must be set on entry and is cleared.
 *  - A refcount of zero forces tcpmsg->result to ISC_R_CANCELED.
 *  - Any non-success result is logged, ev_in is freed, the dispatch is
 *    marked shutting_down with shutdown_why set to the result, and
 *    disp->ctlevent is sent to disp->task[0].
 *  - Otherwise the DNS header is peeked for id and flags, the waiting entry
 *    is looked up with dns_hash() and entry_search(), and a
 *    dns_dispatchevent_t carrying the buffer is either appended to
 *    resp->items (when resp->item_out was already set) or sent to
 *    resp->task.
 *  - Finally startrecv(disp, NULL) is called and ev_in is freed.
 *
 * NOTE(review): this listing omits short lines (braces, goto/return, the
 * LOCK call, some declarations), so branch boundaries described above are
 * inferred from the surrounding statements and should be confirmed.
 */
1538 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1539 dns_dispatch_t *disp = ev_in->ev_arg;
1540 dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1544 dns_dispentry_t *resp;
1545 dns_dispatchevent_t *rev;
1546 unsigned int bucket;
1547 isc_boolean_t killit;
1548 isc_boolean_t queue_response;
1551 char buf[ISC_SOCKADDR_FORMATSIZE];
1555 REQUIRE(VALID_DISPATCH(disp));
1559 dispatch_log(disp, LVL(90),
1560 "got TCP packet: requests %d, buffers %d, recvs %d",
1561 disp->requests, disp->tcpbuffers, disp->recv_pending);
/* Exactly one read was outstanding; it has now completed. */
1565 INSIST(disp->recv_pending != 0);
1566 disp->recv_pending = 0;
1568 if (disp->refcount == 0) {
1570 * This dispatcher is shutting down. Force cancelation.
1572 tcpmsg->result = ISC_R_CANCELED;
/* Error path: choose a log level per result, then begin shutdown. */
1575 if (tcpmsg->result != ISC_R_SUCCESS) {
1576 switch (tcpmsg->result) {
1577 case ISC_R_CANCELED:
1581 dispatch_log(disp, LVL(90), "shutting down on EOF");
1585 case ISC_R_CONNECTIONRESET:
1586 level = ISC_LOG_INFO;
1590 level = ISC_LOG_ERROR;
1592 isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1593 dispatch_log(disp, level, "shutting down due to TCP "
1594 "receive error: %s: %s", buf,
1595 isc_result_totext(tcpmsg->result));
1601 * The event is statically allocated in the tcpmsg
1602 * structure, and destroy_disp() frees the tcpmsg, so we must
1603 * free the event *before* calling destroy_disp().
1605 isc_event_free(&ev_in);
1607 disp->shutting_down = 1;
1608 disp->shutdown_why = tcpmsg->result;
1611 * If the recv() was canceled pass the word on.
1613 killit = destroy_disp_ok(disp);
1614 UNLOCK(&disp->lock);
/* NOTE(review): presumably guarded by a test of killit; the guard is not visible here. */
1616 isc_task_send(disp->task[0], &disp->ctlevent);
1620 dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1622 tcpmsg->buffer.length, tcpmsg->buffer.base);
1625 * Peek into the buffer to see what we can see.
1627 dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1628 if (dres != ISC_R_SUCCESS) {
1629 dispatch_log(disp, LVL(10), "got garbage packet");
1633 dispatch_log(disp, LVL(92),
1634 "got valid DNS message header, /QR %c, id %u",
1635 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1638 * Allocate an event to send to the query or response client, and
1639 * allocate a new buffer for our use.
1643 * Look at flags. If query, drop it. If response,
1644 * look to see where it goes.
1646 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
/* Look up the waiting entry by peer address, message id and local port. */
1656 bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1658 resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1659 dispatch_log(disp, LVL(90),
1660 "search for response in bucket %d: %s",
1661 bucket, (resp == NULL ? "not found" : "found"));
/* Remember whether an event is already out for this entry before allocating. */
1665 queue_response = resp->item_out;
1666 rev = allocate_devent(disp);
1671 * At this point, rev contains the event we want to fill in, and
1672 * resp contains the information on the place to send it to.
1673 * Send the event off.
/* NOTE(review): presumably moves ownership of the received buffer into rev; confirm in dns/tcpmsg.h. */
1675 dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1677 rev->result = ISC_R_SUCCESS;
1679 rev->addr = tcpmsg->address;
1680 if (queue_response) {
1681 ISC_LIST_APPEND(resp->items, rev, ev_link);
1683 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1684 resp->action, resp->arg, resp, NULL, NULL);
1685 request_log(disp, resp, LVL(90),
1686 "[b] Sent event %p buffer %p len %d to task %p",
1687 rev, rev->buffer.base, rev->buffer.length,
1689 resp->item_out = ISC_TRUE;
1690 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1696 * Restart recv() to get the next packet.
/* The result of startrecv() is deliberately discarded here. */
1699 (void)startrecv(disp, NULL);
1701 isc_event_free(&ev_in);
1702 UNLOCK(&disp->lock);
1706 * disp must be locked.
/*
 * startrecv: post a receive on a dispatch socket.
 *
 * disp:     the dispatch; per the comment preceding this function it must
 *           be locked by the caller.
 * dispsock: a per-query socket to read from, or NULL to read from the
 *           dispatch socket (disp->socket).
 *
 * Returns ISC_R_SUCCESS without posting anything when the dispatch is
 * shutting down, has DNS_DISPATCHATTR_NOLISTEN set, or already has a
 * receive pending on the shared socket (dispsock == NULL).  Returns
 * ISC_R_NOMEMORY when the manager-wide buffer count has reached
 * maxbuffers, or when a UDP buffer or socket event cannot be allocated.
 *
 * For UDP a buffer of mgr->buffersize bytes is allocated and handed to
 * isc_socket_recv2(); the buffer is released again if that call fails.
 * On the shared-socket UDP path and on the TCP path a failure marks the
 * dispatch as shutting down (recording the result in shutdown_why) and
 * still returns ISC_R_SUCCESS; a successful post sets recv_pending.
 *
 * Fix in this revision: the region argument of both isc_socket_recv2()
 * calls had been mangled into a registered-trademark character followed
 * by "ion"; it is restored to the address of the local region variable.
 *
 * NOTE(review): this listing omits short lines (braces, else, break,
 * some returns and one declaration); they are intentionally not
 * reconstructed here.
 */
1709 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1711 isc_region_t region;
1712 isc_socket_t *socket;
1714 if (disp->shutting_down == 1)
1715 return (ISC_R_SUCCESS);
1717 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1718 return (ISC_R_SUCCESS);
/* Only one receive may be outstanding on the shared socket. */
1720 if (disp->recv_pending != 0 && dispsock == NULL)
1721 return (ISC_R_SUCCESS);
/* Respect the manager-wide buffer limit. */
1723 if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1724 return (ISC_R_NOMEMORY);
/* NOTE(review): the second operand of this condition is not visible in the listing. */
1726 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1728 return (ISC_R_SUCCESS);
/* Read from the per-query socket when one is given, else from the dispatch socket. */
1730 if (dispsock != NULL)
1731 socket = dispsock->socket;
1733 socket = disp->socket;
1734 INSIST(socket != NULL);
1736 switch (disp->socktype) {
1738 * UDP reads are always maximal.
1740 case isc_sockettype_udp:
1741 region.length = disp->mgr->buffersize;
1742 region.base = allocate_udp_buffer(disp);
1743 if (region.base == NULL)
1744 return (ISC_R_NOMEMORY);
/* Per-query socket: completion is delivered to udp_exrecv on the socket task. */
1745 if (dispsock != NULL) {
1746 isc_task_t *dt = dispsock->task;
1747 isc_socketevent_t *sev =
1748 allocate_sevent(disp, socket,
1749 ISC_SOCKEVENT_RECVDONE,
1750 udp_exrecv, dispsock);
1752 free_buffer(disp, region.base, region.length);
1753 return (ISC_R_NOMEMORY);
1756 res = isc_socket_recv2(socket, &region, 1, dt, sev, 0);
1757 if (res != ISC_R_SUCCESS) {
1758 free_buffer(disp, region.base, region.length);
/* Shared socket: completion is delivered on disp->task[0]. */
1762 isc_task_t *dt = disp->task[0];
1763 isc_socketevent_t *sev =
1764 allocate_sevent(disp, socket,
1765 ISC_SOCKEVENT_RECVDONE,
1768 free_buffer(disp, region.base, region.length);
1769 return (ISC_R_NOMEMORY);
1772 res = isc_socket_recv2(socket, &region, 1, dt, sev, 0);
1773 if (res != ISC_R_SUCCESS) {
1774 free_buffer(disp, region.base, region.length);
1775 disp->shutdown_why = res;
1776 disp->shutting_down = 1;
1778 return (ISC_R_SUCCESS); /* recover by cancel */
1780 INSIST(disp->recv_pending == 0);
1781 disp->recv_pending = 1;
1785 case isc_sockettype_tcp:
1786 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1788 if (res != ISC_R_SUCCESS) {
1789 disp->shutdown_why = res;
1790 disp->shutting_down = 1;
1792 return (ISC_R_SUCCESS); /* recover by cancel */
1794 INSIST(disp->recv_pending == 0);
1795 disp->recv_pending = 1;
1802 return (ISC_R_SUCCESS);
1806 * Mgr must be locked when calling this function.
/*
 * destroy_mgr_ok: decide whether a dispatch manager can be destroyed.
 *
 * Logs the current state at level 90, then tests in order: the manager is
 * flagged as shutting down, its dispatch list is empty, and the depool,
 * rpool and dpool memory pools have no outstanding allocations.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably ISC_FALSE as soon as one test fails and ISC_TRUE otherwise.
 */
1808 static isc_boolean_t
1809 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1810 mgr_log(mgr, LVL(90),
1811 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1812 "depool=%d, rpool=%d, dpool=%d",
1813 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1814 isc_mempool_getallocated(mgr->depool),
1815 isc_mempool_getallocated(mgr->rpool),
1816 isc_mempool_getallocated(mgr->dpool));
1817 if (!MGR_IS_SHUTTINGDOWN(mgr))
1819 if (!ISC_LIST_EMPTY(mgr->list))
1821 if (isc_mempool_getallocated(mgr->depool) != 0)
1823 if (isc_mempool_getallocated(mgr->rpool) != 0)
1825 if (isc_mempool_getallocated(mgr->dpool) != 0)
1832 * Mgr must be unlocked when calling this function.
/*
 * destroy_mgr: release every resource owned by a dispatch manager.
 *
 * mgrp: address of the manager pointer.
 *
 * Destroys the manager locks, the depool/rpool/dpool memory pools (and
 * bpool/spool when they exist), detaches the entropy source, destroys the
 * manager qid, detaches the blackhole ACL and the stats object, returns
 * the v4/v6 port arrays to the memory context, and finally frees the
 * manager structure itself and detaches the memory context.
 *
 * NOTE(review): the lines that load mgr and mctx from *mgrp are not
 * visible in this listing; mctx is presumably mgr->mctx.
 */
1835 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1837 dns_dispatchmgr_t *mgr;
1846 DESTROYLOCK(&mgr->lock);
1849 DESTROYLOCK(&mgr->arc4_lock);
1851 isc_mempool_destroy(&mgr->depool);
1852 isc_mempool_destroy(&mgr->rpool);
1853 isc_mempool_destroy(&mgr->dpool);
/* bpool and spool only exist once dns_dispatchmgr_setudp() has created them. */
1854 if (mgr->bpool != NULL)
1855 isc_mempool_destroy(&mgr->bpool);
1856 if (mgr->spool != NULL)
1857 isc_mempool_destroy(&mgr->spool);
/* The pool locks are destroyed only after the pools associated with them. */
1859 DESTROYLOCK(&mgr->spool_lock);
1860 DESTROYLOCK(&mgr->bpool_lock);
1861 DESTROYLOCK(&mgr->dpool_lock);
1862 DESTROYLOCK(&mgr->rpool_lock);
1863 DESTROYLOCK(&mgr->depool_lock);
1866 if (mgr->entropy != NULL)
1867 isc_entropy_detach(&mgr->entropy);
1869 if (mgr->qid != NULL)
1870 qid_destroy(mctx, &mgr->qid);
1872 DESTROYLOCK(&mgr->buffer_lock);
1874 if (mgr->blackhole != NULL)
1875 dns_acl_detach(&mgr->blackhole);
1877 if (mgr->stats != NULL)
1878 isc_stats_detach(&mgr->stats);
/* Port arrays are sized by the stored counts, matching their allocation. */
1880 if (mgr->v4ports != NULL) {
1881 isc_mem_put(mctx, mgr->v4ports,
1882 mgr->nv4ports * sizeof(in_port_t));
1884 if (mgr->v6ports != NULL) {
1885 isc_mem_put(mctx, mgr->v6ports,
1886 mgr->nv6ports * sizeof(in_port_t));
1888 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1889 isc_mem_detach(&mctx);
/*
 * open_socket: obtain a UDP socket for a dispatch and bind it.
 *
 * mgr:        socket manager used when a new socket is created.
 * local:      local address to bind to; its protocol family selects the
 *             family of a newly created socket.
 * options:    passed through to isc_socket_bind().
 * sockp:      where the resulting socket is stored.
 * dup_socket: when non-NULL, the socket is obtained with isc_socket_dup()
 *             and the function returns without binding.
 *
 * Three ways of obtaining the socket are visible: isc_socket_open() on an
 * existing socket, isc_socket_dup() of dup_socket, and isc_socket_create()
 * of a new UDP socket.  Unless ISC_ALLOW_MAPPED is defined the socket is
 * restricted to IPv6-only.  When the bind fails the socket is detached or
 * closed.
 *
 * NOTE(review): the first condition of the if/else chain, the error
 * returns and the final store to *sockp are not visible in this listing;
 * the detach-versus-close choice presumably depends on whether the socket
 * was newly created or re-opened.
 */
1893 open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1894 unsigned int options, isc_socket_t **sockp,
1895 isc_socket_t *dup_socket)
1898 isc_result_t result;
1903 result = isc_socket_open(sock);
1904 if (result != ISC_R_SUCCESS)
1909 } else if (dup_socket != NULL) {
1910 result = isc_socket_dup(dup_socket, &sock);
1911 if (result != ISC_R_SUCCESS)
1914 isc_socket_setname(sock, "dispatcher", NULL);
1916 return (ISC_R_SUCCESS);
1918 result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1919 isc_sockettype_udp, &sock);
1920 if (result != ISC_R_SUCCESS)
1924 isc_socket_setname(sock, "dispatcher", NULL);
1926 #ifndef ISC_ALLOW_MAPPED
1927 isc_socket_ipv6only(sock, ISC_TRUE);
1929 result = isc_socket_bind(sock, local, options);
1930 if (result != ISC_R_SUCCESS) {
1932 isc_socket_detach(&sock);
1935 isc_socket_close(sock);
1944 return (ISC_R_SUCCESS);
1948 * Create a temporary port list to set the initial default set of dispatch
1949 * ports: [1024, 65535]. This is almost meaningless as the application will
1950 * normally set the ports explicitly, but is provided to fill some minor corner
/*
 * create_default_portset: build a port set containing ports 1024-65535.
 *
 * mctx:     memory context used to create the port set.
 * portsetp: receives the new port set.
 *
 * Returns ISC_R_SUCCESS once the range has been added.
 * NOTE(review): the failure return after isc_portset_create() is not
 * visible in this listing; presumably it returns that result.
 */
1954 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1955 isc_result_t result;
1957 result = isc_portset_create(mctx, portsetp);
1958 if (result != ISC_R_SUCCESS)
1960 isc_portset_addrange(*portsetp, 1024, 65535);
1962 return (ISC_R_SUCCESS);
/*
 * dns_dispatchmgr_create: allocate and initialise a dispatch manager.
 *
 * mctx:    memory context; must not be NULL.  The manager attaches to it.
 * entropy: optional entropy source; attached when non-NULL and passed to
 *          dispatch_initrandom().
 * mgrp:    must be non-NULL and point to NULL.
 *
 * Initialises the manager mutexes (lock, arc4_lock, buffer_lock and the
 * five pool locks), creates the depool/rpool/dpool memory pools with a
 * maximum of 32768 items and a fill count of 256, installs the default
 * port sets (built by create_default_portset) for IPv4 and IPv6 through
 * dns_dispatchmgr_setavailports(), and seeds the manager random state.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY when the manager or a memory
 * pool cannot be allocated.  The tail of the function is the unwind path:
 * pools and locks are released in the reverse order of their creation,
 * then the manager is freed and the memory context detached.
 *
 * NOTE(review): the cleanup labels, some goto lines, several field
 * initialisations and the final store to *mgrp are not visible in this
 * listing.
 */
1970 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1971 dns_dispatchmgr_t **mgrp)
1973 dns_dispatchmgr_t *mgr;
1974 isc_result_t result;
1975 isc_portset_t *v4portset = NULL;
1976 isc_portset_t *v6portset = NULL;
1978 REQUIRE(mctx != NULL);
1979 REQUIRE(mgrp != NULL && *mgrp == NULL);
1981 mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1983 return (ISC_R_NOMEMORY);
1986 isc_mem_attach(mctx, &mgr->mctx);
1988 mgr->blackhole = NULL;
/* Each later mutex failure jumps to the label that destroys the previous lock. */
1991 result = isc_mutex_init(&mgr->lock);
1992 if (result != ISC_R_SUCCESS)
1995 result = isc_mutex_init(&mgr->arc4_lock);
1996 if (result != ISC_R_SUCCESS)
1999 result = isc_mutex_init(&mgr->buffer_lock);
2000 if (result != ISC_R_SUCCESS)
2001 goto kill_arc4_lock;
2003 result = isc_mutex_init(&mgr->depool_lock);
2004 if (result != ISC_R_SUCCESS)
2005 goto kill_buffer_lock;
2007 result = isc_mutex_init(&mgr->rpool_lock);
2008 if (result != ISC_R_SUCCESS)
2009 goto kill_depool_lock;
2011 result = isc_mutex_init(&mgr->dpool_lock);
2012 if (result != ISC_R_SUCCESS)
2013 goto kill_rpool_lock;
2015 result = isc_mutex_init(&mgr->bpool_lock);
2016 if (result != ISC_R_SUCCESS)
2017 goto kill_dpool_lock;
2019 result = isc_mutex_init(&mgr->spool_lock);
2020 if (result != ISC_R_SUCCESS)
2021 goto kill_bpool_lock;
/* Pools for dispatch events, dispatch entries and dispatch objects. */
2024 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
2025 &mgr->depool) != ISC_R_SUCCESS) {
2026 result = ISC_R_NOMEMORY;
2027 goto kill_spool_lock;
2031 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
2032 &mgr->rpool) != ISC_R_SUCCESS) {
2033 result = ISC_R_NOMEMORY;
2038 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
2039 &mgr->dpool) != ISC_R_SUCCESS) {
2040 result = ISC_R_NOMEMORY;
2044 isc_mempool_setname(mgr->depool, "dispmgr_depool");
2045 isc_mempool_setmaxalloc(mgr->depool, 32768);
2046 isc_mempool_setfreemax(mgr->depool, 32768);
2047 isc_mempool_associatelock(mgr->depool, &mgr->depool_lock);
2048 isc_mempool_setfillcount(mgr->depool, 256);
2050 isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
2051 isc_mempool_setmaxalloc(mgr->rpool, 32768);
2052 isc_mempool_setfreemax(mgr->rpool, 32768);
2053 isc_mempool_associatelock(mgr->rpool, &mgr->rpool_lock);
2054 isc_mempool_setfillcount(mgr->rpool, 256);
2056 isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
2057 isc_mempool_setmaxalloc(mgr->dpool, 32768);
2058 isc_mempool_setfreemax(mgr->dpool, 32768);
2059 isc_mempool_associatelock(mgr->dpool, &mgr->dpool_lock);
2060 isc_mempool_setfillcount(mgr->dpool, 256);
/* UDP buffer parameters stay zero until dns_dispatchmgr_setudp() sets them. */
2063 mgr->buffersize = 0;
2064 mgr->maxbuffers = 0;
2067 mgr->entropy = NULL;
2070 ISC_LIST_INIT(mgr->list);
2071 mgr->v4ports = NULL;
2072 mgr->v6ports = NULL;
2075 mgr->magic = DNS_DISPATCHMGR_MAGIC;
/* Install the default port sets; the temporary sets are destroyed either way. */
2077 result = create_default_portset(mctx, &v4portset);
2078 if (result == ISC_R_SUCCESS) {
2079 result = create_default_portset(mctx, &v6portset);
2080 if (result == ISC_R_SUCCESS) {
2081 result = dns_dispatchmgr_setavailports(mgr,
2086 if (v4portset != NULL)
2087 isc_portset_destroy(mctx, &v4portset);
2088 if (v6portset != NULL)
2089 isc_portset_destroy(mctx, &v6portset);
2090 if (result != ISC_R_SUCCESS)
2094 if (entropy != NULL)
2095 isc_entropy_attach(entropy, &mgr->entropy);
2100 dispatch_initrandom(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
2103 return (ISC_R_SUCCESS);
/* Unwind path: release in reverse order of acquisition. */
2106 isc_mempool_destroy(&mgr->dpool);
2108 isc_mempool_destroy(&mgr->rpool);
2110 isc_mempool_destroy(&mgr->depool);
2112 DESTROYLOCK(&mgr->spool_lock);
2114 DESTROYLOCK(&mgr->bpool_lock);
2116 DESTROYLOCK(&mgr->dpool_lock);
2118 DESTROYLOCK(&mgr->rpool_lock);
2120 DESTROYLOCK(&mgr->depool_lock);
2122 DESTROYLOCK(&mgr->buffer_lock);
2124 DESTROYLOCK(&mgr->arc4_lock);
2126 DESTROYLOCK(&mgr->lock);
2128 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
2129 isc_mem_detach(&mctx);
/*
 * dns_dispatchmgr_setblackhole: replace the manager blackhole ACL.
 *
 * mgr:       a valid dispatch manager.
 * blackhole: ACL to attach; any previously attached ACL is detached first.
 */
2135 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
2136 REQUIRE(VALID_DISPATCHMGR(mgr));
2137 if (mgr->blackhole != NULL)
2138 dns_acl_detach(&mgr->blackhole);
2139 dns_acl_attach(blackhole, &mgr->blackhole);
/*
 * dns_dispatchmgr_getblackhole: return the blackhole ACL currently stored
 * in the manager (may be NULL).  No new reference is attached here.
 */
2143 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
2144 REQUIRE(VALID_DISPATCHMGR(mgr));
2145 return (mgr->blackhole);
/*
 * dns_dispatchmgr_setblackportlist: deprecated entry point kept for
 * interface compatibility.  The visible lines only validate mgr.
 * NOTE(review): portlist appears to be unused; any remaining lines of the
 * body are not visible in this listing.
 */
2149 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
2150 dns_portlist_t *portlist)
2152 REQUIRE(VALID_DISPATCHMGR(mgr));
2155 /* This function is deprecated: use dns_dispatchmgr_setavailports(). */
/*
 * dns_dispatchmgr_getblackportlist: deprecated accessor.  Validates mgr
 * and always returns NULL.
 */
2160 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
2161 REQUIRE(VALID_DISPATCHMGR(mgr));
2162 return (NULL); /* this function is deprecated */
/*
 * dns_dispatchmgr_setavailports: install the sets of local ports the
 * manager may use for IPv4 and IPv6.
 *
 * mgr:       a valid dispatch manager.
 * v4portset: ports allowed for IPv4.
 * v6portset: ports allowed for IPv6.
 *
 * Each port set is flattened into an array of in_port_t by scanning port
 * numbers in ascending order up to 65535, so the arrays come out sorted.
 * Any arrays previously held by the manager are returned to the memory
 * context before the new arrays and their counts are stored.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY when an array cannot be
 * allocated (an already allocated IPv4 array is released first).
 *
 * NOTE(review): the initialisation of v4ports, v6ports, p, i4 and i6, the
 * array stores inside the loop, and any locking are not visible in this
 * listing.
 */
2166 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
2167 isc_portset_t *v6portset)
2169 in_port_t *v4ports, *v6ports, p;
2170 unsigned int nv4ports, nv6ports, i4, i6;
2172 REQUIRE(VALID_DISPATCHMGR(mgr));
2174 nv4ports = isc_portset_nports(v4portset);
2175 nv6ports = isc_portset_nports(v6portset);
/* An array is allocated only for a non-empty port set. */
2178 if (nv4ports != 0) {
2179 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
2180 if (v4ports == NULL)
2181 return (ISC_R_NOMEMORY);
2184 if (nv6ports != 0) {
2185 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
2186 if (v6ports == NULL) {
2187 if (v4ports != NULL) {
2188 isc_mem_put(mgr->mctx, v4ports,
2190 isc_portset_nports(v4portset));
2192 return (ISC_R_NOMEMORY);
2200 if (isc_portset_isset(v4portset, p)) {
2201 INSIST(i4 < nv4ports);
2204 if (isc_portset_isset(v6portset, p)) {
2205 INSIST(i6 < nv6ports);
/* The test happens before the increment, so the loop ends after p == 65535. */
2208 } while (p++ < 65535);
2209 INSIST(i4 == nv4ports && i6 == nv6ports);
/* Swap in the new arrays, releasing the old ones with their recorded sizes. */
2212 if (mgr->v4ports != NULL) {
2213 isc_mem_put(mgr->mctx, mgr->v4ports,
2214 mgr->nv4ports * sizeof(in_port_t));
2216 mgr->v4ports = v4ports;
2217 mgr->nv4ports = nv4ports;
2219 if (mgr->v6ports != NULL) {
2220 isc_mem_put(mgr->mctx, mgr->v6ports,
2221 mgr->nv6ports * sizeof(in_port_t));
2223 mgr->v6ports = v6ports;
2224 mgr->nv6ports = nv6ports;
2227 return (ISC_R_SUCCESS);
/*
 * dns_dispatchmgr_setudp: create or adjust the manager-wide UDP resources.
 *
 * mgr:         a valid dispatch manager.
 * buffersize:  size of each receive buffer; must be in [512, 65536).
 * maxbuffers:  maximum number of receive buffers; must be positive.
 * maxrequests: limit applied to the dispatch-socket pool when it is
 *              created.
 * buckets:     hash table size for the manager qid; must be below 2097169.
 * increment:   passed to qid_allocate(); must be greater than buckets.
 *
 * All work is done under mgr->buffer_lock.
 *
 * Buffer pool: when it already exists only its maximum is raised (never
 * lowered); otherwise it is created with the given buffer size and
 * maxbuffers as both maxalloc and freemax.
 *
 * Socket pool: when it already exists the function returns ISC_R_SUCCESS
 * after optionally raising its limits to DNS_DISPATCH_POOLSOCKS * 2;
 * otherwise it is created, the manager qid is allocated with a socket
 * table, and buffersize/maxbuffers are recorded in the manager.
 *
 * On failure after the pools exist, the tail of the function destroys the
 * buffer pool (and the socket pool when present) before unlocking.
 *
 * NOTE(review): the cleanup label, the goto/return lines on the error
 * paths and the else keywords are not visible in this listing.
 */
2231 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
2232 unsigned int buffersize, unsigned int maxbuffers,
2233 unsigned int maxrequests, unsigned int buckets,
2234 unsigned int increment)
2236 isc_result_t result;
2238 REQUIRE(VALID_DISPATCHMGR(mgr));
2239 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2240 REQUIRE(maxbuffers > 0);
2241 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2242 REQUIRE(increment > buckets);
2245 * Keep some number of items around. This should be a config
2246 * option. For now, keep 8, but later keep at least two even
2247 * if the caller wants less. This allows us to ensure certain
2248 * things, like an event can be "freed" and the next allocation
2249 * will always succeed.
2251 * Note that if limits are placed on anything here, we use one
2252 * event internally, so the actual limit should be "wanted + 1."
2260 LOCK(&mgr->buffer_lock);
2262 /* Create or adjust buffer pool */
2263 if (mgr->bpool != NULL) {
2265 * We only increase the maxbuffers to avoid accidental buffer
2266 * shortage. Ideally we'd separate the manager-wide maximum
2267 * from per-dispatch limits and respect the latter within the
2268 * global limit. But at this moment that's deemed to be
2269 * overkilling and isn't worth additional implementation
2272 if (maxbuffers > mgr->maxbuffers) {
2273 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2274 isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2275 mgr->maxbuffers = maxbuffers;
2278 result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
2279 if (result != ISC_R_SUCCESS) {
2280 UNLOCK(&mgr->buffer_lock);
2283 isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
2284 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2285 isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2286 isc_mempool_associatelock(mgr->bpool, &mgr->bpool_lock);
2287 isc_mempool_setfillcount(mgr->bpool, 256);
2290 /* Create or adjust socket pool */
2291 if (mgr->spool != NULL) {
2292 if (maxrequests < DNS_DISPATCH_POOLSOCKS * 2) {
2293 isc_mempool_setmaxalloc(mgr->spool,
2294 DNS_DISPATCH_POOLSOCKS * 2);
2295 isc_mempool_setfreemax(mgr->spool,
2296 DNS_DISPATCH_POOLSOCKS * 2);
2298 UNLOCK(&mgr->buffer_lock);
2299 return (ISC_R_SUCCESS);
2301 result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
2303 if (result != ISC_R_SUCCESS) {
2304 UNLOCK(&mgr->buffer_lock);
2307 isc_mempool_setname(mgr->spool, "dispmgr_spool");
2308 isc_mempool_setmaxalloc(mgr->spool, maxrequests);
2309 isc_mempool_setfreemax(mgr->spool, maxrequests);
2310 isc_mempool_associatelock(mgr->spool, &mgr->spool_lock);
2311 isc_mempool_setfillcount(mgr->spool, 256);
2313 result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
2314 if (result != ISC_R_SUCCESS)
2317 mgr->buffersize = buffersize;
2318 mgr->maxbuffers = maxbuffers;
2319 UNLOCK(&mgr->buffer_lock);
2320 return (ISC_R_SUCCESS);
2323 isc_mempool_destroy(&mgr->bpool);
2324 if (mgr->spool != NULL)
2325 isc_mempool_destroy(&mgr->spool);
2326 UNLOCK(&mgr->buffer_lock);
/*
 * dns_dispatchmgr_destroy: begin shutdown of a dispatch manager.
 *
 * mgrp: must be non-NULL and point to a valid manager.
 *
 * Sets MGR_SHUTTINGDOWN in the manager state, asks destroy_mgr_ok()
 * whether destruction can proceed, and logs the answer.
 *
 * NOTE(review): the lines that load mgr from *mgrp, clear *mgrp, take the
 * manager lock and act on killit are not visible in this listing;
 * presumably destroy_mgr() is called when killit is true.
 */
2331 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
2332 dns_dispatchmgr_t *mgr;
2333 isc_boolean_t killit;
2335 REQUIRE(mgrp != NULL);
2336 REQUIRE(VALID_DISPATCHMGR(*mgrp));
2342 mgr->state |= MGR_SHUTTINGDOWN;
2344 killit = destroy_mgr_ok(mgr);
2347 mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
/*
 * dns_dispatchmgr_setstats: attach a statistics object to the manager.
 *
 * mgr:   a valid dispatch manager that has no dispatches yet and no
 *        statistics object attached (both enforced by REQUIRE).
 * stats: statistics object to attach.
 */
2354 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2355 REQUIRE(VALID_DISPATCHMGR(mgr));
2356 REQUIRE(ISC_LIST_EMPTY(mgr->list));
2357 REQUIRE(mgr->stats == NULL);
2359 isc_stats_attach(stats, &mgr->stats);
/*
 * port_cmp: comparison callback over in_port_t values, used with bsearch()
 * in portavailable().
 *
 * key: pointer to the port being searched for.
 * ent: pointer to an element of the port array.
 *
 * NOTE(review): the comparison and return statements are not visible in
 * this listing; presumably negative, zero or positive for p1 below, equal
 * to or above p2.
 */
2363 port_cmp(const void *key, const void *ent) {
2364 in_port_t p1 = *(const in_port_t *)key;
2365 in_port_t p2 = *(const in_port_t *)ent;
/*
 * portavailable: test whether a local port is in the manager list of
 * available ports for its address family.
 *
 * mgr:       dispatch manager holding the v4ports and v6ports arrays.
 * sock:      socket whose bound address is queried with
 *            isc_socket_getsockname() when no address is supplied.
 * sockaddrp: address to test.  At least one of sock and sockaddrp must be
 *            non-NULL.
 *
 * The IPv4 array is used when the address family is AF_INET, the IPv6
 * array otherwise.  The port is looked up with bsearch(), which relies on
 * the array being sorted in ascending order.
 *
 * NOTE(review): the condition selecting the getsockname path, any locking
 * and the final return are not visible in this listing; the function
 * presumably returns the local variable available.
 */
2375 static isc_boolean_t
2376 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2377 isc_sockaddr_t *sockaddrp)
2379 isc_sockaddr_t sockaddr;
2380 isc_result_t result;
2381 in_port_t *ports, port;
2382 unsigned int nports;
2383 isc_boolean_t available = ISC_FALSE;
2385 REQUIRE(sock != NULL || sockaddrp != NULL);
2389 sockaddrp = &sockaddr;
2390 result = isc_socket_getsockname(sock, sockaddrp);
2391 if (result != ISC_R_SUCCESS)
2395 if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2396 ports = mgr->v4ports;
2397 nports = mgr->nv4ports;
2399 ports = mgr->v6ports;
2400 nports = mgr->nv6ports;
2405 port = isc_sockaddr_getport(sockaddrp);
2406 if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2407 available = ISC_TRUE;
/*
 * ATTRMATCH: true when attribute words _a1 and _a2 agree on every bit
 * selected by _mask.  Each argument is evaluated more than once.
 */
2414 #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
/*
 * local_addr_match: test whether a dispatch is bound to a given local
 * address.
 *
 * disp: dispatch to test; it must have a socket.
 * addr: requested local address and port.
 *
 * Visible checks, in order:
 *  1. When both the requested port and the dispatch port are 0 (wildcard)
 *     and the port actually bound is not in the available port list, the
 *     wildcard shortcut is rejected.
 *  2. An exact match of disp->local against addr.
 *  3. A test for a wildcard request (port 0).
 *  4. Otherwise the address parts must be equal and the address actually
 *     bound to the socket (isc_socket_getsockname) must equal addr.
 *
 * NOTE(review): the return statements of steps 1 to 3 and of the failure
 * branches are not visible in this listing.
 */
2416 static isc_boolean_t
2417 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2418 isc_sockaddr_t sockaddr;
2419 isc_result_t result;
2421 REQUIRE(disp->socket != NULL);
2427 * Don't match wildcard ports unless the port is available in the
2428 * current configuration.
2430 if (isc_sockaddr_getport(addr) == 0 &&
2431 isc_sockaddr_getport(&disp->local) == 0 &&
2432 !portavailable(disp->mgr, disp->socket, NULL)) {
2437 * Check if we match the binding <address,port>.
2438 * Wildcard ports match/fail here.
2440 if (isc_sockaddr_equal(&disp->local, addr))
2442 if (isc_sockaddr_getport(addr) == 0)
2446 * Check if we match a bound wildcard port <address,port>.
2448 if (!isc_sockaddr_eqaddr(&disp->local, addr))
2450 result = isc_socket_getsockname(disp->socket, &sockaddr);
2451 if (result != ISC_R_SUCCESS)
2454 return (isc_sockaddr_equal(&sockaddr, addr));
2458 * Requires mgr be locked.
2460 * No dispatcher can be locked by this thread when calling this function.
2464 * If a matching dispatcher is found, it is locked after this function
2465 * returns, and must be unlocked by the caller.
/*
 * dispatch_find: search the manager dispatch list for a shareable
 * dispatch matching a local address and an attribute pattern.
 *
 * mgr:        dispatch manager whose list is walked.
 * local:      local address the dispatch must match (local_addr_match).
 * attributes: wanted attribute bits.
 * mask:       which attribute bits are compared.
 * dispp:      receives the match.
 *
 * The PRIVATE and EXCLUSIVE bits are cleared from attributes and added to
 * mask, so a private or exclusive dispatch can never match.  Dispatches
 * that are shutting down are skipped, and a non-matching dispatch is
 * unlocked before moving to the next one.
 *
 * The result is set to ISC_R_NOTFOUND when the walk ends without a match
 * and to ISC_R_SUCCESS otherwise.
 *
 * NOTE(review): the per-dispatch LOCK call, the break on a match, the
 * store to *dispp and the final return are not visible in this listing.
 */
2468 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2469 unsigned int attributes, unsigned int mask,
2470 dns_dispatch_t **dispp)
2472 dns_dispatch_t *disp;
2473 isc_result_t result;
2476 * Make certain that we will not match a private or exclusive dispatch.
2478 attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2479 mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2481 disp = ISC_LIST_HEAD(mgr->list);
2482 while (disp != NULL) {
2484 if ((disp->shutting_down == 0)
2485 && ATTRMATCH(disp->attributes, attributes, mask)
2486 && local_addr_match(disp, local))
2488 UNLOCK(&disp->lock);
2489 disp = ISC_LIST_NEXT(disp, link);
2493 result = ISC_R_NOTFOUND;
2498 result = ISC_R_SUCCESS;
/*
 * qid_allocate: allocate and initialise a query-id hash table.
 *
 * mgr:           valid dispatch manager; its memory context is used.
 * buckets:       number of hash buckets; must be below 2097169.
 * increment:     stored as qid_increment; must be greater than buckets.
 * qidp:          must be non-NULL and point to NULL.
 * needsocktable: when true a parallel table of dispatch-socket lists is
 *                allocated as well.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY when an allocation fails.
 * Every failure path releases whatever had already been allocated, in
 * reverse order.
 *
 * NOTE(review): the declarations of qid and i, the return after a mutex
 * failure and the final store to *qidp are not visible in this listing.
 */
2505 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2506 unsigned int increment, dns_qid_t **qidp,
2507 isc_boolean_t needsocktable)
2511 isc_result_t result;
2513 REQUIRE(VALID_DISPATCHMGR(mgr));
2514 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2515 REQUIRE(increment > buckets);
2516 REQUIRE(qidp != NULL && *qidp == NULL);
2518 qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2520 return (ISC_R_NOMEMORY);
2522 qid->qid_table = isc_mem_get(mgr->mctx,
2523 buckets * sizeof(dns_displist_t));
2524 if (qid->qid_table == NULL) {
2525 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2526 return (ISC_R_NOMEMORY);
/* The socket table is optional and stays NULL when not requested. */
2529 qid->sock_table = NULL;
2530 if (needsocktable) {
2531 qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2532 sizeof(dispsocketlist_t));
2533 if (qid->sock_table == NULL) {
2534 isc_mem_put(mgr->mctx, qid->qid_table,
2535 buckets * sizeof(dns_displist_t));
2536 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2537 return (ISC_R_NOMEMORY);
2541 result = isc_mutex_init(&qid->lock);
2542 if (result != ISC_R_SUCCESS) {
2543 if (qid->sock_table != NULL) {
2544 isc_mem_put(mgr->mctx, qid->sock_table,
2545 buckets * sizeof(dispsocketlist_t));
2547 isc_mem_put(mgr->mctx, qid->qid_table,
2548 buckets * sizeof(dns_displist_t));
2549 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
/* Every bucket starts out as an empty list. */
2553 for (i = 0; i < buckets; i++) {
2554 ISC_LIST_INIT(qid->qid_table[i]);
2555 if (qid->sock_table != NULL)
2556 ISC_LIST_INIT(qid->sock_table[i]);
2559 qid->qid_nbuckets = buckets;
2560 qid->qid_increment = increment;
2561 qid->magic = QID_MAGIC;
2563 return (ISC_R_SUCCESS);
/*
 * qid_destroy: free a query-id hash table created by qid_allocate().
 *
 * mctx: memory context the table was allocated from.
 * qidp: address of the table pointer; must be non-NULL and refer to a
 *       valid qid.
 *
 * Releases the bucket array, the optional socket table, the lock and the
 * structure itself, using qid_nbuckets to recompute the array sizes.
 *
 * NOTE(review): the lines that load qid from *qidp, clear *qidp and
 * invalidate the magic number are not visible in this listing.
 */
2567 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2570 REQUIRE(qidp != NULL);
2573 REQUIRE(VALID_QID(qid));
2577 isc_mem_put(mctx, qid->qid_table,
2578 qid->qid_nbuckets * sizeof(dns_displist_t));
2579 if (qid->sock_table != NULL) {
2580 isc_mem_put(mctx, qid->sock_table,
2581 qid->qid_nbuckets * sizeof(dispsocketlist_t));
2583 DESTROYLOCK(&qid->lock);
2584 isc_mem_put(mctx, qid, sizeof(*qid));
2588 * Allocate and set important limits.
/*
 * dispatch_allocate: take a dispatch from the manager pool and give it a
 * neutral initial state.
 *
 * mgr:         valid dispatch manager; the dispatch comes from mgr->dpool.
 * maxrequests: stored in the new dispatch.
 * dispp:       must be non-NULL and point to NULL.
 *
 * The dispatch gets zeroed counters and flags, empty active and inactive
 * socket lists, its own random state seeded from mgr->entropy, its own
 * lock, and a pre-allocated failsafe event.  Socket-type specific fields
 * are left to the caller.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY when the dispatch or the
 * failsafe event cannot be allocated.  The tail of the function is the
 * unwind path: the lock is destroyed and the dispatch is returned to the
 * pool.
 *
 * NOTE(review): the cleanup labels, the goto lines, several field
 * initialisations and the final store to *dispp are not visible in this
 * listing.
 */
2591 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2592 dns_dispatch_t **dispp)
2594 dns_dispatch_t *disp;
2595 isc_result_t result;
2597 REQUIRE(VALID_DISPATCHMGR(mgr));
2598 REQUIRE(dispp != NULL && *dispp == NULL);
2601 * Set up the dispatcher, mostly. Don't bother setting some of
2602 * the options that are controlled by tcp vs. udp, etc.
2605 disp = isc_mempool_get(mgr->dpool);
2607 return (ISC_R_NOMEMORY);
2611 disp->maxrequests = maxrequests;
2612 disp->attributes = 0;
2613 ISC_LINK_INIT(disp, link);
2615 disp->recv_pending = 0;
2616 memset(&disp->local, 0, sizeof(disp->local));
2617 disp->localport = 0;
2618 disp->shutting_down = 0;
2619 disp->shutdown_out = 0;
2620 disp->connected = 0;
2621 disp->tcpmsg_valid = 0;
2622 disp->shutdown_why = ISC_R_UNEXPECTED;
2624 disp->tcpbuffers = 0;
2626 ISC_LIST_INIT(disp->activesockets);
2627 ISC_LIST_INIT(disp->inactivesockets);
2629 dispatch_initrandom(&disp->arc4ctx, mgr->entropy, NULL);
2630 disp->port_table = NULL;
2631 disp->portpool = NULL;
2633 result = isc_mutex_init(&disp->lock);
2634 if (result != ISC_R_SUCCESS)
2637 disp->failsafe_ev = allocate_devent(disp);
2638 if (disp->failsafe_ev == NULL) {
2639 result = ISC_R_NOMEMORY;
2643 disp->magic = DISPATCH_MAGIC;
2646 return (ISC_R_SUCCESS);
2652 DESTROYLOCK(&disp->lock);
2654 isc_mempool_put(mgr->dpool, disp);
2661 * MUST be unlocked, and not used by anything.
/*
 * dispatch_free: release a dispatch created by dispatch_allocate().
 *
 * dispp: address of the dispatch pointer; must refer to a valid dispatch
 *        whose manager is also valid.
 *
 * The dispatch must be idle: no TCP buffers, no requests, no pending
 * receive, and empty active and inactive socket lists (all enforced with
 * INSIST).  The TCP message state is invalidated when it was set up, the
 * failsafe event is returned to the manager event pool, and the qid, the
 * port table (every bucket must be empty) and the port pool are released
 * when present.  Finally the lock is destroyed and the dispatch returns
 * to mgr->dpool.
 *
 * NOTE(review): the lines that load disp and mgr, declare i, clear *dispp
 * and reset the magic number are not visible in this listing.
 */
2664 dispatch_free(dns_dispatch_t **dispp) {
2665 dns_dispatch_t *disp;
2666 dns_dispatchmgr_t *mgr;
2669 REQUIRE(VALID_DISPATCH(*dispp));
2674 REQUIRE(VALID_DISPATCHMGR(mgr));
2676 if (disp->tcpmsg_valid) {
2677 dns_tcpmsg_invalidate(&disp->tcpmsg);
2678 disp->tcpmsg_valid = 0;
/* Nothing may still be using the dispatch at this point. */
2681 INSIST(disp->tcpbuffers == 0);
2682 INSIST(disp->requests == 0);
2683 INSIST(disp->recv_pending == 0);
2684 INSIST(ISC_LIST_EMPTY(disp->activesockets));
2685 INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2687 isc_mempool_put(mgr->depool, disp->failsafe_ev);
2688 disp->failsafe_ev = NULL;
2690 if (disp->qid != NULL)
2691 qid_destroy(mgr->mctx, &disp->qid);
2693 if (disp->port_table != NULL) {
2694 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2695 INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2696 isc_mem_put(mgr->mctx, disp->port_table,
2697 sizeof(disp->port_table[0]) *
2698 DNS_DISPATCH_PORTTABLESIZE);
2701 if (disp->portpool != NULL)
2702 isc_mempool_destroy(&disp->portpool);
2705 DESTROYLOCK(&disp->lock);
2707 isc_mempool_put(mgr->dpool, disp);
/*
 * dns_dispatch_createtcp: create a dispatch on top of an existing TCP
 * socket.
 *
 * mgr:         valid dispatch manager.
 * sock:        a TCP socket; the dispatch attaches its own reference.
 * taskmgr:     used to create the dispatch task.
 * buffersize:  not referenced in the visible lines.
 * maxbuffers:  not referenced in the visible lines.
 * maxrequests: passed to dispatch_allocate().
 * buckets:     passed to qid_allocate() (no socket table).
 * increment:   passed to qid_allocate().
 * attributes:  must include DNS_DISPATCHATTR_TCP and exclude
 *              DNS_DISPATCHATTR_UDP; DNS_DISPATCHATTR_PRIVATE is always
 *              added.
 * dispp:       receives the new dispatch.
 *
 * On success the dispatch has its own qid, task and control event, an
 * initialised tcpmsg bound to the socket, and has been appended to the
 * manager dispatch list.  On failure the task and socket are detached as
 * needed and the dispatch is freed with dispatch_free().
 *
 * NOTE(review): manager locking, the store to *dispp, the cleanup labels
 * other than deallocate_dispatch and the error returns are not visible in
 * this listing.
 */
2711 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2712 isc_taskmgr_t *taskmgr, unsigned int buffersize,
2713 unsigned int maxbuffers, unsigned int maxrequests,
2714 unsigned int buckets, unsigned int increment,
2715 unsigned int attributes, dns_dispatch_t **dispp)
2717 isc_result_t result;
2718 dns_dispatch_t *disp;
2723 REQUIRE(VALID_DISPATCHMGR(mgr));
2724 REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2725 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2726 REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
/* Private dispatches are never returned by dispatch_find(). */
2728 attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */
2733 * dispatch_allocate() checks mgr for us.
2734 * qid_allocate() checks buckets and increment for us.
2737 result = dispatch_allocate(mgr, maxrequests, &disp);
2738 if (result != ISC_R_SUCCESS) {
2743 result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2744 if (result != ISC_R_SUCCESS)
2745 goto deallocate_dispatch;
2747 disp->socktype = isc_sockettype_tcp;
2748 disp->socket = NULL;
2749 isc_socket_attach(sock, &disp->socket);
2751 disp->sepool = NULL;
2754 disp->task[0] = NULL;
2755 result = isc_task_create(taskmgr, 0, &disp->task[0]);
2756 if (result != ISC_R_SUCCESS)
2759 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2760 DNS_EVENT_DISPATCHCONTROL,
2762 sizeof(isc_event_t));
2763 if (disp->ctlevent == NULL) {
2764 result = ISC_R_NOMEMORY;
2768 isc_task_setname(disp->task[0], "tcpdispatch", disp);
2770 dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2771 disp->tcpmsg_valid = 1;
2773 disp->attributes = attributes;
2776 * Append it to the dispatcher list.
2778 ISC_LIST_APPEND(mgr->list, disp, link);
2781 mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2782 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2786 return (ISC_R_SUCCESS);
/* Unwind path: release in reverse order of acquisition. */
2792 isc_task_detach(&disp->task[0]);
2794 isc_socket_detach(&disp->socket);
2795 deallocate_dispatch:
2796 dispatch_free(&disp);
/*
 * Find or create a UDP dispatch for 'localaddr'.
 *
 * Requires: valid 'mgr'; non-NULL 'sockmgr', 'localaddr' and 'taskmgr';
 * 512 <= buffersize < 64K; maxbuffers > 0; buckets < 2097169;
 * increment > buckets; 'dispp' non-NULL with '*dispp' NULL; and
 * DNS_DISPATCHATTR_TCP clear in 'attributes'.  When
 * DNS_DISPATCHATTR_EXCLUSIVE is requested the port in 'localaddr' must be 0.
 *
 * The manager's UDP parameters are first set via dns_dispatchmgr_setudp().
 * If 'dup_dispatch' is NULL an existing dispatch matching
 * 'attributes'/'mask' is looked up with dispatch_find() and, when found,
 * adjusted and reused.  Otherwise a new dispatch is built with
 * dispatch_createudp(), which is handed dup_dispatch->socket when
 * 'dup_dispatch' is non-NULL.
 */
2804 dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2805 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2806 unsigned int buffersize,
2807 unsigned int maxbuffers, unsigned int maxrequests,
2808 unsigned int buckets, unsigned int increment,
2809 unsigned int attributes, unsigned int mask,
2810 dns_dispatch_t **dispp, dns_dispatch_t *dup_dispatch)
2812 isc_result_t result;
2813 dns_dispatch_t *disp = NULL;
2815 REQUIRE(VALID_DISPATCHMGR(mgr));
2816 REQUIRE(sockmgr != NULL);
2817 REQUIRE(localaddr != NULL);
2818 REQUIRE(taskmgr != NULL);
2819 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2820 REQUIRE(maxbuffers > 0);
2821 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2822 REQUIRE(increment > buckets);
2823 REQUIRE(dispp != NULL && *dispp == NULL);
2824 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2826 result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2827 maxrequests, buckets, increment);
2828 if (result != ISC_R_SUCCESS)
2833 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2834 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2839 * See if we have a dispatcher that matches.
2841 if (dup_dispatch == NULL) {
2842 result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2843 if (result == ISC_R_SUCCESS) {
/* Reuse: only ever raise the request limit of a shared dispatch. */
2846 if (disp->maxrequests < maxrequests)
2847 disp->maxrequests = maxrequests;
/*
 * If the existing dispatch is listening but this caller asks for NOLISTEN,
 * switch it to NOLISTEN and cancel any receive that is still pending.
 */
2849 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0
2850 && (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2852 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2853 if (disp->recv_pending != 0)
2854 isc_socket_cancel(disp->socket,
2856 ISC_SOCKCANCEL_RECV);
2859 UNLOCK(&disp->lock);
2864 return (ISC_R_SUCCESS);
/* No reusable dispatch (or duplication requested): build a new one. */
2872 result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2873 maxrequests, attributes, &disp,
2874 dup_dispatch == NULL
2876 : dup_dispatch->socket);
2878 if (result != ISC_R_SUCCESS) {
2886 return (ISC_R_SUCCESS);
/*
 * Convenience wrapper: same as dns_dispatch_getudp_dup() with no dispatch
 * to duplicate (the final argument is NULL).  All other arguments are passed
 * through unchanged and the callee's result is returned.
 */
2890 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2891 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2892 unsigned int buffersize,
2893 unsigned int maxbuffers, unsigned int maxrequests,
2894 unsigned int buckets, unsigned int increment,
2895 unsigned int attributes, unsigned int mask,
2896 dns_dispatch_t **dispp)
2898 return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr,
2899 buffersize, maxbuffers, maxrequests,
2900 buckets, increment, attributes,
2901 mask, dispp, NULL));
2905 * mgr should be locked.
/*
 * DNS_DISPATCH_HELD is the number of slots in the held[] array used by
 * get_udpsocket() below; it defaults to 20 unless defined by the build.
 */
2908 #ifndef DNS_DISPATCH_HELD
2909 #define DNS_DISPATCH_HELD 20U
/*
 * Open a UDP socket for 'disp' on 'localaddr'.  'sockp' must be non-NULL and
 * '*sockp' must be NULL on entry.
 *
 * When 'localaddr' carries port 0, ports are drawn at random from the
 * manager's v4ports/v6ports table (chosen by address family) for up to 1024
 * attempts; attempts that fail with ISC_R_NOPERM or ISC_R_ADDRINUSE are
 * retried, and the chosen port is recorded in disp->localport.
 * ISC_R_ADDRNOTAVAIL is returned from this branch on a condition that is not
 * visible in this copy (presumably an empty port table -- TODO confirm).
 *
 * When a port is given, the socket is opened with ISC_SOCKET_REUSEADDRESS.
 *
 * A further loop opens sockets without options up to 0xffff times and
 * checks each with portavailable(); sockets are parked in held[] and all
 * remaining held sockets are released at the end.  If no usable port is
 * found an error is logged and ISC_R_FAILURE is recorded.
 *
 * NOTE(review): short lines (braces, else, break/continue, index updates)
 * are missing here, so the exact transitions between these phases cannot be
 * read off this copy.
 */
2913 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2914 isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2915 isc_socket_t **sockp, isc_socket_t *dup_socket)
2918 isc_socket_t *held[DNS_DISPATCH_HELD];
2919 isc_sockaddr_t localaddr_bound;
2920 isc_socket_t *sock = NULL;
2921 isc_result_t result = ISC_R_SUCCESS;
2922 isc_boolean_t anyport;
2924 INSIST(sockp != NULL && *sockp == NULL);
/* Work on a copy so the caller's address is not modified by setport. */
2926 localaddr_bound = *localaddr;
2927 anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
2930 unsigned int nports;
2934 * If no port is specified, we first try to pick up a random
2935 * port by ourselves.
2937 if (isc_sockaddr_pf(localaddr) == AF_INET) {
2938 nports = disp->mgr->nv4ports;
2939 ports = disp->mgr->v4ports;
2941 nports = disp->mgr->nv6ports;
2942 ports = disp->mgr->v6ports;
2945 return (ISC_R_ADDRNOTAVAIL);
2947 for (i = 0; i < 1024; i++) {
2950 prt = ports[dispatch_uniformrandom(
2953 isc_sockaddr_setport(&localaddr_bound, prt);
2954 result = open_socket(sockmgr, &localaddr_bound,
2957 * Continue if the port choosen is already in use
2958 * or the OS has reserved it.
2960 if (result == ISC_R_NOPERM ||
2961 result == ISC_R_ADDRINUSE)
2963 disp->localport = prt;
2969 * If this fails 1024 times, we then ask the kernel for
2973 /* Allow to reuse address for non-random ports. */
2974 result = open_socket(sockmgr, localaddr,
2975 ISC_SOCKET_REUSEADDRESS, &sock,
2978 if (result == ISC_R_SUCCESS)
2984 memset(held, 0, sizeof(held));
2987 for (j = 0; j < 0xffffU; j++) {
2988 result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
2989 if (result != ISC_R_SUCCESS)
2991 else if (portavailable(mgr, sock, NULL))
/* Release whatever socket currently occupies this held[] slot. */
2993 if (held[i] != NULL)
2994 isc_socket_detach(&held[i]);
2997 if (i == DNS_DISPATCH_HELD)
3001 mgr_log(mgr, ISC_LOG_ERROR,
3002 "avoid-v%s-udp-ports: unable to allocate "
3003 "an available port",
3004 isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
3005 result = ISC_R_FAILURE;
/* Drop every socket still parked in held[]. */
3011 for (i = 0; i < DNS_DISPATCH_HELD; i++) {
3012 if (held[i] != NULL)
3013 isc_socket_detach(&held[i]);
/*
 * Allocate and initialize a UDP dispatch for 'localaddr' and append it to
 * mgr->list.
 *
 * Without DNS_DISPATCHATTR_EXCLUSIVE, a socket is obtained through
 * get_udpsocket() and becomes disp->socket.  With it, a specific
 * (non-wildcard) 'localaddr' is only probed by opening a socket that is
 * detached again at once; a port table of DNS_DISPATCH_PORTTABLESIZE lists
 * and the "disp_portpool" memory pool are then created.
 *
 * disp->ntasks tasks named "udpdispatch" are created (MAX_INTERNAL_TASKS
 * for exclusive dispatches), followed by the control event and the
 * "disp_sepool" socket-event pool with its own lock.  The stored attributes
 * always have DNS_DISPATCHATTR_TCP cleared and DNS_DISPATCHATTR_UDP set.
 *
 * The tail of the function releases the event pool, control event, tasks
 * and socket before freeing the dispatch on failure.
 */
3020 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
3021 isc_taskmgr_t *taskmgr,
3022 isc_sockaddr_t *localaddr,
3023 unsigned int maxrequests,
3024 unsigned int attributes,
3025 dns_dispatch_t **dispp,
3026 isc_socket_t *dup_socket)
3028 isc_result_t result;
3029 dns_dispatch_t *disp;
3030 isc_socket_t *sock = NULL;
3034 * dispatch_allocate() checks mgr for us.
3037 result = dispatch_allocate(mgr, maxrequests, &disp);
3038 if (result != ISC_R_SUCCESS)
3041 disp->socktype = isc_sockettype_udp;
3043 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
3044 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock,
3046 if (result != ISC_R_SUCCESS)
3047 goto deallocate_dispatch;
/* Only format the address when the debug message would be emitted. */
3049 if (isc_log_wouldlog(dns_lctx, 90)) {
3050 char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3052 isc_sockaddr_format(localaddr, addrbuf,
3053 ISC_SOCKADDR_FORMATSIZE);
3054 mgr_log(mgr, LVL(90), "dns_dispatch_createudp: Created"
3055 " UDP dispatch for %s with socket fd %d\n",
3056 addrbuf, isc_socket_getfd(sock));
3060 isc_sockaddr_t sa_any;
3063 * For dispatches using exclusive sockets with a specific
3064 * source address, we only check if the specified address is
3065 * available on the system. Query sockets will be created later
3068 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
3069 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
3070 result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
3072 isc_socket_detach(&sock);
3073 if (result != ISC_R_SUCCESS)
3074 goto deallocate_dispatch;
3077 disp->port_table = isc_mem_get(mgr->mctx,
3078 sizeof(disp->port_table[0]) *
3079 DNS_DISPATCH_PORTTABLESIZE);
/*
 * NOTE(review): no error code is assigned to 'result' before this goto, so
 * 'result' still holds the value from the last successful call.  If the
 * cleanup path returns 'result', an allocation failure here would be
 * reported as success without '*dispp' being set; it looks like
 * ISC_R_NOMEMORY should be stored first -- confirm and fix upstream.
 */
3080 if (disp->port_table == NULL)
3081 goto deallocate_dispatch;
3082 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
3083 ISC_LIST_INIT(disp->port_table[i]);
3085 result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
3087 if (result != ISC_R_SUCCESS)
3088 goto deallocate_dispatch;
3089 isc_mempool_setname(disp->portpool, "disp_portpool");
3090 isc_mempool_setfreemax(disp->portpool, 128);
3092 disp->socket = sock;
3093 disp->local = *localaddr;
3095 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3096 disp->ntasks = MAX_INTERNAL_TASKS;
3099 for (i = 0; i < disp->ntasks; i++) {
3100 disp->task[i] = NULL;
3101 result = isc_task_create(taskmgr, 0, &disp->task[i]);
3102 if (result != ISC_R_SUCCESS) {
/* Undo task creation by shutting down and detaching a task. */
3104 isc_task_shutdown(disp->task[i]);
3105 isc_task_detach(&disp->task[i]);
3109 isc_task_setname(disp->task[i], "udpdispatch", disp);
3112 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
3113 DNS_EVENT_DISPATCHCONTROL,
3115 sizeof(isc_event_t));
3116 if (disp->ctlevent == NULL) {
3117 result = ISC_R_NOMEMORY;
/* Any failure to create the socket-event pool is mapped to NOMEMORY. */
3121 disp->sepool = NULL;
3122 if (isc_mempool_create(mgr->mctx, sizeof(isc_socketevent_t),
3123 &disp->sepool) != ISC_R_SUCCESS)
3125 result = ISC_R_NOMEMORY;
3129 result = isc_mutex_init(&disp->sepool_lock);
3130 if (result != ISC_R_SUCCESS)
3133 isc_mempool_setname(disp->sepool, "disp_sepool");
3134 isc_mempool_setmaxalloc(disp->sepool, 32768);
3135 isc_mempool_setfreemax(disp->sepool, 32768);
3136 isc_mempool_associatelock(disp->sepool, &disp->sepool_lock);
3137 isc_mempool_setfillcount(disp->sepool, 16);
/* Force the UDP/TCP attribute bits to describe a UDP dispatch. */
3139 attributes &= ~DNS_DISPATCHATTR_TCP;
3140 attributes |= DNS_DISPATCHATTR_UDP;
3141 disp->attributes = attributes;
3144 * Append it to the dispatcher list.
3146 ISC_LIST_APPEND(mgr->list, disp, link);
3148 mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
3149 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
3150 if (disp->socket != NULL)
3151 dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
/*
 * Error unwinding, in reverse order of construction: event pool, control
 * event, tasks, socket, then the dispatch itself.
 */
3161 isc_mempool_destroy(&disp->sepool);
3163 isc_event_free(&disp->ctlevent);
3165 for (i = 0; i < disp->ntasks; i++)
3166 isc_task_detach(&disp->task[i]);
3168 if (disp->socket != NULL)
3169 isc_socket_detach(&disp->socket);
3170 deallocate_dispatch:
3171 dispatch_free(&disp);
/*
 * Attach to 'disp' through '*dispp'.
 *
 * Requires: 'disp' is a valid dispatch; 'dispp' is non-NULL and '*dispp' is
 * NULL.  The visible code releases disp->lock before returning.
 *
 * NOTE(review): the lock acquisition, the reference-count update and the
 * store to '*dispp' are not present in this copy -- confirm upstream.
 */
3177 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
3178 REQUIRE(VALID_DISPATCH(disp));
3179 REQUIRE(dispp != NULL && *dispp == NULL);
3183 UNLOCK(&disp->lock);
3189 * It is important to lock the manager while we are deleting the dispatch,
3190 * since dns_dispatch_getudp will call dispatch_find, which returns to
3191 * the caller a dispatch but does not attach to it until later. _getudp
3192 * locks the manager, however, so locking it here will keep us from attaching
3193 * to a dispatcher that is in the process of going away.
/*
 * Drop the caller's reference held in '*dispp'.
 *
 * Requires: 'dispp' is non-NULL and '*dispp' is a valid dispatch.
 *
 * When the reference count reaches zero, any pending receive on the main
 * socket and on every socket in disp->activesockets is cancelled and the
 * dispatch is marked as shutting down.  destroy_disp_ok() then decides
 * whether the dispatch can be destroyed; the control event is sent to
 * disp->task[0] after the lock is released (presumably only when 'killit'
 * is true -- the guarding line is not visible in this copy).
 */
3196 dns_dispatch_detach(dns_dispatch_t **dispp) {
3197 dns_dispatch_t *disp;
3198 dispsocket_t *dispsock;
3199 isc_boolean_t killit;
3201 REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
3208 INSIST(disp->refcount > 0);
3210 if (disp->refcount == 0) {
3211 if (disp->recv_pending > 0)
3212 isc_socket_cancel(disp->socket, disp->task[0],
3213 ISC_SOCKCANCEL_RECV);
/* Also cancel receives on each per-query socket that is still active. */
3214 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3216 dispsock = ISC_LIST_NEXT(dispsock, link)) {
3217 isc_socket_cancel(dispsock->socket, dispsock->task,
3218 ISC_SOCKCANCEL_RECV);
3220 disp->shutting_down = 1;
3223 dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
3225 killit = destroy_disp_ok(disp);
3226 UNLOCK(&disp->lock);
3228 isc_task_send(disp->task[0], &disp->ctlevent);
/*
 * Compatibility wrapper: forwards to dns_dispatch_addresponse3() with
 * 'options' set to 0 and returns its result.
 */
3232 dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3233 isc_task_t *task, isc_taskaction_t action, void *arg,
3234 dns_messageid_t *idp, dns_dispentry_t **resp,
3235 isc_socketmgr_t *sockmgr)
3237 return (dns_dispatch_addresponse3(disp, 0, dest, task, action, arg,
3238 idp, resp, sockmgr));
/*
 * Register a response entry on 'disp' for replies coming from 'dest'.
 *
 * Requires: valid 'disp'; non-NULL 'task', 'dest' and 'idp'; 'resp' is
 * non-NULL with '*resp' NULL; and 'sockmgr' is non-NULL when the dispatch
 * has DNS_DISPATCHATTR_EXCLUSIVE.
 *
 * Visible results:
 *   ISC_R_SHUTTINGDOWN  the dispatch is shutting down
 *   ISC_R_QUOTA         disp->requests has reached disp->maxrequests
 *   ISC_R_NOMORE        returned from the ID-selection section
 *   ISC_R_NOMEMORY      returned right after the entry is taken from
 *                       mgr->rpool
 *   ISC_R_SUCCESS       the entry was added to the QID table
 *
 * For exclusive dispatches a dedicated socket is obtained through
 * get_dispsocket(); when disp->nsockets exceeds DNS_DISPATCH_SOCKSQUOTA the
 * oldest active query is sent an ISC_R_CANCELED event first.  For other
 * dispatches the local port is disp->localport.
 *
 * NOTE(review): short lines (lock acquisition, loop headers, the stores to
 * '*idp' and '*resp', several returns) are missing from this copy.
 */
3242 dns_dispatch_addresponse3(dns_dispatch_t *disp, unsigned int options,
3243 isc_sockaddr_t *dest, isc_task_t *task,
3244 isc_taskaction_t action, void *arg,
3245 dns_messageid_t *idp, dns_dispentry_t **resp,
3246 isc_socketmgr_t *sockmgr)
3248 dns_dispentry_t *res;
3249 unsigned int bucket;
3250 in_port_t localport = 0;
3255 dispsocket_t *dispsocket = NULL;
3256 isc_result_t result;
3258 REQUIRE(VALID_DISPATCH(disp));
3259 REQUIRE(task != NULL);
3260 REQUIRE(dest != NULL);
3261 REQUIRE(resp != NULL && *resp == NULL);
3262 REQUIRE(idp != NULL);
3263 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3264 REQUIRE(sockmgr != NULL);
3268 if (disp->shutting_down == 1) {
3269 UNLOCK(&disp->lock);
3270 return (ISC_R_SHUTTINGDOWN);
3273 if (disp->requests >= disp->maxrequests) {
3274 UNLOCK(&disp->lock);
3275 return (ISC_R_QUOTA);
3278 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
3279 disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
3280 dispsocket_t *oldestsocket;
3281 dns_dispentry_t *oldestresp;
3282 dns_dispatchevent_t *rev;
3285 * Kill oldest outstanding query if the number of sockets
3286 * exceeds the quota to keep the room for new queries.
/*
 * NOTE(review): the list head is dereferenced without a visible NULL check;
 * this assumes activesockets is non-empty whenever nsockets is over quota
 * -- confirm.
 */
3288 oldestsocket = ISC_LIST_HEAD(disp->activesockets);
3289 oldestresp = oldestsocket->resp;
3290 if (oldestresp != NULL && !oldestresp->item_out) {
3291 rev = allocate_devent(oldestresp->disp);
3293 rev->buffer.base = NULL;
3294 rev->result = ISC_R_CANCELED;
3295 rev->id = oldestresp->id;
3296 ISC_EVENT_INIT(rev, sizeof(*rev), 0,
3297 NULL, DNS_EVENT_DISPATCH,
3299 oldestresp->arg, oldestresp,
3301 oldestresp->item_out = ISC_TRUE;
3302 isc_task_send(oldestresp->task,
3303 ISC_EVENT_PTR(&rev));
3304 inc_stats(disp->mgr,
3305 dns_resstatscounter_dispabort);
3310 * Move this entry to the tail so that it won't (easily) be
3311 * examined before actually being canceled.
3313 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
3314 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
3317 qid = DNS_QID(disp);
3319 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3321 * Get a separate UDP socket with a random port number.
3323 result = get_dispsocket(disp, dest, sockmgr, &dispsocket,
3325 if (result != ISC_R_SUCCESS) {
3326 UNLOCK(&disp->lock);
3327 inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
3331 localport = disp->localport;
3335 * Try somewhat hard to find an unique ID unless FIXEDID is set
3336 * in which case we use the id passed in via *idp.
3339 if ((options & DNS_DISPATCHOPT_FIXEDID) != 0)
3342 id = (dns_messageid_t)dispatch_random(DISP_ARC4CTX(disp));
/* An ID is usable when no entry exists for (dest, id, localport). */
3346 bucket = dns_hash(qid, dest, id, localport);
3347 if (entry_search(qid, dest, id, localport, bucket) == NULL) {
/*
 * NOTE(review): this test looks at disp->attributes with
 * DNS_DISPATCHATTR_FIXEDID, whereas the initial ID choice above looks at
 * 'options' with DNS_DISPATCHOPT_FIXEDID -- confirm which is intended.
 */
3351 if ((disp->attributes & DNS_DISPATCHATTR_FIXEDID) != 0)
3353 id += qid->qid_increment;
3359 UNLOCK(&disp->lock);
3360 return (ISC_R_NOMORE);
3363 res = isc_mempool_get(disp->mgr->rpool);
3365 if (dispsocket != NULL)
3366 destroy_dispsocket(disp, &dispsocket);
3367 UNLOCK(&disp->lock);
3368 return (ISC_R_NOMEMORY);
/* Fill in the new entry; it keeps its own reference to the caller's task. */
3374 isc_task_attach(task, &res->task);
3377 res->port = localport;
3378 res->bucket = bucket;
3380 res->action = action;
3382 res->dispsocket = dispsocket;
3383 if (dispsocket != NULL)
3384 dispsocket->resp = res;
3385 res->item_out = ISC_FALSE;
3386 ISC_LIST_INIT(res->items);
3387 ISC_LINK_INIT(res, link);
3388 res->magic = RESPONSE_MAGIC;
3391 ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3394 request_log(disp, res, LVL(90),
3395 "attached to task %p", res->task);
/* Start receiving on UDP dispatches and on connected dispatches. */
3397 if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3398 ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
3399 result = startrecv(disp, dispsocket);
3400 if (result != ISC_R_SUCCESS) {
/* Receive could not be started: undo the registration made above. */
3402 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3405 if (dispsocket != NULL)
3406 destroy_dispsocket(disp, &dispsocket);
3411 UNLOCK(&disp->lock);
3412 isc_task_detach(&res->task);
3413 isc_mempool_put(disp->mgr->rpool, res);
3418 if (dispsocket != NULL)
3419 ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3421 UNLOCK(&disp->lock);
3426 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3427 INSIST(res->dispsocket != NULL);
3429 return (ISC_R_SUCCESS);
/*
 * Legacy entry point without a socket manager argument.
 *
 * Requires: 'disp' is a valid dispatch that does NOT have
 * DNS_DISPATCHATTR_EXCLUSIVE set.  Forwards to dns_dispatch_addresponse3()
 * with 'options' 0 and returns its result; the remaining arguments of that
 * call are not visible in this copy.
 */
3433 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3434 isc_task_t *task, isc_taskaction_t action, void *arg,
3435 dns_messageid_t *idp, dns_dispentry_t **resp)
3437 REQUIRE(VALID_DISPATCH(disp));
3438 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3440 return (dns_dispatch_addresponse3(disp, 0, dest, task, action, arg,
/*
 * Mark 'disp' as connected (DNS_DISPATCHATTR_CONNECTED) and start receiving
 * on it.  The result of startrecv() is deliberately discarded.
 *
 * Requires: 'disp' is a valid dispatch.
 */
3445 dns_dispatch_starttcp(dns_dispatch_t *disp) {
3447 REQUIRE(VALID_DISPATCH(disp));
3449 dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3452 disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
3453 (void)startrecv(disp, NULL);
3454 UNLOCK(&disp->lock);
/*
 * Remove the response entry '*resp' from its dispatch and release
 * everything attached to it.
 *
 * Requires: 'resp' is non-NULL and '*resp' is a valid response whose
 * dispatch and manager are valid; if 'sockevent' is non-NULL then
 * '*sockevent' must be non-NULL.
 *
 * Steps visible here: if the dispatch reference count has reached zero,
 * pending receives are cancelled and the dispatch is marked as shutting
 * down; the entry is unlinked from the QID table; an event that was posted
 * but not yet delivered is pulled back with isc_task_unsend(); the event
 * and its buffer are freed; the entry's task reference is dropped; any
 * per-query socket has its receive cancelled and its back-pointer cleared;
 * queued items are freed; the entry goes back to mgr->rpool.  Finally
 * destroy_disp_ok() is consulted and the control event is sent to
 * disp->task[0].
 */
3458 dns_dispatch_removeresponse(dns_dispentry_t **resp,
3459 dns_dispatchevent_t **sockevent)
3461 dns_dispatchmgr_t *mgr;
3462 dns_dispatch_t *disp;
3463 dns_dispentry_t *res;
3464 dispsocket_t *dispsock;
3465 dns_dispatchevent_t *ev;
3466 unsigned int bucket;
3467 isc_boolean_t killit;
3469 isc_eventlist_t events;
3472 REQUIRE(resp != NULL);
3473 REQUIRE(VALID_RESPONSE(*resp));
3479 REQUIRE(VALID_DISPATCH(disp));
3481 REQUIRE(VALID_DISPATCHMGR(mgr));
3483 qid = DNS_QID(disp);
3485 if (sockevent != NULL) {
3486 REQUIRE(*sockevent != NULL);
3495 INSIST(disp->requests > 0);
3497 INSIST(disp->refcount > 0);
/* Same shutdown sequence as in dns_dispatch_detach(). */
3499 if (disp->refcount == 0) {
3500 if (disp->recv_pending > 0)
3501 isc_socket_cancel(disp->socket, disp->task[0],
3502 ISC_SOCKCANCEL_RECV);
3503 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3505 dispsock = ISC_LIST_NEXT(dispsock, link)) {
3506 isc_socket_cancel(dispsock->socket, dispsock->task,
3507 ISC_SOCKCANCEL_RECV);
3509 disp->shutting_down = 1;
3512 bucket = res->bucket;
3515 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3518 if (ev == NULL && res->item_out) {
3520 * We've posted our event, but the caller hasn't gotten it
3521 * yet. Take it back.
3523 ISC_LIST_INIT(events);
3524 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3527 * We had better have gotten it back.
3530 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
/* Release the event and, if it carries one, its receive buffer. */
3534 REQUIRE(res->item_out == ISC_TRUE);
3535 res->item_out = ISC_FALSE;
3536 if (ev->buffer.base != NULL)
3537 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3538 free_devent(disp, ev);
3541 request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3542 isc_task_detach(&res->task);
/* Detach the per-query socket from this entry before the entry is freed. */
3544 if (res->dispsocket != NULL) {
3545 isc_socket_cancel(res->dispsocket->socket,
3546 res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3547 res->dispsocket->resp = NULL;
3551 * Free any buffered requests as well
3553 ev = ISC_LIST_HEAD(res->items);
3554 while (ev != NULL) {
3555 ISC_LIST_UNLINK(res->items, ev, ev_link);
3556 if (ev->buffer.base != NULL)
3557 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3558 free_devent(disp, ev);
3559 ev = ISC_LIST_HEAD(res->items);
3562 isc_mempool_put(disp->mgr->rpool, res);
3563 if (disp->shutting_down == 1)
3566 (void)startrecv(disp, NULL);
3568 killit = destroy_disp_ok(disp);
3569 UNLOCK(&disp->lock);
3571 isc_task_send(disp->task[0], &disp->ctlevent);
/*
 * Deliver the dispatch's pre-allocated failsafe event to one response entry
 * so that it learns why the dispatch is shutting down.
 *
 * The first entry (in linear_first()/linear_next() order) that has no event
 * outstanding is chosen.  The event is initialized with that entry's action
 * and argument, carries disp->shutdown_why as its result and no buffer, and
 * is sent to the entry's task.  disp->shutdown_out is checked on entry and
 * set to 1 once the event has been prepared.
 */
3575 do_cancel(dns_dispatch_t *disp) {
3576 dns_dispatchevent_t *ev;
3577 dns_dispentry_t *resp;
3580 if (disp->shutdown_out == 1)
3583 qid = DNS_QID(disp);
3586 * Search for the first response handler without packets outstanding
3587 * unless a specific hander is given.
3590 for (resp = linear_first(qid);
3591 resp != NULL && resp->item_out;
3593 resp = linear_next(qid, resp);
3596 * No one to send the cancel event to, so nothing to do.
3602 * Send the shutdown failsafe event to this resp.
3604 ev = disp->failsafe_ev;
3605 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3606 resp->action, resp->arg, resp, NULL, NULL);
3607 ev->result = disp->shutdown_why;
3608 ev->buffer.base = NULL;
3609 ev->buffer.length = 0;
3610 disp->shutdown_out = 1;
3611 request_log(disp, resp, LVL(10),
3612 "cancel: failsafe event %p -> task %p",
/* Mark the entry as having an event in flight before handing it over. */
3614 resp->item_out = ISC_TRUE;
3615 isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
/*
 * Return the dispatch's main socket (disp->socket).  No reference is taken
 * and no lock is acquired in the visible code.
 *
 * Requires: 'disp' is a valid dispatch.
 */
3621 dns_dispatch_getsocket(dns_dispatch_t *disp) {
3622 REQUIRE(VALID_DISPATCH(disp));
3624 return (disp->socket);
/*
 * Return the per-query socket of response entry 'resp' when it has one
 * (resp->dispsocket != NULL).  The value returned otherwise is not visible
 * in this copy.
 *
 * Requires: 'resp' is a valid response entry.
 */
3628 dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3629 REQUIRE(VALID_RESPONSE(resp));
3631 if (resp->dispsocket != NULL)
3632 return (resp->dispsocket->socket);
/*
 * Copy the dispatch's local address into '*addrp'.
 *
 * Only implemented for UDP dispatches: returns ISC_R_SUCCESS after copying
 * disp->local, and ISC_R_NOTIMPLEMENTED for any other socket type.
 *
 * Requires: 'disp' is a valid dispatch and 'addrp' is non-NULL.
 */
3638 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3640 REQUIRE(VALID_DISPATCH(disp));
3641 REQUIRE(addrp != NULL);
3643 if (disp->socktype == isc_sockettype_udp) {
3644 *addrp = disp->local;
3645 return (ISC_R_SUCCESS);
3647 return (ISC_R_NOTIMPLEMENTED);
/*
 * Put 'disp' into the shutting-down state with reason ISC_R_CANCELED.
 * If the dispatch is already shutting down the lock is released and nothing
 * else is changed in the visible code.
 *
 * Requires: 'disp' is a valid dispatch.
 */
3651 dns_dispatch_cancel(dns_dispatch_t *disp) {
3652 REQUIRE(VALID_DISPATCH(disp));
3656 if (disp->shutting_down == 1) {
3657 UNLOCK(&disp->lock);
3661 disp->shutdown_why = ISC_R_CANCELED;
3662 disp->shutting_down = 1;
3665 UNLOCK(&disp->lock);
/*
 * Return the current attribute bits of 'disp' (disp->attributes), read
 * without taking the dispatch lock.
 *
 * Requires: 'disp' is a valid dispatch.
 */
3671 dns_dispatch_getattributes(dns_dispatch_t *disp) {
3672 REQUIRE(VALID_DISPATCH(disp));
3675 * We don't bother locking disp here; it's the caller's responsibility
3676 * to use only non volatile flags.
3678 return (disp->attributes);
/*
 * Replace the attribute bits selected by 'mask' with the corresponding bits
 * of 'attributes'.
 *
 * Requires: 'disp' is a valid dispatch; DNS_DISPATCHATTR_EXCLUSIVE is not
 * set in 'attributes'; and, for a dispatch that is itself exclusive,
 * DNS_DISPATCHATTR_NOLISTEN is not set in 'attributes'.
 *
 * When 'mask' covers DNS_DISPATCHATTR_NOLISTEN, a change of that bit has a
 * side effect: clearing it starts a receive via startrecv(); setting it
 * cancels any pending receive on the main socket.
 */
3682 dns_dispatch_changeattributes(dns_dispatch_t *disp,
3683 unsigned int attributes, unsigned int mask)
3685 REQUIRE(VALID_DISPATCH(disp));
3686 /* Exclusive attribute can only be set on creation */
3687 REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3688 /* Also, a dispatch with randomport specified cannot start listening */
3689 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3690 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3693 * Should check for valid attributes here!
3698 if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
/* NOLISTEN -> listening: begin receiving again. */
3699 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3700 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3701 disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3702 (void)startrecv(disp, NULL);
3703 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3705 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3706 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3707 if (disp->recv_pending != 0)
3708 isc_socket_cancel(disp->socket, disp->task[0],
3709 ISC_SOCKCANCEL_RECV);
/* Generic update: clear the masked bits, then copy them from 'attributes'. */
3713 disp->attributes &= ~mask;
3714 disp->attributes |= (attributes & mask);
3715 UNLOCK(&disp->lock);
/*
 * Inject a packet that was received elsewhere into 'disp'.
 *
 * 'event' is treated as an isc_socketevent_t.  Its payload ('n' bytes from
 * region.base) is copied into a buffer from allocate_udp_buffer(), wrapped
 * in a newly allocated DNS_EVENT_IMPORTRECVDONE event whose action is
 * udp_shrecv, and sent to disp->task[0].  The new event's region length is
 * set to the manager's buffer size; result, address, timestamp, pktinfo and
 * attributes are copied from the source event.
 *
 * Requires: 'disp' is a valid dispatch with DNS_DISPATCHATTR_NOLISTEN set,
 * 'event' is non-NULL, and the payload is no larger than
 * disp->mgr->buffersize (enforced with INSIST).
 *
 * If the buffer cannot be obtained the new event is freed again; what
 * happens when the event allocation itself fails is not visible in this
 * copy.
 */
3719 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3721 isc_socketevent_t *sevent, *newsevent;
3723 REQUIRE(VALID_DISPATCH(disp));
3724 REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3725 REQUIRE(event != NULL);
3727 sevent = (isc_socketevent_t *)event;
3729 INSIST(sevent->n <= disp->mgr->buffersize);
3730 newsevent = (isc_socketevent_t *)
3731 isc_event_allocate(disp->mgr->mctx, NULL,
3732 DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3733 disp, sizeof(isc_socketevent_t));
3734 if (newsevent == NULL)
3737 buf = allocate_udp_buffer(disp);
3739 isc_event_free(ISC_EVENT_PTR(&newsevent));
/* Copy only the bytes actually received, not the whole source region. */
3742 memmove(buf, sevent->region.base, sevent->n);
3743 newsevent->region.base = buf;
3744 newsevent->region.length = disp->mgr->buffersize;
3745 newsevent->n = sevent->n;
3746 newsevent->result = sevent->result;
3747 newsevent->address = sevent->address;
3748 newsevent->timestamp = sevent->timestamp;
3749 newsevent->pktinfo = sevent->pktinfo;
3750 newsevent->attributes = sevent->attributes;
3752 isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
/*
 * Pick a dispatch from the set: the one at index dset->cur.
 *
 * A NULL or empty set (ndisp == 0) is handled by an early exit whose return
 * value is not visible in this copy.  After the pick, dset->cur is compared
 * with dset->ndisp -- presumably to wrap the index back to the start; the
 * lines that advance and reset it are missing here (TODO confirm).
 * dset->lock is released before returning.
 */
3756 dns_dispatchset_get(dns_dispatchset_t *dset) {
3757 dns_dispatch_t *disp;
3759 /* check that dispatch set is configured */
3760 if (dset == NULL || dset->ndisp == 0)
3764 disp = dset->dispatches[dset->cur];
3766 if (dset->cur == dset->ndisp)
3768 UNLOCK(&dset->lock);
/*
 * Build a dispatch set of 'n' UDP dispatches based on 'source'.
 *
 * Requires: 'source' is a valid dispatch with DNS_DISPATCHATTR_UDP set;
 * 'dsetp' is non-NULL and '*dsetp' is NULL.
 *
 * The set structure and its array of 'n' dispatch pointers are taken from
 * 'mctx', to which the set attaches.  Slot 0 is an additional reference to
 * 'source'; slots 1..n-1 are created with dispatch_createudp() using
 * source->maxrequests.  ISC_R_SUCCESS is returned on the success path and
 * ISC_R_NOMEMORY when the set structure cannot be allocated.
 *
 * On failure the dispatches created so far are detached, the array is
 * freed, the memory context reference is dropped, the lock is destroyed
 * and the set structure is freed.
 */
3774 dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr,
3775 isc_taskmgr_t *taskmgr, dns_dispatch_t *source,
3776 dns_dispatchset_t **dsetp, int n)
3778 isc_result_t result;
3779 dns_dispatchset_t *dset;
3780 dns_dispatchmgr_t *mgr;
3783 REQUIRE(VALID_DISPATCH(source));
3784 REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0);
3785 REQUIRE(dsetp != NULL && *dsetp == NULL);
3789 dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t));
3791 return (ISC_R_NOMEMORY);
/* Zero-fill so that every field starts out as 0/NULL. */
3792 memset(dset, 0, sizeof(*dset));
3794 result = isc_mutex_init(&dset->lock);
3795 if (result != ISC_R_SUCCESS)
3798 dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n);
3800 result = ISC_R_NOMEMORY;
3804 isc_mem_attach(mctx, &dset->mctx);
/* Slot 0 shares the caller's dispatch rather than creating a new one. */
3808 dset->dispatches[0] = NULL;
3809 dns_dispatch_attach(source, &dset->dispatches[0]);
3812 for (i = 1; i < n; i++) {
3813 dset->dispatches[i] = NULL;
3814 result = dispatch_createudp(mgr, sockmgr, taskmgr,
3816 source->maxrequests,
3818 &dset->dispatches[i],
3820 if (result != ISC_R_SUCCESS)
3827 return (ISC_R_SUCCESS);
/* Failure cleanup: only slots below 'i' hold a dispatch at this point. */
3832 for (j = 0; j < i; j++)
3833 dns_dispatch_detach(&(dset->dispatches[j]));
3834 isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n);
3835 if (dset->mctx == mctx)
3836 isc_mem_detach(&dset->mctx);
3839 DESTROYLOCK(&dset->lock);
3842 isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t));
/*
 * For every dispatch in the set, cancel all socket operations
 * (ISC_SOCKCANCEL_ALL) that were issued for 'task' on the dispatch's main
 * socket.
 *
 * Requires: 'dset' is non-NULL.
 */
3847 dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) {
3850 REQUIRE(dset != NULL);
3852 for (i = 0; i < dset->ndisp; i++) {
3854 sock = dns_dispatch_getsocket(dset->dispatches[i]);
3855 isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL);
/*
 * Destroy the dispatch set '*dsetp': detach every dispatch it holds, free
 * the pointer array, destroy the set's lock, then free the set structure
 * and drop its memory-context reference in one step.
 *
 * Requires: 'dsetp' is non-NULL and '*dsetp' is non-NULL.
 */
3860 dns_dispatchset_destroy(dns_dispatchset_t **dsetp) {
3861 dns_dispatchset_t *dset;
3864 REQUIRE(dsetp != NULL && *dsetp != NULL);
3867 for (i = 0; i < dset->ndisp; i++)
3868 dns_dispatch_detach(&(dset->dispatches[i]));
/* The array was sized by the number of dispatches, so free it the same way. */
3869 isc_mem_put(dset->mctx, dset->dispatches,
3870 sizeof(dns_dispatch_t *) * dset->ndisp);
3871 DESTROYLOCK(&dset->lock);
3872 isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t));
/*
 * Debugging aid: walk mgr->list and print, for each dispatch, its pointer
 * and its local address (formatted with isc_sockaddr_format()) to standard
 * output.
 *
 * NOTE(review): the list is traversed without any visible locking of the
 * manager -- confirm whether callers are expected to hold mgr->lock.
 */
3879 dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
3880 dns_dispatch_t *disp;
3883 disp = ISC_LIST_HEAD(mgr->list);
3884 while (disp != NULL) {
3885 isc_sockaddr_format(&disp->local, foo, sizeof(foo));
3886 printf("\tdispatch %p, addr %s\n", disp, foo);
3887 disp = ISC_LIST_NEXT(disp, link);