2 * Copyright (C) 2004-2008 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: dispatch.c,v 1.116.18.19.12.5 2008/07/23 23:16:43 marka Exp $ */
25 #include <sys/types.h>
28 #include <isc/entropy.h>
30 #include <isc/mutex.h>
31 #include <isc/print.h>
32 #include <isc/random.h>
33 #include <isc/string.h>
39 #include <dns/dispatch.h>
40 #include <dns/events.h>
42 #include <dns/message.h>
43 #include <dns/portlist.h>
44 #include <dns/tcpmsg.h>
45 #include <dns/types.h>
47 typedef ISC_LIST(dns_dispentry_t) dns_displist_t;
49 typedef struct dns_qid {
51 unsigned int qid_nbuckets; /*%< hash table size */
52 unsigned int qid_increment; /*%< id increment on collision */
54 dns_displist_t *qid_table; /*%< the table itself */
57 /* ARC4 Random generator state */
58 typedef struct arc4ctx {
65 struct dns_dispatchmgr {
70 dns_portlist_t *portlist;
72 /* Locked by "lock". */
75 ISC_LIST(dns_dispatch_t) list;
77 /* Locked by arc4_lock. */
78 isc_mutex_t arc4_lock;
79 arc4ctx_t arc4ctx; /*%< ARC4 context for QID */
81 /* locked by buffer lock */
83 isc_mutex_t buffer_lock;
84 unsigned int buffers; /*%< allocated buffers */
85 unsigned int buffersize; /*%< size of each buffer */
86 unsigned int maxbuffers; /*%< max buffers */
88 /* Locked internally. */
89 isc_mutex_t pool_lock;
90 isc_mempool_t *epool; /*%< memory pool for events */
91 isc_mempool_t *rpool; /*%< memory pool for replies */
92 isc_mempool_t *dpool; /*%< dispatch allocations */
93 isc_mempool_t *bpool; /*%< memory pool for buffers */
95 isc_entropy_t *entropy; /*%< entropy source */
/*
 * Manager state bit and flag predicates.
 *
 * MGR_SHUTTINGDOWN is the bit recorded in a manager's 'state' word.
 * MGR_IS_SHUTTINGDOWN(mgr) is non-zero when that bit is set in mgr->state.
 * IS_PRIVATE(disp) is non-zero when DNS_DISPATCHATTR_PRIVATE is set in
 * disp->attributes.
 *
 * Both predicates take a pointer and evaluate it exactly once.
 */
#define MGR_SHUTTINGDOWN		0x00000001U
#define MGR_IS_SHUTTINGDOWN(mgr) \
	(0 != ((mgr)->state & MGR_SHUTTINGDOWN))

#define IS_PRIVATE(disp) \
	(0 != ((disp)->attributes & DNS_DISPATCHATTR_PRIVATE))
103 struct dns_dispentry {
105 dns_dispatch_t *disp;
111 isc_taskaction_t action;
113 isc_boolean_t item_out;
114 ISC_LIST(dns_dispatchevent_t) items;
115 ISC_LINK(dns_dispentry_t) link;
118 #define INVALID_BUCKET (0xffffdead)
120 struct dns_dispatch {
122 unsigned int magic; /*%< magic */
123 dns_dispatchmgr_t *mgr; /*%< dispatch manager */
124 isc_task_t *task; /*%< internal task */
125 isc_socket_t *socket; /*%< isc socket attached to */
126 isc_sockaddr_t local; /*%< local address */
127 in_port_t localport; /*%< local UDP port */
128 unsigned int maxrequests; /*%< max requests */
129 isc_event_t *ctlevent;
131 /*% Locked by mgr->lock. */
132 ISC_LINK(dns_dispatch_t) link;
134 /* Locked by "lock". */
135 isc_mutex_t lock; /*%< locks all below */
136 isc_sockettype_t socktype;
137 unsigned int attributes;
138 unsigned int refcount; /*%< number of users */
139 dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */
140 unsigned int shutting_down : 1,
144 recv_pending : 1; /*%< is a recv() pending? */
145 isc_result_t shutdown_why;
146 unsigned int requests; /*%< how many requests we have */
147 unsigned int tcpbuffers; /*%< allocated buffers */
148 dns_tcpmsg_t tcpmsg; /*%< for tcp streams */
152 #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ')
153 #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC)
155 #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p')
156 #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
158 #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p')
159 #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
161 #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
162 #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
/*
 * Select the QID table to use for a dispatch: the dispatch's own 'qid'
 * when its socket type is TCP, otherwise the one held by its manager.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand inside a larger expression (e.g. DNS_QID(d) == x); without the
 * outer parentheses the ?: would absorb neighbouring operators.
 *
 * Note: 'disp' is evaluated more than once; do not pass an expression
 * with side effects.
 */
#define DNS_QID(disp) \
	(((disp)->socktype == isc_sockettype_tcp) ? \
	 (disp)->qid : (disp)->mgr->qid)
169 static dns_dispentry_t *bucket_search(dns_qid_t *, isc_sockaddr_t *,
170 dns_messageid_t, in_port_t, unsigned int);
171 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
172 static void destroy_disp(isc_task_t *task, isc_event_t *event);
173 static void udp_recv(isc_task_t *, isc_event_t *);
174 static void tcp_recv(isc_task_t *, isc_event_t *);
175 static void startrecv(dns_dispatch_t *);
176 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
178 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
179 static void *allocate_udp_buffer(dns_dispatch_t *disp);
180 static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
181 static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
182 static void do_cancel(dns_dispatch_t *disp);
183 static dns_dispentry_t *linear_first(dns_qid_t *disp);
184 static dns_dispentry_t *linear_next(dns_qid_t *disp,
185 dns_dispentry_t *resp);
186 static void dispatch_free(dns_dispatch_t **dispp);
187 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
188 isc_socketmgr_t *sockmgr,
189 isc_taskmgr_t *taskmgr,
190 isc_sockaddr_t *localaddr,
191 unsigned int maxrequests,
192 unsigned int attributes,
193 dns_dispatch_t **dispp);
194 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
195 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
196 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
197 unsigned int increment, dns_qid_t **qidp);
198 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
200 #define LVL(x) ISC_LOG_DEBUG(x)
203 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
204 ISC_FORMAT_PRINTF(3, 4);
207 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
211 if (! isc_log_wouldlog(dns_lctx, level))
215 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
218 isc_log_write(dns_lctx,
219 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
220 level, "dispatchmgr %p: %s", mgr, msgbuf);
224 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
225 ISC_FORMAT_PRINTF(3, 4);
228 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
232 if (! isc_log_wouldlog(dns_lctx, level))
236 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
239 isc_log_write(dns_lctx,
240 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
241 level, "dispatch %p: %s", disp, msgbuf);
245 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
246 int level, const char *fmt, ...)
247 ISC_FORMAT_PRINTF(4, 5);
250 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
251 int level, const char *fmt, ...)
257 if (! isc_log_wouldlog(dns_lctx, level))
261 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
264 if (VALID_RESPONSE(resp)) {
265 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
266 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
267 DNS_LOGMODULE_DISPATCH, level,
268 "dispatch %p response %p %s: %s", disp, resp,
271 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
272 DNS_LOGMODULE_DISPATCH, level,
273 "dispatch %p req/resp %p: %s", disp, resp,
279 * ARC4 random number generator derived from OpenBSD.
280 * Only dispatch_arc4random() and dispatch_arc4uniformrandom() are expected
281 * to be called from general dispatch routines; the rest of them are subroutines
284 * The original copyright follows:
285 * Copyright (c) 1996, David Mazieres <dm@uun.org>
286 * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
288 * Permission to use, copy, modify, and distribute this software for any
289 * purpose with or without fee is hereby granted, provided that the above
290 * copyright notice and this permission notice appear in all copies.
292 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
293 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
294 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
295 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
296 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
297 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
298 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
301 dispatch_arc4init(arc4ctx_t *actx) {
303 for (n = 0; n < 256; n++)
311 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
316 for (n = 0; n < 256; n++) {
317 actx->i = (actx->i + 1);
318 si = actx->s[actx->i];
319 actx->j = (actx->j + si + dat[n % datlen]);
320 actx->s[actx->i] = actx->s[actx->j];
321 actx->s[actx->j] = si;
326 static inline isc_uint8_t
327 dispatch_arc4get8(arc4ctx_t *actx) {
330 actx->i = (actx->i + 1);
331 si = actx->s[actx->i];
332 actx->j = (actx->j + si);
333 sj = actx->s[actx->j];
334 actx->s[actx->i] = sj;
335 actx->s[actx->j] = si;
337 return (actx->s[(si + sj) & 0xff]);
340 static inline isc_uint16_t
341 dispatch_arc4get16(arc4ctx_t *actx) {
344 val = dispatch_arc4get8(actx) << 8;
345 val |= dispatch_arc4get8(actx);
351 dispatch_arc4stir(dns_dispatchmgr_t *mgr) {
354 unsigned char rnd[128];
355 isc_uint32_t rnd32[32];
359 if (mgr->entropy != NULL) {
361 * We accept any quality of random data to avoid blocking.
363 result = isc_entropy_getdata(mgr->entropy, rnd.rnd,
364 sizeof(rnd), NULL, 0);
365 RUNTIME_CHECK(result == ISC_R_SUCCESS);
367 for (i = 0; i < 32; i++)
368 isc_random_get(&rnd.rnd32[i]);
370 dispatch_arc4addrandom(&mgr->arc4ctx, rnd.rnd, sizeof(rnd.rnd));
373 * Discard early keystream, as per recommendations in:
374 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
376 for (i = 0; i < 256; i++)
377 (void)dispatch_arc4get8(&mgr->arc4ctx);
380 * Derived from OpenBSD's implementation. The rationale is not clear,
381 * but should be conservative enough in safety, and reasonably large
384 mgr->arc4ctx.count = 1600000;
388 dispatch_arc4random(dns_dispatchmgr_t *mgr) {
391 LOCK(&mgr->arc4_lock);
392 mgr->arc4ctx.count -= sizeof(isc_uint16_t);
393 if (mgr->arc4ctx.count <= 0)
394 dispatch_arc4stir(mgr);
395 result = dispatch_arc4get16(&mgr->arc4ctx);
396 UNLOCK(&mgr->arc4_lock);
401 dispatch_arc4uniformrandom(dns_dispatchmgr_t *mgr, isc_uint16_t upper_bound) {
403 /* The caller must hold the manager lock. */
409 * Ensure the range of random numbers [min, 0xffff] be a multiple of
410 * upper_bound and contain at least a half of the 16 bit range.
413 if (upper_bound > 0x8000)
414 min = 1 + ~upper_bound; /* 0x10000 - upper_bound */
416 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
419 * This could theoretically loop forever but each retry has
420 * p > 0.5 (worst case, usually far better) of selecting a
421 * number inside the range we need, so it should rarely need
425 r = dispatch_arc4random(mgr);
430 return (r % upper_bound);
434 * Return a hash of the destination and message id.
437 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
442 ret = isc_sockaddr_hash(dest, ISC_TRUE);
443 ret ^= (id << 16) | port;
444 ret %= qid->qid_nbuckets;
446 INSIST(ret < qid->qid_nbuckets);
452 * Find the first entry in 'qid'. Returns NULL if there are no entries.
454 static dns_dispentry_t *
455 linear_first(dns_qid_t *qid) {
456 dns_dispentry_t *ret;
461 while (bucket < qid->qid_nbuckets) {
462 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
472 * Find the next entry after 'resp' in 'qid'. Return NULL if there are
475 static dns_dispentry_t *
476 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
477 dns_dispentry_t *ret;
480 ret = ISC_LIST_NEXT(resp, link);
484 bucket = resp->bucket;
486 while (bucket < qid->qid_nbuckets) {
487 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
497 * The dispatch must be locked.
500 destroy_disp_ok(dns_dispatch_t *disp)
502 if (disp->refcount != 0)
505 if (disp->recv_pending != 0)
508 if (disp->shutting_down == 0)
516 * Called when refcount reaches 0 (and safe to destroy).
518 * The dispatcher must not be locked.
519 * The manager must be locked.
522 destroy_disp(isc_task_t *task, isc_event_t *event) {
523 dns_dispatch_t *disp;
524 dns_dispatchmgr_t *mgr;
525 isc_boolean_t killmgr;
527 INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
531 disp = event->ev_arg;
535 ISC_LIST_UNLINK(mgr->list, disp, link);
537 dispatch_log(disp, LVL(90),
538 "shutting down; detaching from sock %p, task %p",
539 disp->socket, disp->task);
541 isc_socket_detach(&disp->socket);
542 isc_task_detach(&disp->task);
543 isc_event_free(&event);
545 dispatch_free(&disp);
547 killmgr = destroy_mgr_ok(mgr);
555 * Find an entry for query ID 'id' and socket address 'dest' in 'qid'.
556 * Return NULL if no such entry exists.
558 static dns_dispentry_t *
559 bucket_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
560 in_port_t port, unsigned int bucket)
562 dns_dispentry_t *res;
564 REQUIRE(bucket < qid->qid_nbuckets);
566 res = ISC_LIST_HEAD(qid->qid_table[bucket]);
568 while (res != NULL) {
569 if ((res->id == id) && isc_sockaddr_equal(dest, &res->host) &&
573 res = ISC_LIST_NEXT(res, link);
580 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
581 INSIST(buf != NULL && len != 0);
584 switch (disp->socktype) {
585 case isc_sockettype_tcp:
586 INSIST(disp->tcpbuffers > 0);
588 isc_mem_put(disp->mgr->mctx, buf, len);
590 case isc_sockettype_udp:
591 LOCK(&disp->mgr->buffer_lock);
592 INSIST(disp->mgr->buffers > 0);
593 INSIST(len == disp->mgr->buffersize);
594 disp->mgr->buffers--;
595 isc_mempool_put(disp->mgr->bpool, buf);
596 UNLOCK(&disp->mgr->buffer_lock);
605 allocate_udp_buffer(dns_dispatch_t *disp) {
608 LOCK(&disp->mgr->buffer_lock);
609 temp = isc_mempool_get(disp->mgr->bpool);
612 disp->mgr->buffers++;
613 UNLOCK(&disp->mgr->buffer_lock);
619 free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
620 if (disp->failsafe_ev == ev) {
621 INSIST(disp->shutdown_out == 1);
622 disp->shutdown_out = 0;
627 isc_mempool_put(disp->mgr->epool, ev);
630 static inline dns_dispatchevent_t *
631 allocate_event(dns_dispatch_t *disp) {
632 dns_dispatchevent_t *ev;
634 ev = isc_mempool_get(disp->mgr->epool);
637 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
638 NULL, NULL, NULL, NULL, NULL);
646 * If I/O result == CANCELED or error, free the buffer.
648 * If query, free the buffer, restart.
651 * Allocate event, fill in details.
652 * If cannot allocate, free buffer, restart.
653 * find target. If not found, free buffer, restart.
654 * if event queue is not empty, queue. else, send.
658 udp_recv(isc_task_t *task, isc_event_t *ev_in) {
659 isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
660 dns_dispatch_t *disp = ev_in->ev_arg;
665 dns_dispentry_t *resp;
666 dns_dispatchevent_t *rev;
668 isc_boolean_t killit;
669 isc_boolean_t queue_response;
670 dns_dispatchmgr_t *mgr;
672 isc_netaddr_t netaddr;
682 dispatch_log(disp, LVL(90),
683 "got packet: requests %d, buffers %d, recvs %d",
684 disp->requests, disp->mgr->buffers, disp->recv_pending);
686 if (ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
688 * Unless the receive event was imported from a listening
689 * interface, in which case the event type is
690 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
692 INSIST(disp->recv_pending != 0);
693 disp->recv_pending = 0;
696 if (disp->shutting_down) {
698 * This dispatcher is shutting down.
700 free_buffer(disp, ev->region.base, ev->region.length);
702 isc_event_free(&ev_in);
705 killit = destroy_disp_ok(disp);
708 isc_task_send(disp->task, &disp->ctlevent);
713 if (ev->result != ISC_R_SUCCESS) {
714 free_buffer(disp, ev->region.base, ev->region.length);
716 if (ev->result != ISC_R_CANCELED)
717 dispatch_log(disp, ISC_LOG_ERROR,
718 "odd socket result in udp_recv(): %s",
719 isc_result_totext(ev->result));
722 isc_event_free(&ev_in);
727 * If this is from a blackholed address, drop it.
729 isc_netaddr_fromsockaddr(&netaddr, &ev->address);
730 if (disp->mgr->blackhole != NULL &&
731 dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
732 NULL, &match, NULL) == ISC_R_SUCCESS &&
735 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
736 char netaddrstr[ISC_NETADDR_FORMATSIZE];
737 isc_netaddr_format(&netaddr, netaddrstr,
739 dispatch_log(disp, LVL(10),
740 "blackholed packet from %s",
743 free_buffer(disp, ev->region.base, ev->region.length);
748 * Peek into the buffer to see what we can see.
750 isc_buffer_init(&source, ev->region.base, ev->region.length);
751 isc_buffer_add(&source, ev->n);
752 dres = dns_message_peekheader(&source, &id, &flags);
753 if (dres != ISC_R_SUCCESS) {
754 free_buffer(disp, ev->region.base, ev->region.length);
755 dispatch_log(disp, LVL(10), "got garbage packet");
759 dispatch_log(disp, LVL(92),
760 "got valid DNS message header, /QR %c, id %u",
761 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
764 * Look at flags. If query, drop it. If response,
765 * look to see where it goes.
767 queue_response = ISC_FALSE;
768 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
770 free_buffer(disp, ev->region.base, ev->region.length);
775 bucket = dns_hash(qid, &ev->address, id, disp->localport);
777 resp = bucket_search(qid, &ev->address, id, disp->localport, bucket);
778 dispatch_log(disp, LVL(90),
779 "search for response in bucket %d: %s",
780 bucket, (resp == NULL ? "not found" : "found"));
783 free_buffer(disp, ev->region.base, ev->region.length);
788 * Now that we have the original dispatch the query was sent
789 * from, check that the address and port the response was
790 * sent to make sense.
792 if (disp != resp->disp) {
797 * Check that the socket types and ports match.
799 if (disp->socktype != resp->disp->socktype ||
800 isc_sockaddr_getport(&disp->local) !=
801 isc_sockaddr_getport(&resp->disp->local)) {
802 free_buffer(disp, ev->region.base, ev->region.length);
807 * If both dispatches are bound to an address then fail as
808 * the addresses can't be equal (enforced by the IP stack).
810 * Note under Linux a packet can be sent out via IPv4 socket
811 * and the response be received via an IPv6 socket.
813 * Requests sent out via IPv6 should always come back in
816 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
817 isc_sockaddr_pf(&disp->local) != PF_INET6) {
818 free_buffer(disp, ev->region.base, ev->region.length);
821 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
822 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
823 if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
824 !isc_sockaddr_eqaddr(&a2, &disp->local)) {
825 free_buffer(disp, ev->region.base, ev->region.length);
830 queue_response = resp->item_out;
831 rev = allocate_event(resp->disp);
833 free_buffer(disp, ev->region.base, ev->region.length);
838 * At this point, rev contains the event we want to fill in, and
839 * resp contains the information on the place to send it to.
840 * Send the event off.
842 isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
843 isc_buffer_add(&rev->buffer, ev->n);
844 rev->result = ISC_R_SUCCESS;
846 rev->addr = ev->address;
847 rev->pktinfo = ev->pktinfo;
848 rev->attributes = ev->attributes;
849 if (queue_response) {
850 ISC_LIST_APPEND(resp->items, rev, ev_link);
852 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
854 resp->action, resp->arg, resp, NULL, NULL);
855 request_log(disp, resp, LVL(90),
856 "[a] Sent event %p buffer %p len %d to task %p",
857 rev, rev->buffer.base, rev->buffer.length,
859 resp->item_out = ISC_TRUE;
860 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
866 * Restart recv() to get the next packet.
873 isc_event_free(&ev_in);
879 * If I/O result == CANCELED, EOF, or error, notify everyone as the
880 * various queues drain.
885 * Allocate event, fill in details.
886 * If cannot allocate, restart.
887 * find target. If not found, restart.
888 * if event queue is not empty, queue. else, send.
892 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
893 dns_dispatch_t *disp = ev_in->ev_arg;
894 dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
898 dns_dispentry_t *resp;
899 dns_dispatchevent_t *rev;
901 isc_boolean_t killit;
902 isc_boolean_t queue_response;
905 char buf[ISC_SOCKADDR_FORMATSIZE];
909 REQUIRE(VALID_DISPATCH(disp));
913 dispatch_log(disp, LVL(90),
914 "got TCP packet: requests %d, buffers %d, recvs %d",
915 disp->requests, disp->tcpbuffers, disp->recv_pending);
919 INSIST(disp->recv_pending != 0);
920 disp->recv_pending = 0;
922 if (disp->refcount == 0) {
924 * This dispatcher is shutting down. Force cancelation.
926 tcpmsg->result = ISC_R_CANCELED;
929 if (tcpmsg->result != ISC_R_SUCCESS) {
930 switch (tcpmsg->result) {
935 dispatch_log(disp, LVL(90), "shutting down on EOF");
939 case ISC_R_CONNECTIONRESET:
940 level = ISC_LOG_INFO;
944 level = ISC_LOG_ERROR;
946 isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
947 dispatch_log(disp, level, "shutting down due to TCP "
948 "receive error: %s: %s", buf,
949 isc_result_totext(tcpmsg->result));
955 * The event is statically allocated in the tcpmsg
956 * structure, and destroy_disp() frees the tcpmsg, so we must
957 * free the event *before* calling destroy_disp().
959 isc_event_free(&ev_in);
961 disp->shutting_down = 1;
962 disp->shutdown_why = tcpmsg->result;
965 * If the recv() was canceled pass the word on.
967 killit = destroy_disp_ok(disp);
970 isc_task_send(disp->task, &disp->ctlevent);
974 dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
976 tcpmsg->buffer.length, tcpmsg->buffer.base);
979 * Peek into the buffer to see what we can see.
981 dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
982 if (dres != ISC_R_SUCCESS) {
983 dispatch_log(disp, LVL(10), "got garbage packet");
987 dispatch_log(disp, LVL(92),
988 "got valid DNS message header, /QR %c, id %u",
989 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
992 * Allocate an event to send to the query or response client, and
993 * allocate a new buffer for our use.
997 * Look at flags. If query, drop it. If response,
998 * look to see where it goes.
1000 queue_response = ISC_FALSE;
1001 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1011 bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1013 resp = bucket_search(qid, &tcpmsg->address, id, disp->localport,
1015 dispatch_log(disp, LVL(90),
1016 "search for response in bucket %d: %s",
1017 bucket, (resp == NULL ? "not found" : "found"));
1021 queue_response = resp->item_out;
1022 rev = allocate_event(disp);
1027 * At this point, rev contains the event we want to fill in, and
1028 * resp contains the information on the place to send it to.
1029 * Send the event off.
1031 dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1033 rev->result = ISC_R_SUCCESS;
1035 rev->addr = tcpmsg->address;
1036 if (queue_response) {
1037 ISC_LIST_APPEND(resp->items, rev, ev_link);
1039 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1040 resp->action, resp->arg, resp, NULL, NULL);
1041 request_log(disp, resp, LVL(90),
1042 "[b] Sent event %p buffer %p len %d to task %p",
1043 rev, rev->buffer.base, rev->buffer.length,
1045 resp->item_out = ISC_TRUE;
1046 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1052 * Restart recv() to get the next packet.
1057 UNLOCK(&disp->lock);
1059 isc_event_free(&ev_in);
1063 * disp must be locked.
1066 startrecv(dns_dispatch_t *disp) {
1068 isc_region_t region;
1070 if (disp->shutting_down == 1)
1073 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1076 if (disp->recv_pending != 0)
1079 if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1082 switch (disp->socktype) {
1084 * UDP reads are always maximal.
1086 case isc_sockettype_udp:
1087 region.length = disp->mgr->buffersize;
1088 region.base = allocate_udp_buffer(disp);
1089 if (region.base == NULL)
1091 res = isc_socket_recv(disp->socket, &region, 1,
1092 disp->task, udp_recv, disp);
1093 if (res != ISC_R_SUCCESS) {
1094 free_buffer(disp, region.base, region.length);
1095 disp->shutdown_why = res;
1096 disp->shutting_down = 1;
1100 INSIST(disp->recv_pending == 0);
1101 disp->recv_pending = 1;
1104 case isc_sockettype_tcp:
1105 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task,
1107 if (res != ISC_R_SUCCESS) {
1108 disp->shutdown_why = res;
1109 disp->shutting_down = 1;
1113 INSIST(disp->recv_pending == 0);
1114 disp->recv_pending = 1;
1123 * Mgr must be locked when calling this function.
1125 static isc_boolean_t
1126 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1127 mgr_log(mgr, LVL(90),
1128 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1129 "epool=%d, rpool=%d, dpool=%d",
1130 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1131 isc_mempool_getallocated(mgr->epool),
1132 isc_mempool_getallocated(mgr->rpool),
1133 isc_mempool_getallocated(mgr->dpool));
1134 if (!MGR_IS_SHUTTINGDOWN(mgr))
1136 if (!ISC_LIST_EMPTY(mgr->list))
1138 if (isc_mempool_getallocated(mgr->epool) != 0)
1140 if (isc_mempool_getallocated(mgr->rpool) != 0)
1142 if (isc_mempool_getallocated(mgr->dpool) != 0)
1149 * Mgr must be unlocked when calling this function.
1152 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1154 dns_dispatchmgr_t *mgr;
1163 DESTROYLOCK(&mgr->lock);
1166 DESTROYLOCK(&mgr->arc4_lock);
1168 isc_mempool_destroy(&mgr->epool);
1169 isc_mempool_destroy(&mgr->rpool);
1170 isc_mempool_destroy(&mgr->dpool);
1171 isc_mempool_destroy(&mgr->bpool);
1173 DESTROYLOCK(&mgr->pool_lock);
1175 if (mgr->entropy != NULL)
1176 isc_entropy_detach(&mgr->entropy);
1177 if (mgr->qid != NULL)
1178 qid_destroy(mctx, &mgr->qid);
1180 DESTROYLOCK(&mgr->buffer_lock);
1182 if (mgr->blackhole != NULL)
1183 dns_acl_detach(&mgr->blackhole);
1185 if (mgr->portlist != NULL)
1186 dns_portlist_detach(&mgr->portlist);
1188 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1189 isc_mem_detach(&mctx);
1193 create_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1194 unsigned int options, isc_socket_t **sockp)
1197 isc_result_t result;
1200 result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1201 isc_sockettype_udp, &sock);
1202 if (result != ISC_R_SUCCESS)
1205 #ifndef ISC_ALLOW_MAPPED
1206 isc_socket_ipv6only(sock, ISC_TRUE);
1208 result = isc_socket_bind(sock, local, options);
1209 if (result != ISC_R_SUCCESS) {
1210 isc_socket_detach(&sock);
1215 return (ISC_R_SUCCESS);
1223 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1224 dns_dispatchmgr_t **mgrp)
1226 dns_dispatchmgr_t *mgr;
1227 isc_result_t result;
1229 REQUIRE(mctx != NULL);
1230 REQUIRE(mgrp != NULL && *mgrp == NULL);
1232 mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1234 return (ISC_R_NOMEMORY);
1237 isc_mem_attach(mctx, &mgr->mctx);
1239 mgr->blackhole = NULL;
1240 mgr->portlist = NULL;
1242 result = isc_mutex_init(&mgr->lock);
1243 if (result != ISC_R_SUCCESS)
1246 result = isc_mutex_init(&mgr->arc4_lock);
1247 if (result != ISC_R_SUCCESS)
1250 result = isc_mutex_init(&mgr->buffer_lock);
1251 if (result != ISC_R_SUCCESS)
1252 goto kill_arc4_lock;
1254 result = isc_mutex_init(&mgr->pool_lock);
1255 if (result != ISC_R_SUCCESS)
1256 goto kill_buffer_lock;
1259 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1260 &mgr->epool) != ISC_R_SUCCESS) {
1261 result = ISC_R_NOMEMORY;
1262 goto kill_pool_lock;
1266 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1267 &mgr->rpool) != ISC_R_SUCCESS) {
1268 result = ISC_R_NOMEMORY;
1273 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
1274 &mgr->dpool) != ISC_R_SUCCESS) {
1275 result = ISC_R_NOMEMORY;
1279 isc_mempool_setname(mgr->epool, "dispmgr_epool");
1280 isc_mempool_setfreemax(mgr->epool, 1024);
1281 isc_mempool_associatelock(mgr->epool, &mgr->pool_lock);
1283 isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
1284 isc_mempool_setfreemax(mgr->rpool, 1024);
1285 isc_mempool_associatelock(mgr->rpool, &mgr->pool_lock);
1287 isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
1288 isc_mempool_setfreemax(mgr->dpool, 1024);
1289 isc_mempool_associatelock(mgr->dpool, &mgr->pool_lock);
1292 mgr->buffersize = 0;
1293 mgr->maxbuffers = 0;
1295 mgr->entropy = NULL;
1298 ISC_LIST_INIT(mgr->list);
1299 mgr->magic = DNS_DISPATCHMGR_MAGIC;
1301 if (entropy != NULL)
1302 isc_entropy_attach(entropy, &mgr->entropy);
1304 dispatch_arc4init(&mgr->arc4ctx);
1307 return (ISC_R_SUCCESS);
1310 isc_mempool_destroy(&mgr->rpool);
1312 isc_mempool_destroy(&mgr->epool);
1314 DESTROYLOCK(&mgr->pool_lock);
1316 DESTROYLOCK(&mgr->buffer_lock);
1318 DESTROYLOCK(&mgr->arc4_lock);
1320 DESTROYLOCK(&mgr->lock);
1322 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1323 isc_mem_detach(&mctx);
1329 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1330 REQUIRE(VALID_DISPATCHMGR(mgr));
1331 if (mgr->blackhole != NULL)
1332 dns_acl_detach(&mgr->blackhole);
1333 dns_acl_attach(blackhole, &mgr->blackhole);
1337 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
1338 REQUIRE(VALID_DISPATCHMGR(mgr));
1339 return (mgr->blackhole);
1343 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
1344 dns_portlist_t *portlist)
1346 REQUIRE(VALID_DISPATCHMGR(mgr));
1347 if (mgr->portlist != NULL)
1348 dns_portlist_detach(&mgr->portlist);
1349 if (portlist != NULL)
1350 dns_portlist_attach(portlist, &mgr->portlist);
1354 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
1355 REQUIRE(VALID_DISPATCHMGR(mgr));
1356 return (mgr->portlist);
1360 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
1361 unsigned int buffersize, unsigned int maxbuffers,
1362 unsigned int buckets, unsigned int increment)
1364 isc_result_t result;
1366 REQUIRE(VALID_DISPATCHMGR(mgr));
1367 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
1368 REQUIRE(maxbuffers > 0);
1369 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
1370 REQUIRE(increment > buckets);
1373 * Keep some number of items around. This should be a config
1374 * option. For now, keep 8, but later keep at least two even
1375 * if the caller wants less. This allows us to ensure certain
1376 * things, like an event can be "freed" and the next allocation
1377 * will always succeed.
1379 * Note that if limits are placed on anything here, we use one
1380 * event internally, so the actual limit should be "wanted + 1."
1388 LOCK(&mgr->buffer_lock);
1389 if (mgr->bpool != NULL) {
1390 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1391 mgr->maxbuffers = maxbuffers;
1392 UNLOCK(&mgr->buffer_lock);
1393 return (ISC_R_SUCCESS);
1396 if (isc_mempool_create(mgr->mctx, buffersize,
1397 &mgr->bpool) != ISC_R_SUCCESS) {
1398 UNLOCK(&mgr->buffer_lock);
1399 return (ISC_R_NOMEMORY);
1402 isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
1403 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1404 isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock);
1406 result = qid_allocate(mgr, buckets, increment, &mgr->qid);
1407 if (result != ISC_R_SUCCESS)
1410 mgr->buffersize = buffersize;
1411 mgr->maxbuffers = maxbuffers;
1412 UNLOCK(&mgr->buffer_lock);
1413 return (ISC_R_SUCCESS);
1416 isc_mempool_destroy(&mgr->bpool);
1417 UNLOCK(&mgr->buffer_lock);
1418 return (ISC_R_NOMEMORY);
1422 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
1423 dns_dispatchmgr_t *mgr;
1424 isc_boolean_t killit;
1426 REQUIRE(mgrp != NULL);
1427 REQUIRE(VALID_DISPATCHMGR(*mgrp));
1433 mgr->state |= MGR_SHUTTINGDOWN;
1435 killit = destroy_mgr_ok(mgr);
1438 mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
1444 static isc_boolean_t
1445 blacklisted(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
1446 isc_sockaddr_t *sockaddrp)
1448 isc_sockaddr_t sockaddr;
1449 isc_result_t result;
1451 REQUIRE(sock != NULL || sockaddrp != NULL);
1453 if (mgr->portlist == NULL)
1457 sockaddrp = &sockaddr;
1458 result = isc_socket_getsockname(sock, sockaddrp);
1459 if (result != ISC_R_SUCCESS)
1463 if (mgr->portlist != NULL &&
1464 dns_portlist_match(mgr->portlist, isc_sockaddr_pf(sockaddrp),
1465 isc_sockaddr_getport(sockaddrp)))
/* Nonzero when _a1 and _a2 carry identical bits within _mask. */
1470 #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
/*
 * Decide whether dispatch disp is bound to the local address addr.
 *
 * The visible checks run in this order:
 *  1. a port list exists, both addr and disp->local use port 0, and the
 *     socket of the dispatch is reported by blacklisted();
 *  2. disp->local and addr are equal (isc_sockaddr_equal);
 *  3. addr uses port 0;
 *  4. the address parts differ (isc_sockaddr_eqaddr fails);
 *  5. the name actually bound to the socket, obtained through
 *     isc_socket_getsockname(), is compared with addr -- this comparison
 *     is the final return value.
 *
 * NOTE(review): the values returned by checks 1-4 and on getsockname
 * failure are not visible in this excerpt -- confirm in the full file.
 */
1472 static isc_boolean_t
1473 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
1474 isc_sockaddr_t sockaddr;
1475 isc_result_t result;
1481 * Don't match wildcard ports against newly blacklisted ports.
1483 if (disp->mgr->portlist != NULL &&
1484 isc_sockaddr_getport(addr) == 0 &&
1485 isc_sockaddr_getport(&disp->local) == 0 &&
1486 blacklisted(disp->mgr, disp->socket, NULL))
1490 * Check if we match the binding <address,port>.
1491 * Wildcard ports match/fail here.
1493 if (isc_sockaddr_equal(&disp->local, addr))
1495 if (isc_sockaddr_getport(addr) == 0)
1499 * Check if we match a bound wildcard port <address,port>.
1501 if (!isc_sockaddr_eqaddr(&disp->local, addr))
1503 result = isc_socket_getsockname(disp->socket, &sockaddr);
1504 if (result != ISC_R_SUCCESS)
1507 return (isc_sockaddr_equal(&sockaddr, addr));
1511 * Requires mgr be locked.
1513 * No dispatcher can be locked by this thread when calling this function.
1517 * If a matching dispatcher is found, it is locked after this function
1518 * returns, and must be unlocked by the caller.
/*
 * Walk mgr->list looking for a usable dispatch.
 *
 * A candidate is accepted when it is not shutting down, its attributes
 * agree with the requested attributes on every bit of mask (ATTRMATCH),
 * and local_addr_match() accepts the requested local address.
 * DNS_DISPATCHATTR_PRIVATE is cleared from attributes and forced into
 * mask first, so a dispatch carrying that flag can never be selected.
 * Rejected candidates are unlocked before the walk moves on.
 *
 * The result is ISC_R_NOTFOUND or ISC_R_SUCCESS.
 *
 * NOTE(review): the per-candidate lock acquisition, the loop exit and
 * the store into *dispp are not visible in this excerpt -- confirm.
 */
1521 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
1522 unsigned int attributes, unsigned int mask,
1523 dns_dispatch_t **dispp)
1525 dns_dispatch_t *disp;
1526 isc_result_t result;
1529 * Make certain that we will not match a private dispatch.
1531 attributes &= ~DNS_DISPATCHATTR_PRIVATE;
1532 mask |= DNS_DISPATCHATTR_PRIVATE;
1534 disp = ISC_LIST_HEAD(mgr->list);
1535 while (disp != NULL) {
1537 if ((disp->shutting_down == 0)
1538 && ATTRMATCH(disp->attributes, attributes, mask)
1539 && local_addr_match(disp, local))
1541 UNLOCK(&disp->lock);
1542 disp = ISC_LIST_NEXT(disp, link);
1546 result = ISC_R_NOTFOUND;
1551 result = ISC_R_SUCCESS;
/*
 * Build a query-ID hash table for a dispatch manager.
 *
 * Preconditions (REQUIREd): mgr is valid, buckets is below 2097169,
 * increment is greater than buckets, and qidp is non-NULL with *qidp
 * still NULL.
 *
 * Memory for the dns_qid_t and for an array of buckets list heads comes
 * from mgr->mctx.  A mutex is initialised in qid->lock, every bucket list
 * is initialised, and the bucket count, increment and QID_MAGIC are
 * recorded.  Allocation failures return ISC_R_NOMEMORY after releasing
 * whatever was already obtained; success returns ISC_R_SUCCESS.
 *
 * NOTE(review): the store into *qidp and the value returned when the
 * mutex cannot be initialised are not visible in this excerpt.
 */
1558 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
1559 unsigned int increment, dns_qid_t **qidp)
1563 isc_result_t result;
1565 REQUIRE(VALID_DISPATCHMGR(mgr));
1566 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
1567 REQUIRE(increment > buckets);
1568 REQUIRE(qidp != NULL && *qidp == NULL);
1570 qid = isc_mem_get(mgr->mctx, sizeof(*qid));
1572 return (ISC_R_NOMEMORY);
1574 qid->qid_table = isc_mem_get(mgr->mctx,
1575 buckets * sizeof(dns_displist_t));
1576 if (qid->qid_table == NULL) {
1577 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
1578 return (ISC_R_NOMEMORY);
1581 result = isc_mutex_init(&qid->lock);
1582 if (result != ISC_R_SUCCESS) {
1583 isc_mem_put(mgr->mctx, qid->qid_table,
1584 buckets * sizeof(dns_displist_t));
1585 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
1589 for (i = 0; i < buckets; i++)
1590 ISC_LIST_INIT(qid->qid_table[i]);
1592 qid->qid_nbuckets = buckets;
1593 qid->qid_increment = increment;
1594 qid->magic = QID_MAGIC;
1596 return (ISC_R_SUCCESS);
/*
 * Release a query-ID table created by qid_allocate().
 *
 * qidp must be non-NULL and the table it refers to must pass VALID_QID.
 * The bucket array (qid_nbuckets list heads) goes back to mctx, the lock
 * is destroyed, and finally the dns_qid_t itself is returned to mctx.
 *
 * NOTE(review): the statements that read and clear *qidp are not visible
 * in this excerpt -- confirm in the full file.
 */
1600 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
1603 REQUIRE(qidp != NULL);
1606 REQUIRE(VALID_QID(qid));
1610 isc_mem_put(mctx, qid->qid_table,
1611 qid->qid_nbuckets * sizeof(dns_displist_t));
1612 DESTROYLOCK(&qid->lock);
1613 isc_mem_put(mctx, qid, sizeof(*qid));
1617 * Allocate and set important limits.
/*
 * Obtain a dns_dispatch_t from mgr->dpool and give it starting values.
 *
 * Preconditions (REQUIREd): mgr is valid, dispp is non-NULL and *dispp
 * is still NULL.
 *
 * Visible initialisation: maxrequests is copied from the argument;
 * attributes, recv_pending, localport, shutting_down, shutdown_out,
 * connected, tcpmsg_valid and tcpbuffers are zeroed; disp->local is
 * cleared with memset; shutdown_why starts as ISC_R_UNEXPECTED.  A mutex
 * is created in disp->lock and a failsafe event is reserved through
 * allocate_event().  DISPATCH_MAGIC is set last, and ISC_R_SUCCESS is
 * returned.
 *
 * On the visible failure path the lock is destroyed and the structure is
 * handed back to mgr->dpool.  ISC_R_NOMEMORY is used when the pool or
 * the failsafe event cannot be obtained.
 *
 * NOTE(review): the cleanup labels, the store into *dispp and the other
 * field assignments are not visible in this excerpt.
 */
1620 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
1621 dns_dispatch_t **dispp)
1623 dns_dispatch_t *disp;
1624 isc_result_t result;
1626 REQUIRE(VALID_DISPATCHMGR(mgr));
1627 REQUIRE(dispp != NULL && *dispp == NULL);
1630 * Set up the dispatcher, mostly. Don't bother setting some of
1631 * the options that are controlled by tcp vs. udp, etc.
1634 disp = isc_mempool_get(mgr->dpool);
1636 return (ISC_R_NOMEMORY);
1640 disp->maxrequests = maxrequests;
1641 disp->attributes = 0;
1642 ISC_LINK_INIT(disp, link);
1644 disp->recv_pending = 0;
1645 memset(&disp->local, 0, sizeof(disp->local));
1646 disp->localport = 0;
1647 disp->shutting_down = 0;
1648 disp->shutdown_out = 0;
1649 disp->connected = 0;
1650 disp->tcpmsg_valid = 0;
1651 disp->shutdown_why = ISC_R_UNEXPECTED;
1653 disp->tcpbuffers = 0;
1656 result = isc_mutex_init(&disp->lock);
1657 if (result != ISC_R_SUCCESS)
1660 disp->failsafe_ev = allocate_event(disp);
1661 if (disp->failsafe_ev == NULL) {
1662 result = ISC_R_NOMEMORY;
1666 disp->magic = DISPATCH_MAGIC;
1669 return (ISC_R_SUCCESS);
1675 DESTROYLOCK(&disp->lock);
1677 isc_mempool_put(mgr->dpool, disp);
1684 * MUST be unlocked, and not used by anything.
/*
 * Tear down a dispatch and return its memory to the manager pools.
 *
 * *dispp must be a valid dispatch and its manager must be valid.  If a
 * TCP message context is active it is invalidated first.  The dispatch
 * must be idle: tcpbuffers, requests and recv_pending are all INSISTed
 * to be zero.  The failsafe event goes back to mgr->epool, a per-dispatch
 * QID table (disp->qid), when present, is destroyed, the lock is
 * destroyed, and the structure is returned to mgr->dpool.
 *
 * NOTE(review): the statements that read *dispp, clear it, and fetch mgr
 * are not visible in this excerpt.
 */
1687 dispatch_free(dns_dispatch_t **dispp)
1689 dns_dispatch_t *disp;
1690 dns_dispatchmgr_t *mgr;
1692 REQUIRE(VALID_DISPATCH(*dispp));
1697 REQUIRE(VALID_DISPATCHMGR(mgr));
1699 if (disp->tcpmsg_valid) {
1700 dns_tcpmsg_invalidate(&disp->tcpmsg);
1701 disp->tcpmsg_valid = 0;
1704 INSIST(disp->tcpbuffers == 0);
1705 INSIST(disp->requests == 0);
1706 INSIST(disp->recv_pending == 0);
1708 isc_mempool_put(mgr->epool, disp->failsafe_ev);
1709 disp->failsafe_ev = NULL;
1711 if (disp->qid != NULL)
1712 qid_destroy(mgr->mctx, &disp->qid);
1714 DESTROYLOCK(&disp->lock);
1716 isc_mempool_put(mgr->dpool, disp);
/*
 * Create a dispatch on top of an existing TCP socket.
 *
 * Preconditions (REQUIREd): mgr is valid, sock is a TCP socket,
 * attributes contains DNS_DISPATCHATTR_TCP and does not contain
 * DNS_DISPATCHATTR_UDP.  DNS_DISPATCHATTR_PRIVATE is always added to the
 * attributes.
 *
 * Steps visible here: dispatch_allocate(), then a QID table of its own
 * via qid_allocate() (stored in disp->qid), attach to sock, create a
 * task named tcpdispatch, allocate the DNS_EVENT_DISPATCHCONTROL control
 * event, initialise the TCP message context, store the attributes, and
 * append the dispatch to mgr->list.  Creation is logged at level 90 and
 * ISC_R_SUCCESS is returned.
 *
 * The failure path unwinds in reverse: detach the task, detach the
 * socket, then dispatch_free().
 *
 * NOTE(review): no use of buffersize or maxbuffers is visible in this
 * excerpt, and neither are the manager locking and the store into
 * *dispp -- confirm in the full file.
 */
1720 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
1721 isc_taskmgr_t *taskmgr, unsigned int buffersize,
1722 unsigned int maxbuffers, unsigned int maxrequests,
1723 unsigned int buckets, unsigned int increment,
1724 unsigned int attributes, dns_dispatch_t **dispp)
1726 isc_result_t result;
1727 dns_dispatch_t *disp;
1732 REQUIRE(VALID_DISPATCHMGR(mgr));
1733 REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
1734 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
1735 REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
1737 attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */
1742 * dispatch_allocate() checks mgr for us.
1743 * qid_allocate() checks buckets and increment for us.
1746 result = dispatch_allocate(mgr, maxrequests, &disp);
1747 if (result != ISC_R_SUCCESS) {
1752 result = qid_allocate(mgr, buckets, increment, &disp->qid);
1753 if (result != ISC_R_SUCCESS)
1754 goto deallocate_dispatch;
1756 disp->socktype = isc_sockettype_tcp;
1757 disp->socket = NULL;
1758 isc_socket_attach(sock, &disp->socket);
1761 result = isc_task_create(taskmgr, 0, &disp->task);
1762 if (result != ISC_R_SUCCESS)
1765 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
1766 DNS_EVENT_DISPATCHCONTROL,
1768 sizeof(isc_event_t));
1769 if (disp->ctlevent == NULL) {
1770 result = ISC_R_NOMEMORY;
1774 isc_task_setname(disp->task, "tcpdispatch", disp);
1776 dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
1777 disp->tcpmsg_valid = 1;
1779 disp->attributes = attributes;
1782 * Append it to the dispatcher list.
1784 ISC_LIST_APPEND(mgr->list, disp, link);
1787 mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
1788 dispatch_log(disp, LVL(90), "created task %p", disp->task);
1792 return (ISC_R_SUCCESS);
1798 isc_task_detach(&disp->task);
1800 isc_socket_detach(&disp->socket);
1801 deallocate_dispatch:
1802 dispatch_free(&disp);
/*
 * Return a UDP dispatch for localaddr, sharing an existing one when
 * possible.
 *
 * Preconditions (REQUIREd): mgr valid; sockmgr, localaddr and taskmgr
 * non-NULL; 512 <= buffersize < 64K; maxbuffers > 0; buckets below
 * 2097169; increment > buckets; dispp non-NULL with *dispp NULL; and
 * attributes without DNS_DISPATCHATTR_TCP.  When
 * DNS_DISPATCHATTR_RANDOMPORT is requested, localaddr must use port 0.
 *
 * dns_dispatchmgr_setudp() is called first with the buffer and hash
 * parameters.  dispatch_find() then looks for a match; on a hit the
 * maxrequests limit is raised if the caller asked for more, and if the
 * caller wants DNS_DISPATCHATTR_NOLISTEN while the found dispatch does
 * not have it, the flag is set and a pending receive is cancelled.  The
 * dispatch is unlocked and ISC_R_SUCCESS is returned.  Without a match,
 * dispatch_createudp() builds a new dispatch.
 *
 * NOTE(review): manager locking, reference counting and the store into
 * *dispp are not visible in this excerpt; neither is the control flow
 * taken after the RANDOMPORT check.
 */
1810 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
1811 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
1812 unsigned int buffersize,
1813 unsigned int maxbuffers, unsigned int maxrequests,
1814 unsigned int buckets, unsigned int increment,
1815 unsigned int attributes, unsigned int mask,
1816 dns_dispatch_t **dispp)
1818 isc_result_t result;
1819 dns_dispatch_t *disp = NULL;
1821 REQUIRE(VALID_DISPATCHMGR(mgr));
1822 REQUIRE(sockmgr != NULL);
1823 REQUIRE(localaddr != NULL);
1824 REQUIRE(taskmgr != NULL);
1825 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
1826 REQUIRE(maxbuffers > 0);
1827 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
1828 REQUIRE(increment > buckets);
1829 REQUIRE(dispp != NULL && *dispp == NULL);
1830 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
1832 result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
1833 buckets, increment);
1834 if (result != ISC_R_SUCCESS)
1839 if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) {
1840 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
1845 * First, see if we have a dispatcher that matches.
1848 result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
1849 if (result == ISC_R_SUCCESS) {
1852 if (disp->maxrequests < maxrequests)
1853 disp->maxrequests = maxrequests;
1855 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 &&
1856 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1858 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
1859 if (disp->recv_pending != 0)
1860 isc_socket_cancel(disp->socket, disp->task,
1861 ISC_SOCKCANCEL_RECV);
1864 UNLOCK(&disp->lock);
1869 return (ISC_R_SUCCESS);
1876 result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
1877 maxrequests, attributes, &disp);
1878 if (result != ISC_R_SUCCESS) {
1885 return (ISC_R_SUCCESS);
1889 * mgr should be locked.
/*
 * Size of the held[] socket array used by dispatch_createudp().  The
 * ifndef guard lets a build override the default of 20.
 */
1892 #ifndef DNS_DISPATCH_HELD
1893 #define DNS_DISPATCH_HELD 20U
/*
 * Build a new UDP dispatch bound to localaddr.
 *
 * After dispatch_allocate(), a socket is obtained in one of two ways:
 *  - with DNS_DISPATCHATTR_RANDOMPORT set, a port is drawn as
 *    1024 + dispatch_arc4uniformrandom(mgr, 65535 - 1023), written into
 *    a copy of localaddr, checked with blacklisted(), and passed to
 *    create_socket() with no flags; the visible code can drop the
 *    RANDOMPORT bit after a blacklisted or ISC_R_ADDRINUSE outcome;
 *  - otherwise create_socket() is called on localaddr itself with
 *    ISC_SOCKET_REUSEADDRESS.
 * When the caller asked for port 0 without RANDOMPORT and the socket
 * that came back is blacklisted, the socket is parked in held[] (up to
 * DNS_DISPATCH_HELD entries, replacing older ones) and another attempt
 * is made.  When the attempt counter j reaches 0xffff an error is
 * logged and ISC_R_FAILURE is the result.
 *
 * Once a socket is accepted the dispatch records it together with
 * *localaddr and localport, gets a task named udpdispatch and a
 * DNS_EVENT_DISPATCHCONTROL control event, has DNS_DISPATCHATTR_TCP
 * cleared and DNS_DISPATCHATTR_UDP set in its attributes, and is
 * appended to mgr->list.  Held sockets are detached in the final loop.
 *
 * NOTE(review): the retry labels and counters, the exact condition under
 * which RANDOMPORT is abandoned, the range returned by
 * dispatch_arc4uniformrandom(), the store into *dispp and the return
 * statements are not visible in this excerpt -- confirm in the full
 * file.
 */
1897 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
1898 isc_taskmgr_t *taskmgr,
1899 isc_sockaddr_t *localaddr,
1900 unsigned int maxrequests,
1901 unsigned int attributes,
1902 dns_dispatch_t **dispp)
1904 isc_result_t result;
1905 dns_dispatch_t *disp;
1906 isc_socket_t *sock = NULL;
1907 isc_socket_t *held[DNS_DISPATCH_HELD];
1908 unsigned int i = 0, j = 0, k = 0;
1909 isc_sockaddr_t localaddr_bound;
1910 in_port_t localport = 0;
1913 * dispatch_allocate() checks mgr for us.
1916 result = dispatch_allocate(mgr, maxrequests, &disp);
1917 if (result != ISC_R_SUCCESS)
1921 * Try to allocate a socket that is not on the blacklist.
1922 * Hold up to DNS_DISPATCH_HELD sockets to prevent the OS
1923 * from returning the same port to us too quickly.
1925 memset(held, 0, sizeof(held));
1926 localaddr_bound = *localaddr;
1928 if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) {
1931 /* XXX: should the range be configurable? */
1932 prt = 1024 + dispatch_arc4uniformrandom(mgr, 65535 - 1023);
1933 isc_sockaddr_setport(&localaddr_bound, prt);
1934 if (blacklisted(mgr, NULL, &localaddr_bound)) {
1936 attributes &= ~DNS_DISPATCHATTR_RANDOMPORT;
1939 result = create_socket(sockmgr, &localaddr_bound, 0, &sock);
1940 if (result == ISC_R_ADDRINUSE) {
1942 attributes &= ~DNS_DISPATCHATTR_RANDOMPORT;
1947 result = create_socket(sockmgr, localaddr,
1948 ISC_SOCKET_REUSEADDRESS, &sock);
1949 if (result != ISC_R_SUCCESS)
1950 goto deallocate_dispatch;
1951 if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) == 0 &&
1952 isc_sockaddr_getport(localaddr) == 0 &&
1953 blacklisted(mgr, sock, NULL))
1955 if (held[i] != NULL)
1956 isc_socket_detach(&held[i]);
1959 if (i == DNS_DISPATCH_HELD)
1961 if (j++ == 0xffffU) {
1962 mgr_log(mgr, ISC_LOG_ERROR, "avoid-v%s-udp-ports: "
1963 "unable to allocate a non-blacklisted port",
1964 isc_sockaddr_pf(localaddr) == AF_INET ?
1966 result = ISC_R_FAILURE;
1967 goto deallocate_dispatch;
1972 disp->socktype = isc_sockettype_udp;
1973 disp->socket = sock;
1974 disp->local = *localaddr;
1975 disp->localport = localport;
1978 result = isc_task_create(taskmgr, 0, &disp->task);
1979 if (result != ISC_R_SUCCESS)
1982 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
1983 DNS_EVENT_DISPATCHCONTROL,
1985 sizeof(isc_event_t));
1986 if (disp->ctlevent == NULL) {
1987 result = ISC_R_NOMEMORY;
1991 isc_task_setname(disp->task, "udpdispatch", disp);
1993 attributes &= ~DNS_DISPATCHATTR_TCP;
1994 attributes |= DNS_DISPATCHATTR_UDP;
1995 disp->attributes = attributes;
1998 * Append it to the dispatcher list.
2000 ISC_LIST_APPEND(mgr->list, disp, link);
2002 mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
2003 dispatch_log(disp, LVL(90), "created task %p", disp->task);
2004 dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
2014 isc_task_detach(&disp->task);
2016 isc_socket_detach(&disp->socket);
2017 deallocate_dispatch:
2018 dispatch_free(&disp);
2020 for (i = 0; i < DNS_DISPATCH_HELD; i++)
2021 if (held[i] != NULL)
2022 isc_socket_detach(&held[i]);
/*
 * Attach a new reference to a dispatch.
 *
 * disp must be valid; dispp must be non-NULL and *dispp must still be
 * NULL.  The dispatch lock is released before returning.
 *
 * NOTE(review): only the argument checks and the unlock are visible in
 * this excerpt; presumably the reference count is raised and disp is
 * stored in *dispp while the lock is held -- confirm in the full file.
 */
2027 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
2028 REQUIRE(VALID_DISPATCH(disp));
2029 REQUIRE(dispp != NULL && *dispp == NULL);
2033 UNLOCK(&disp->lock);
2039 * It is important to lock the manager while we are deleting the dispatch,
2040 * since dns_dispatch_getudp will call dispatch_find, which returns to
2041 * the caller a dispatch but does not attach to it until later. _getudp
2042 * locks the manager, however, so locking it here will keep us from attaching
2043 * to a dispatcher that is in the process of going away.
/*
 * Drop a reference to a dispatch.
 *
 * dispp must be non-NULL and refer to a valid dispatch whose reference
 * count is still positive (INSISTed).  When the count is found to be
 * zero, any pending receive on the socket is cancelled and the dispatch
 * is marked as shutting down.  The resulting count is logged at level
 * 90, destroy_disp_ok() decides whether teardown may start (killit), the
 * lock is released, and the control event is sent to the dispatch task.
 *
 * NOTE(review): the lock acquisition, the decrement of refcount, the
 * clearing of *dispp and the condition guarding isc_task_send() are not
 * visible in this excerpt; presumably the event is sent only when killit
 * is true -- confirm in the full file.
 */
2046 dns_dispatch_detach(dns_dispatch_t **dispp) {
2047 dns_dispatch_t *disp;
2048 isc_boolean_t killit;
2050 REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
2057 INSIST(disp->refcount > 0);
2060 if (disp->refcount == 0) {
2061 if (disp->recv_pending > 0)
2062 isc_socket_cancel(disp->socket, disp->task,
2063 ISC_SOCKCANCEL_RECV);
2064 disp->shutting_down = 1;
2067 dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
2069 killit = destroy_disp_ok(disp);
2070 UNLOCK(&disp->lock);
2072 isc_task_send(disp->task, &disp->ctlevent);
/*
 * Register interest in a response from dest and pick a message ID for
 * the outgoing query.
 *
 * Preconditions (REQUIREd): disp valid; task, dest and idp non-NULL;
 * resp non-NULL with *resp NULL.
 *
 * Early exits, each after releasing the dispatch lock:
 *  - ISC_R_SHUTTINGDOWN when the dispatch is shutting down;
 *  - ISC_R_QUOTA when disp->requests has reached disp->maxrequests;
 *  - ISC_R_NOMORE and ISC_R_NOMEMORY on the two later failure paths.
 *
 * The first candidate ID comes from dispatch_arc4random().  Up to 64
 * candidates are examined with bucket_search(); each retry adds
 * qid->qid_increment to the ID and recomputes the bucket with
 * dns_hash().  The response entry is taken from the manager rpool,
 * attached to task, filled in (port, bucket, action, empty item list)
 * and appended to the chosen bucket of the QID table.  ISC_R_SUCCESS is
 * returned at the end.
 *
 * NOTE(review): the lock acquisitions (dispatch and QID table), the
 * bookkeeping of requests/refcount, the action taken for UDP or
 * connected dispatches, and the stores into *idp and *resp are not
 * visible in this excerpt -- confirm in the full file.
 */
2076 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
2077 isc_task_t *task, isc_taskaction_t action, void *arg,
2078 dns_messageid_t *idp, dns_dispentry_t **resp)
2080 dns_dispentry_t *res;
2081 unsigned int bucket;
2087 REQUIRE(VALID_DISPATCH(disp));
2088 REQUIRE(task != NULL);
2089 REQUIRE(dest != NULL);
2090 REQUIRE(resp != NULL && *resp == NULL);
2091 REQUIRE(idp != NULL);
2095 if (disp->shutting_down == 1) {
2096 UNLOCK(&disp->lock);
2097 return (ISC_R_SHUTTINGDOWN);
2100 if (disp->requests >= disp->maxrequests) {
2101 UNLOCK(&disp->lock);
2102 return (ISC_R_QUOTA);
2106 * Try somewhat hard to find an unique ID.
2108 id = (dns_messageid_t)dispatch_arc4random(disp->mgr);
2109 qid = DNS_QID(disp);
2111 bucket = dns_hash(qid, dest, id, disp->localport);
2113 for (i = 0; i < 64; i++) {
2114 if (bucket_search(qid, dest, id, disp->localport, bucket) ==
2119 id += qid->qid_increment;
2121 bucket = dns_hash(qid, dest, id, disp->localport);
2126 UNLOCK(&disp->lock);
2127 return (ISC_R_NOMORE);
2130 res = isc_mempool_get(disp->mgr->rpool);
2133 UNLOCK(&disp->lock);
2134 return (ISC_R_NOMEMORY);
2140 isc_task_attach(task, &res->task);
2143 res->port = disp->localport;
2144 res->bucket = bucket;
2146 res->action = action;
2148 res->item_out = ISC_FALSE;
2149 ISC_LIST_INIT(res->items);
2150 ISC_LINK_INIT(res, link);
2151 res->magic = RESPONSE_MAGIC;
2152 ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
2155 request_log(disp, res, LVL(90),
2156 "attached to task %p", res->task);
2158 if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
2159 ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0))
2162 UNLOCK(&disp->lock);
2167 return (ISC_R_SUCCESS);
/*
 * Mark a dispatch as connected.
 *
 * disp must be valid.  The call is logged at level 90, the
 * DNS_DISPATCHATTR_CONNECTED attribute is set, and the dispatch lock is
 * released.
 *
 * NOTE(review): the lock acquisition and any call that starts receiving
 * are not visible in this excerpt -- confirm in the full file.
 */
2171 dns_dispatch_starttcp(dns_dispatch_t *disp) {
2173 REQUIRE(VALID_DISPATCH(disp));
2175 dispatch_log(disp, LVL(90), "starttcp %p", disp->task);
2178 disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
2180 UNLOCK(&disp->lock);
/*
 * Withdraw a response entry created by dns_dispatch_addresponse().
 *
 * resp must be non-NULL and *resp must be a valid response; the dispatch
 * and manager reached from it must be valid as well.  If sockevent is
 * non-NULL, *sockevent must be non-NULL too.
 *
 * Visible sequence:
 *  - requests and refcount are INSISTed to be positive; when refcount is
 *    found to be zero a pending receive is cancelled and the dispatch is
 *    marked as shutting down;
 *  - the entry is unlinked from its bucket in the QID table;
 *  - if no event was handed in but one is outstanding (item_out), it is
 *    pulled back from the task with isc_task_unsend();
 *  - the event buffer (when present) and the event itself are released
 *    with free_buffer() and free_event();
 *  - the entry detaches from its task, every event still queued on
 *    res->items is released the same way, and the entry returns to the
 *    manager rpool;
 *  - destroy_disp_ok() is consulted, the dispatch lock is released and
 *    the control event is sent to the dispatch task.
 *
 * NOTE(review): lock acquisitions, the counter decrements, the handling
 * of *sockevent and *resp, the branch taken when the dispatch is
 * shutting down, and the condition guarding the final isc_task_send()
 * are not visible in this excerpt -- confirm in the full file.
 */
2184 dns_dispatch_removeresponse(dns_dispentry_t **resp,
2185 dns_dispatchevent_t **sockevent)
2187 dns_dispatchmgr_t *mgr;
2188 dns_dispatch_t *disp;
2189 dns_dispentry_t *res;
2190 dns_dispatchevent_t *ev;
2191 unsigned int bucket;
2192 isc_boolean_t killit;
2194 isc_eventlist_t events;
2197 REQUIRE(resp != NULL);
2198 REQUIRE(VALID_RESPONSE(*resp));
2204 REQUIRE(VALID_DISPATCH(disp));
2206 REQUIRE(VALID_DISPATCHMGR(mgr));
2208 qid = DNS_QID(disp);
2210 if (sockevent != NULL) {
2211 REQUIRE(*sockevent != NULL);
2220 INSIST(disp->requests > 0);
2222 INSIST(disp->refcount > 0);
2225 if (disp->refcount == 0) {
2226 if (disp->recv_pending > 0)
2227 isc_socket_cancel(disp->socket, disp->task,
2228 ISC_SOCKCANCEL_RECV);
2229 disp->shutting_down = 1;
2232 bucket = res->bucket;
2235 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
2238 if (ev == NULL && res->item_out) {
2240 * We've posted our event, but the caller hasn't gotten it
2241 * yet. Take it back.
2243 ISC_LIST_INIT(events);
2244 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
2247 * We had better have gotten it back.
2250 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
2254 REQUIRE(res->item_out == ISC_TRUE);
2255 res->item_out = ISC_FALSE;
2256 if (ev->buffer.base != NULL)
2257 free_buffer(disp, ev->buffer.base, ev->buffer.length);
2258 free_event(disp, ev);
2261 request_log(disp, res, LVL(90), "detaching from task %p", res->task);
2262 isc_task_detach(&res->task);
2265 * Free any buffered requests as well
2267 ev = ISC_LIST_HEAD(res->items);
2268 while (ev != NULL) {
2269 ISC_LIST_UNLINK(res->items, ev, ev_link);
2270 if (ev->buffer.base != NULL)
2271 free_buffer(disp, ev->buffer.base, ev->buffer.length);
2272 free_event(disp, ev);
2273 ev = ISC_LIST_HEAD(res->items);
2276 isc_mempool_put(disp->mgr->rpool, res);
2277 if (disp->shutting_down == 1)
2282 killit = destroy_disp_ok(disp);
2283 UNLOCK(&disp->lock);
2285 isc_task_send(disp->task, &disp->ctlevent);
/*
 * Deliver the failsafe shutdown event to one waiting response handler.
 *
 * Nothing further happens when a failsafe event is already outstanding
 * (shutdown_out == 1).  Otherwise the QID table is scanned with
 * linear_first()/linear_next() for the first entry whose item_out flag
 * is clear.  The failsafe event of the dispatch is initialised as a
 * DNS_EVENT_DISPATCH event for that entry (its action and arg, sender
 * set to the entry), carries disp->shutdown_why as its result and no
 * buffer, and is sent to the task of the entry.  shutdown_out and the
 * item_out flag of the entry are both set.
 *
 * NOTE(review): locking of the QID table and the early returns are not
 * visible in this excerpt -- confirm in the full file.
 */
2289 do_cancel(dns_dispatch_t *disp) {
2290 dns_dispatchevent_t *ev;
2291 dns_dispentry_t *resp;
2294 if (disp->shutdown_out == 1)
2297 qid = DNS_QID(disp);
2300 * Search for the first response handler without packets outstanding.
2303 for (resp = linear_first(qid);
2304 resp != NULL && resp->item_out != ISC_FALSE;
2306 resp = linear_next(qid, resp);
2308 * No one to send the cancel event to, so nothing to do.
2314 * Send the shutdown failsafe event to this resp.
2316 ev = disp->failsafe_ev;
2317 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
2318 resp->action, resp->arg, resp, NULL, NULL);
2319 ev->result = disp->shutdown_why;
2320 ev->buffer.base = NULL;
2321 ev->buffer.length = 0;
2322 disp->shutdown_out = 1;
2323 request_log(disp, resp, LVL(10),
2324 "cancel: failsafe event %p -> task %p",
2326 resp->item_out = ISC_TRUE;
2327 isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
/*
 * Return the socket stored in a dispatch.  disp must be valid.  The
 * pointer is handed back as is; no attach is performed here.
 */
2333 dns_dispatch_getsocket(dns_dispatch_t *disp) {
2334 REQUIRE(VALID_DISPATCH(disp));
2336 return (disp->socket);
/*
 * Report the local address recorded for a dispatch.
 *
 * disp must be valid and addrp non-NULL.  For a UDP dispatch, disp->local
 * is copied to *addrp and ISC_R_SUCCESS is returned; for any other socket
 * type the result is ISC_R_NOTIMPLEMENTED and *addrp is left untouched.
 */
2340 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
2342 REQUIRE(VALID_DISPATCH(disp));
2343 REQUIRE(addrp != NULL);
2345 if (disp->socktype == isc_sockettype_udp) {
2346 *addrp = disp->local;
2347 return (ISC_R_SUCCESS);
2349 return (ISC_R_NOTIMPLEMENTED);
/*
 * Cancel a dispatch.
 *
 * disp must be valid.  A dispatch that is already shutting down is only
 * unlocked.  Otherwise the shutdown reason becomes ISC_R_CANCELED, the
 * dispatch is marked as shutting down, and the lock is released.
 *
 * NOTE(review): the lock acquisition, the early return, and whatever
 * notifies waiting handlers are not visible in this excerpt -- confirm
 * in the full file.
 */
2353 dns_dispatch_cancel(dns_dispatch_t *disp) {
2354 REQUIRE(VALID_DISPATCH(disp));
2358 if (disp->shutting_down == 1) {
2359 UNLOCK(&disp->lock);
2363 disp->shutdown_why = ISC_R_CANCELED;
2364 disp->shutting_down = 1;
2367 UNLOCK(&disp->lock);
/*
 * Replace the attribute bits selected by mask with the matching bits of
 * attributes.
 *
 * disp must be valid.  DNS_DISPATCHATTR_NOLISTEN gets special treatment
 * when it is part of mask:
 *  - if the dispatch has the flag and the new value clears it, the flag
 *    is cleared here;
 *  - if the new value sets it, the flag is set and a pending receive on
 *    the socket is cancelled.
 * Afterwards all bits under mask are cleared and the masked bits of
 * attributes are merged in, then the dispatch lock is released.
 *
 * NOTE(review): the lock acquisition, part of the second NOLISTEN
 * condition, and any restart of receiving after NOLISTEN is cleared are
 * not visible in this excerpt -- confirm in the full file.
 */
2373 dns_dispatch_changeattributes(dns_dispatch_t *disp,
2374 unsigned int attributes, unsigned int mask)
2376 REQUIRE(VALID_DISPATCH(disp));
2379 * Should check for valid attributes here!
2384 if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
2385 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
2386 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
2387 disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
2389 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
2391 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
2392 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2393 if (disp->recv_pending != 0)
2394 isc_socket_cancel(disp->socket, disp->task,
2395 ISC_SOCKCANCEL_RECV);
2399 disp->attributes &= ~mask;
2400 disp->attributes |= (attributes & mask);
2401 UNLOCK(&disp->lock);
/*
 * Feed a datagram received elsewhere into a dispatch.
 *
 * disp must be valid and must have DNS_DISPATCHATTR_NOLISTEN set; event
 * must be non-NULL and is treated as an isc_socketevent_t.  The payload
 * length (sevent->n) is INSISTed not to exceed the manager buffer size.
 *
 * A new socket event of type DNS_EVENT_IMPORTRECVDONE is allocated with
 * udp_recv as its action and disp as its argument, and a buffer is taken
 * with allocate_udp_buffer().  The payload is copied into that buffer,
 * the remaining fields (n, result, address, timestamp, pktinfo,
 * attributes) are copied from the incoming event, region.length is set
 * to the manager buffer size, and the new event is sent to the dispatch
 * task.  The new event is freed again on the buffer failure path.
 *
 * NOTE(review): the early returns on allocation failure are not visible
 * in this excerpt; the incoming event is not freed by any visible line,
 * so presumably the caller keeps ownership of it -- confirm.
 */
2405 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
2407 isc_socketevent_t *sevent, *newsevent;
2409 REQUIRE(VALID_DISPATCH(disp));
2410 REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
2411 REQUIRE(event != NULL);
2413 sevent = (isc_socketevent_t *)event;
2415 INSIST(sevent->n <= disp->mgr->buffersize);
2416 newsevent = (isc_socketevent_t *)
2417 isc_event_allocate(disp->mgr->mctx, NULL,
2418 DNS_EVENT_IMPORTRECVDONE, udp_recv,
2419 disp, sizeof(isc_socketevent_t));
2420 if (newsevent == NULL)
2423 buf = allocate_udp_buffer(disp);
2425 isc_event_free(ISC_EVENT_PTR(&newsevent));
2428 memcpy(buf, sevent->region.base, sevent->n);
2429 newsevent->region.base = buf;
2430 newsevent->region.length = disp->mgr->buffersize;
2431 newsevent->n = sevent->n;
2432 newsevent->result = sevent->result;
2433 newsevent->address = sevent->address;
2434 newsevent->timestamp = sevent->timestamp;
2435 newsevent->pktinfo = sevent->pktinfo;
2436 newsevent->attributes = sevent->attributes;
2438 isc_task_send(disp->task, ISC_EVENT_PTR(&newsevent));
/*
 * Debugging aid: print every dispatch on mgr->list to standard output.
 *
 * For each entry the local address is rendered into the text buffer foo
 * with isc_sockaddr_format() and printed next to the dispatch pointer.
 *
 * NOTE(review): the declaration of foo, any locking of the manager, and
 * any preprocessor guard around this function are not visible in this
 * excerpt -- confirm in the full file.
 */
2443 dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
2444 dns_dispatch_t *disp;
2447 disp = ISC_LIST_HEAD(mgr->list);
2448 while (disp != NULL) {
2449 isc_sockaddr_format(&disp->local, foo, sizeof(foo));
2450 printf("\tdispatch %p, addr %s\n", disp, foo);
2451 disp = ISC_LIST_NEXT(disp, link);