]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/bind9/lib/dns/dispatch.c
add -n option to suppress clearing the build tree and add -DNO_CLEAN
[FreeBSD/FreeBSD.git] / contrib / bind9 / lib / dns / dispatch.c
1 /*
2  * Copyright (C) 2004-2008  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: dispatch.c,v 1.116.18.19.12.5 2008/07/23 23:16:43 marka Exp $ */
19
20 /*! \file */
21
22 #include <config.h>
23
24 #include <stdlib.h>
25 #include <sys/types.h>
26 #include <unistd.h>
27
28 #include <isc/entropy.h>
29 #include <isc/mem.h>
30 #include <isc/mutex.h>
31 #include <isc/print.h>
32 #include <isc/random.h>
33 #include <isc/string.h>
34 #include <isc/task.h>
35 #include <isc/time.h>
36 #include <isc/util.h>
37
38 #include <dns/acl.h>
39 #include <dns/dispatch.h>
40 #include <dns/events.h>
41 #include <dns/log.h>
42 #include <dns/message.h>
43 #include <dns/portlist.h>
44 #include <dns/tcpmsg.h>
45 #include <dns/types.h>
46
47 typedef ISC_LIST(dns_dispentry_t)       dns_displist_t;
48
49 typedef struct dns_qid {
50         unsigned int    magic;
51         unsigned int    qid_nbuckets;   /*%< hash table size */
52         unsigned int    qid_increment;  /*%< id increment on collision */
53         isc_mutex_t     lock;
54         dns_displist_t  *qid_table;     /*%< the table itself */
55 } dns_qid_t;
56
57 /* ARC4 Random generator state */
58 typedef struct arc4ctx {
59         isc_uint8_t     i;
60         isc_uint8_t     j;
61         isc_uint8_t     s[256];
62         int             count;
63 } arc4ctx_t;
64
65 struct dns_dispatchmgr {
66         /* Unlocked. */
67         unsigned int                    magic;
68         isc_mem_t                      *mctx;
69         dns_acl_t                      *blackhole;
70         dns_portlist_t                 *portlist;
71
72         /* Locked by "lock". */
73         isc_mutex_t                     lock;
74         unsigned int                    state;
75         ISC_LIST(dns_dispatch_t)        list;
76
77         /* Locked by arc4_lock. */
78         isc_mutex_t                     arc4_lock;
79         arc4ctx_t                       arc4ctx;    /*%< ARC4 context for QID */
80
81         /* locked by buffer lock */
82         dns_qid_t                       *qid;
83         isc_mutex_t                     buffer_lock;
84         unsigned int                    buffers;    /*%< allocated buffers */
85         unsigned int                    buffersize; /*%< size of each buffer */
86         unsigned int                    maxbuffers; /*%< max buffers */
87
88         /* Locked internally. */
89         isc_mutex_t                     pool_lock;
90         isc_mempool_t                  *epool;  /*%< memory pool for events */
91         isc_mempool_t                  *rpool;  /*%< memory pool for replies */
92         isc_mempool_t                  *dpool;  /*%< dispatch allocations */
93         isc_mempool_t                  *bpool;  /*%< memory pool for buffers */
94
95         isc_entropy_t                  *entropy; /*%< entropy source */
96 };
97
/*% Flag bit kept in dns_dispatchmgr.state, and the test for it. */
#define MGR_SHUTTINGDOWN                0x00000001U
#define MGR_IS_SHUTTINGDOWN(mgr) \
        (((mgr)->state & MGR_SHUTTINGDOWN) != 0)

/*% True when the dispatch has DNS_DISPATCHATTR_PRIVATE set. */
#define IS_PRIVATE(disp) \
        (((disp)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
102
103 struct dns_dispentry {
104         unsigned int                    magic;
105         dns_dispatch_t                 *disp;
106         dns_messageid_t                 id;
107         in_port_t                       port;
108         unsigned int                    bucket;
109         isc_sockaddr_t                  host;
110         isc_task_t                     *task;
111         isc_taskaction_t                action;
112         void                           *arg;
113         isc_boolean_t                   item_out;
114         ISC_LIST(dns_dispatchevent_t)   items;
115         ISC_LINK(dns_dispentry_t)       link;
116 };
117
/*%
 * Out-of-band bucket index.
 * NOTE(review): presumably marks an entry that is in no bucket; no use
 * of it is visible in this part of the file, confirm against callers.
 */
#define INVALID_BUCKET  (0xffffdead)
119
120 struct dns_dispatch {
121         /* Unlocked. */
122         unsigned int            magic;          /*%< magic */
123         dns_dispatchmgr_t      *mgr;            /*%< dispatch manager */
124         isc_task_t             *task;           /*%< internal task */
125         isc_socket_t           *socket;         /*%< isc socket attached to */
126         isc_sockaddr_t          local;          /*%< local address */
127         in_port_t               localport;      /*%< local UDP port */
128         unsigned int            maxrequests;    /*%< max requests */
129         isc_event_t            *ctlevent;
130
131         /*% Locked by mgr->lock. */
132         ISC_LINK(dns_dispatch_t) link;
133
134         /* Locked by "lock". */
135         isc_mutex_t             lock;           /*%< locks all below */
136         isc_sockettype_t        socktype;
137         unsigned int            attributes;
138         unsigned int            refcount;       /*%< number of users */
139         dns_dispatchevent_t    *failsafe_ev;    /*%< failsafe cancel event */
140         unsigned int            shutting_down : 1,
141                                 shutdown_out : 1,
142                                 connected : 1,
143                                 tcpmsg_valid : 1,
144                                 recv_pending : 1; /*%< is a recv() pending? */
145         isc_result_t            shutdown_why;
146         unsigned int            requests;       /*%< how many requests we have */
147         unsigned int            tcpbuffers;     /*%< allocated buffers */
148         dns_tcpmsg_t            tcpmsg;         /*%< for tcp streams */
149         dns_qid_t               *qid;
150 };
151
/*
 * Magic numbers, with their validity checks, for the structures
 * defined in this file.
 */
#define QID_MAGIC               ISC_MAGIC('Q', 'i', 'd', ' ')
#define VALID_QID(q)            ISC_MAGIC_VALID((q), QID_MAGIC)

#define RESPONSE_MAGIC          ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(r)       ISC_MAGIC_VALID((r), RESPONSE_MAGIC)

#define DISPATCH_MAGIC          ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(d)       ISC_MAGIC_VALID((d), DISPATCH_MAGIC)

#define DNS_DISPATCHMGR_MAGIC   ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(m)    ISC_MAGIC_VALID((m), DNS_DISPATCHMGR_MAGIC)
163
/*%
 * Select the QID table belonging to 'disp': a TCP dispatch uses its own
 * table, any other socket type uses the one held by its manager.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand wherever it is used (e.g. "DNS_QID(d) == q" or "!DNS_QID(d)");
 * without the outer parentheses the surrounding operator would bind to
 * only one arm of the conditional expression.
 */
#define DNS_QID(disp) \
        (((disp)->socktype == isc_sockettype_tcp) ? \
         (disp)->qid : (disp)->mgr->qid)
166 /*
167  * Statics.
168  */
169 static dns_dispentry_t *bucket_search(dns_qid_t *, isc_sockaddr_t *,
170                                       dns_messageid_t, in_port_t, unsigned int);
171 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
172 static void destroy_disp(isc_task_t *task, isc_event_t *event);
173 static void udp_recv(isc_task_t *, isc_event_t *);
174 static void tcp_recv(isc_task_t *, isc_event_t *);
175 static void startrecv(dns_dispatch_t *);
176 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
177                              in_port_t);
178 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
179 static void *allocate_udp_buffer(dns_dispatch_t *disp);
180 static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
181 static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
182 static void do_cancel(dns_dispatch_t *disp);
183 static dns_dispentry_t *linear_first(dns_qid_t *disp);
184 static dns_dispentry_t *linear_next(dns_qid_t *disp,
185                                     dns_dispentry_t *resp);
186 static void dispatch_free(dns_dispatch_t **dispp);
187 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
188                                        isc_socketmgr_t *sockmgr,
189                                        isc_taskmgr_t *taskmgr,
190                                        isc_sockaddr_t *localaddr,
191                                        unsigned int maxrequests,
192                                        unsigned int attributes,
193                                        dns_dispatch_t **dispp);
194 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
195 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
196 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
197                                  unsigned int increment, dns_qid_t **qidp);
198 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
199
200 #define LVL(x) ISC_LOG_DEBUG(x)
201
202 static void
203 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
204      ISC_FORMAT_PRINTF(3, 4);
205
206 static void
207 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
208         char msgbuf[2048];
209         va_list ap;
210
211         if (! isc_log_wouldlog(dns_lctx, level))
212                 return;
213
214         va_start(ap, fmt);
215         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
216         va_end(ap);
217
218         isc_log_write(dns_lctx,
219                       DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
220                       level, "dispatchmgr %p: %s", mgr, msgbuf);
221 }
222
223 static void
224 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
225      ISC_FORMAT_PRINTF(3, 4);
226
227 static void
228 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
229         char msgbuf[2048];
230         va_list ap;
231
232         if (! isc_log_wouldlog(dns_lctx, level))
233                 return;
234
235         va_start(ap, fmt);
236         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
237         va_end(ap);
238
239         isc_log_write(dns_lctx,
240                       DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
241                       level, "dispatch %p: %s", disp, msgbuf);
242 }
243
244 static void
245 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
246             int level, const char *fmt, ...)
247      ISC_FORMAT_PRINTF(4, 5);
248
249 static void
250 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
251             int level, const char *fmt, ...)
252 {
253         char msgbuf[2048];
254         char peerbuf[256];
255         va_list ap;
256
257         if (! isc_log_wouldlog(dns_lctx, level))
258                 return;
259
260         va_start(ap, fmt);
261         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
262         va_end(ap);
263
264         if (VALID_RESPONSE(resp)) {
265                 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
266                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
267                               DNS_LOGMODULE_DISPATCH, level,
268                               "dispatch %p response %p %s: %s", disp, resp,
269                               peerbuf, msgbuf);
270         } else {
271                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
272                               DNS_LOGMODULE_DISPATCH, level,
273                               "dispatch %p req/resp %p: %s", disp, resp,
274                               msgbuf);
275         }
276 }
277
278 /*
279  * ARC4 random number generator derived from OpenBSD.
280  * Only dispatch_arc4random() and dispatch_arc4uniformrandom() are expected
281  * to be called from general dispatch routines; the rest of them are subroutines
282  * for these two.
283  *
284  * The original copyright follows:
285  * Copyright (c) 1996, David Mazieres <dm@uun.org>
286  * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
287  *
288  * Permission to use, copy, modify, and distribute this software for any
289  * purpose with or without fee is hereby granted, provided that the above
290  * copyright notice and this permission notice appear in all copies.
291  *
292  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
293  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
294  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
295  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
296  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
297  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
298  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
299  */
300 static void
301 dispatch_arc4init(arc4ctx_t *actx) {
302         int n;
303         for (n = 0; n < 256; n++)
304                 actx->s[n] = n;
305         actx->i = 0;
306         actx->j = 0;
307         actx->count = 0;
308 }
309
310 static void
311 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
312         int n;
313         isc_uint8_t si;
314
315         actx->i--;
316         for (n = 0; n < 256; n++) {
317                 actx->i = (actx->i + 1);
318                 si = actx->s[actx->i];
319                 actx->j = (actx->j + si + dat[n % datlen]);
320                 actx->s[actx->i] = actx->s[actx->j];
321                 actx->s[actx->j] = si;
322         }
323         actx->j = actx->i;
324 }
325
326 static inline isc_uint8_t
327 dispatch_arc4get8(arc4ctx_t *actx) {
328         isc_uint8_t si, sj;
329
330         actx->i = (actx->i + 1);
331         si = actx->s[actx->i];
332         actx->j = (actx->j + si);
333         sj = actx->s[actx->j];
334         actx->s[actx->i] = sj;
335         actx->s[actx->j] = si;
336
337         return (actx->s[(si + sj) & 0xff]);
338 }
339
340 static inline isc_uint16_t
341 dispatch_arc4get16(arc4ctx_t *actx) {
342         isc_uint16_t val;
343
344         val = dispatch_arc4get8(actx) << 8;
345         val |= dispatch_arc4get8(actx);
346
347         return (val);
348 }
349
350 static void
351 dispatch_arc4stir(dns_dispatchmgr_t *mgr) {
352         int i;
353         union {
354                 unsigned char rnd[128];
355                 isc_uint32_t rnd32[32];
356         } rnd;
357         isc_result_t result;
358
359         if (mgr->entropy != NULL) {
360                 /*
361                  * We accept any quality of random data to avoid blocking.
362                  */
363                 result = isc_entropy_getdata(mgr->entropy, rnd.rnd,
364                                              sizeof(rnd), NULL, 0);
365                 RUNTIME_CHECK(result == ISC_R_SUCCESS);
366         } else {
367                 for (i = 0; i < 32; i++)
368                         isc_random_get(&rnd.rnd32[i]);
369         }
370         dispatch_arc4addrandom(&mgr->arc4ctx, rnd.rnd, sizeof(rnd.rnd));
371
372         /*
373          * Discard early keystream, as per recommendations in:
374          * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
375          */
376         for (i = 0; i < 256; i++)
377                 (void)dispatch_arc4get8(&mgr->arc4ctx);
378
379         /*
380          * Derived from OpenBSD's implementation.  The rationale is not clear,
381          * but should be conservative enough in safety, and reasonably large
382          * for efficiency.
383          */
384         mgr->arc4ctx.count = 1600000;
385 }
386
387 static isc_uint16_t
388 dispatch_arc4random(dns_dispatchmgr_t *mgr) {
389         isc_uint16_t result;
390
391         LOCK(&mgr->arc4_lock);
392         mgr->arc4ctx.count -= sizeof(isc_uint16_t);
393         if (mgr->arc4ctx.count <= 0)
394                 dispatch_arc4stir(mgr);
395         result = dispatch_arc4get16(&mgr->arc4ctx);
396         UNLOCK(&mgr->arc4_lock);
397         return (result);
398 }
399
400 static isc_uint16_t
401 dispatch_arc4uniformrandom(dns_dispatchmgr_t *mgr, isc_uint16_t upper_bound) {
402         isc_uint16_t min, r;
403         /* The caller must hold the manager lock. */
404
405         if (upper_bound < 2)
406                 return (0);
407
408         /*
409          * Ensure the range of random numbers [min, 0xffff] be a multiple of
410          * upper_bound and contain at least a half of the 16 bit range.
411          */
412
413         if (upper_bound > 0x8000)
414                 min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
415         else
416                 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
417
418         /*
419          * This could theoretically loop forever but each retry has
420          * p > 0.5 (worst case, usually far better) of selecting a
421          * number inside the range we need, so it should rarely need
422          * to re-roll.
423          */
424         for (;;) {
425                 r = dispatch_arc4random(mgr);
426                 if (r >= min)
427                         break;
428         }
429
430         return (r % upper_bound);
431 }
432
433 /*
434  * Return a hash of the destination and message id.
435  */
436 static isc_uint32_t
437 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
438          in_port_t port)
439 {
440         unsigned int ret;
441
442         ret = isc_sockaddr_hash(dest, ISC_TRUE);
443         ret ^= (id << 16) | port;
444         ret %= qid->qid_nbuckets;
445
446         INSIST(ret < qid->qid_nbuckets);
447
448         return (ret);
449 }
450
451 /*
452  * Find the first entry in 'qid'.  Returns NULL if there are no entries.
453  */
454 static dns_dispentry_t *
455 linear_first(dns_qid_t *qid) {
456         dns_dispentry_t *ret;
457         unsigned int bucket;
458
459         bucket = 0;
460
461         while (bucket < qid->qid_nbuckets) {
462                 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
463                 if (ret != NULL)
464                         return (ret);
465                 bucket++;
466         }
467
468         return (NULL);
469 }
470
471 /*
472  * Find the next entry after 'resp' in 'qid'.  Return NULL if there are
473  * no more entries.
474  */
475 static dns_dispentry_t *
476 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
477         dns_dispentry_t *ret;
478         unsigned int bucket;
479
480         ret = ISC_LIST_NEXT(resp, link);
481         if (ret != NULL)
482                 return (ret);
483
484         bucket = resp->bucket;
485         bucket++;
486         while (bucket < qid->qid_nbuckets) {
487                 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
488                 if (ret != NULL)
489                         return (ret);
490                 bucket++;
491         }
492
493         return (NULL);
494 }
495
496 /*
497  * The dispatch must be locked.
498  */
499 static isc_boolean_t
500 destroy_disp_ok(dns_dispatch_t *disp)
501 {
502         if (disp->refcount != 0)
503                 return (ISC_FALSE);
504
505         if (disp->recv_pending != 0)
506                 return (ISC_FALSE);
507
508         if (disp->shutting_down == 0)
509                 return (ISC_FALSE);
510
511         return (ISC_TRUE);
512 }
513
514
515 /*
516  * Called when refcount reaches 0 (and safe to destroy).
517  *
518  * The dispatcher must not be locked.
519  * The manager must be locked.
520  */
521 static void
522 destroy_disp(isc_task_t *task, isc_event_t *event) {
523         dns_dispatch_t *disp;
524         dns_dispatchmgr_t *mgr;
525         isc_boolean_t killmgr;
526
527         INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
528
529         UNUSED(task);
530
531         disp = event->ev_arg;
532         mgr = disp->mgr;
533
534         LOCK(&mgr->lock);
535         ISC_LIST_UNLINK(mgr->list, disp, link);
536
537         dispatch_log(disp, LVL(90),
538                      "shutting down; detaching from sock %p, task %p",
539                      disp->socket, disp->task);
540
541         isc_socket_detach(&disp->socket);
542         isc_task_detach(&disp->task);
543         isc_event_free(&event);
544
545         dispatch_free(&disp);
546
547         killmgr = destroy_mgr_ok(mgr);
548         UNLOCK(&mgr->lock);
549         if (killmgr)
550                 destroy_mgr(&mgr);
551 }
552
553
554 /*
555  * Find an entry for query ID 'id' and socket address 'dest' in 'qid'.
556  * Return NULL if no such entry exists.
557  */
558 static dns_dispentry_t *
559 bucket_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
560               in_port_t port, unsigned int bucket)
561 {
562         dns_dispentry_t *res;
563
564         REQUIRE(bucket < qid->qid_nbuckets);
565
566         res = ISC_LIST_HEAD(qid->qid_table[bucket]);
567
568         while (res != NULL) {
569                 if ((res->id == id) && isc_sockaddr_equal(dest, &res->host) &&
570                     res->port == port) {
571                         return (res);
572                 }
573                 res = ISC_LIST_NEXT(res, link);
574         }
575
576         return (NULL);
577 }
578
579 static void
580 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
581         INSIST(buf != NULL && len != 0);
582
583
584         switch (disp->socktype) {
585         case isc_sockettype_tcp:
586                 INSIST(disp->tcpbuffers > 0);
587                 disp->tcpbuffers--;
588                 isc_mem_put(disp->mgr->mctx, buf, len);
589                 break;
590         case isc_sockettype_udp:
591                 LOCK(&disp->mgr->buffer_lock);
592                 INSIST(disp->mgr->buffers > 0);
593                 INSIST(len == disp->mgr->buffersize);
594                 disp->mgr->buffers--;
595                 isc_mempool_put(disp->mgr->bpool, buf);
596                 UNLOCK(&disp->mgr->buffer_lock);
597                 break;
598         default:
599                 INSIST(0);
600                 break;
601         }
602 }
603
604 static void *
605 allocate_udp_buffer(dns_dispatch_t *disp) {
606         void *temp;
607
608         LOCK(&disp->mgr->buffer_lock);
609         temp = isc_mempool_get(disp->mgr->bpool);
610
611         if (temp != NULL)
612                 disp->mgr->buffers++;
613         UNLOCK(&disp->mgr->buffer_lock);
614
615         return (temp);
616 }
617
618 static inline void
619 free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
620         if (disp->failsafe_ev == ev) {
621                 INSIST(disp->shutdown_out == 1);
622                 disp->shutdown_out = 0;
623
624                 return;
625         }
626
627         isc_mempool_put(disp->mgr->epool, ev);
628 }
629
630 static inline dns_dispatchevent_t *
631 allocate_event(dns_dispatch_t *disp) {
632         dns_dispatchevent_t *ev;
633
634         ev = isc_mempool_get(disp->mgr->epool);
635         if (ev == NULL)
636                 return (NULL);
637         ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
638                        NULL, NULL, NULL, NULL, NULL);
639
640         return (ev);
641 }
642
643 /*
644  * General flow:
645  *
646  * If I/O result == CANCELED or error, free the buffer.
647  *
648  * If query, free the buffer, restart.
649  *
650  * If response:
651  *      Allocate event, fill in details.
652  *              If cannot allocate, free buffer, restart.
653  *      find target.  If not found, free buffer, restart.
654  *      if event queue is not empty, queue.  else, send.
655  *      restart.
656  */
657 static void
658 udp_recv(isc_task_t *task, isc_event_t *ev_in) {
659         isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
660         dns_dispatch_t *disp = ev_in->ev_arg;
661         dns_messageid_t id;
662         isc_result_t dres;
663         isc_buffer_t source;
664         unsigned int flags;
665         dns_dispentry_t *resp;
666         dns_dispatchevent_t *rev;
667         unsigned int bucket;
668         isc_boolean_t killit;
669         isc_boolean_t queue_response;
670         dns_dispatchmgr_t *mgr;
671         dns_qid_t *qid;
672         isc_netaddr_t netaddr;
673         int match;
674
675         UNUSED(task);
676
677         LOCK(&disp->lock);
678
679         mgr = disp->mgr;
680         qid = mgr->qid;
681
682         dispatch_log(disp, LVL(90),
683                      "got packet: requests %d, buffers %d, recvs %d",
684                      disp->requests, disp->mgr->buffers, disp->recv_pending);
685
686         if (ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
687                 /*
688                  * Unless the receive event was imported from a listening
689                  * interface, in which case the event type is
690                  * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
691                  */
692                 INSIST(disp->recv_pending != 0);
693                 disp->recv_pending = 0;
694         }
695
696         if (disp->shutting_down) {
697                 /*
698                  * This dispatcher is shutting down.
699                  */
700                 free_buffer(disp, ev->region.base, ev->region.length);
701
702                 isc_event_free(&ev_in);
703                 ev = NULL;
704
705                 killit = destroy_disp_ok(disp);
706                 UNLOCK(&disp->lock);
707                 if (killit)
708                         isc_task_send(disp->task, &disp->ctlevent);
709
710                 return;
711         }
712
713         if (ev->result != ISC_R_SUCCESS) {
714                 free_buffer(disp, ev->region.base, ev->region.length);
715
716                 if (ev->result != ISC_R_CANCELED)
717                         dispatch_log(disp, ISC_LOG_ERROR,
718                                      "odd socket result in udp_recv(): %s",
719                                      isc_result_totext(ev->result));
720
721                 UNLOCK(&disp->lock);
722                 isc_event_free(&ev_in);
723                 return;
724         }
725
726         /*
727          * If this is from a blackholed address, drop it.
728          */
729         isc_netaddr_fromsockaddr(&netaddr, &ev->address);
730         if (disp->mgr->blackhole != NULL &&
731             dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
732                           NULL, &match, NULL) == ISC_R_SUCCESS &&
733             match > 0)
734         {
735                 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
736                         char netaddrstr[ISC_NETADDR_FORMATSIZE];
737                         isc_netaddr_format(&netaddr, netaddrstr,
738                                            sizeof(netaddrstr));
739                         dispatch_log(disp, LVL(10),
740                                      "blackholed packet from %s",
741                                      netaddrstr);
742                 }
743                 free_buffer(disp, ev->region.base, ev->region.length);
744                 goto restart;
745         }
746
747         /*
748          * Peek into the buffer to see what we can see.
749          */
750         isc_buffer_init(&source, ev->region.base, ev->region.length);
751         isc_buffer_add(&source, ev->n);
752         dres = dns_message_peekheader(&source, &id, &flags);
753         if (dres != ISC_R_SUCCESS) {
754                 free_buffer(disp, ev->region.base, ev->region.length);
755                 dispatch_log(disp, LVL(10), "got garbage packet");
756                 goto restart;
757         }
758
759         dispatch_log(disp, LVL(92),
760                      "got valid DNS message header, /QR %c, id %u",
761                      ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
762
763         /*
764          * Look at flags.  If query, drop it. If response,
765          * look to see where it goes.
766          */
767         queue_response = ISC_FALSE;
768         if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
769                 /* query */
770                 free_buffer(disp, ev->region.base, ev->region.length);
771                 goto restart;
772         }
773
774         /* response */
775         bucket = dns_hash(qid, &ev->address, id, disp->localport);
776         LOCK(&qid->lock);
777         resp = bucket_search(qid, &ev->address, id, disp->localport, bucket);
778         dispatch_log(disp, LVL(90),
779                      "search for response in bucket %d: %s",
780                      bucket, (resp == NULL ? "not found" : "found"));
781
782         if (resp == NULL) {
783                 free_buffer(disp, ev->region.base, ev->region.length);
784                 goto unlock;
785         } 
786
787         /*
788          * Now that we have the original dispatch the query was sent
789          * from check that the address and port the response was
790          * sent to make sense.
791          */
792         if (disp != resp->disp) {
793                 isc_sockaddr_t a1;
794                 isc_sockaddr_t a2;
795                 
796                 /*
797                  * Check that the socket types and ports match.
798                  */
799                 if (disp->socktype != resp->disp->socktype ||
800                     isc_sockaddr_getport(&disp->local) !=
801                     isc_sockaddr_getport(&resp->disp->local)) {
802                         free_buffer(disp, ev->region.base, ev->region.length);
803                         goto unlock;
804                 }
805
806                 /*
807                  * If both dispatches are bound to an address then fail as
808                  * the addresses can't be equal (enforced by the IP stack).  
809                  *
810                  * Note under Linux a packet can be sent out via IPv4 socket
811                  * and the response be received via a IPv6 socket.
812                  * 
813                  * Requests sent out via IPv6 should always come back in
814                  * via IPv6.
815                  */
816                 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
817                     isc_sockaddr_pf(&disp->local) != PF_INET6) {
818                         free_buffer(disp, ev->region.base, ev->region.length);
819                         goto unlock;
820                 }
821                 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
822                 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
823                 if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
824                     !isc_sockaddr_eqaddr(&a2, &disp->local)) {
825                         free_buffer(disp, ev->region.base, ev->region.length);
826                         goto unlock;
827                 }
828         }
829
830         queue_response = resp->item_out;
831         rev = allocate_event(resp->disp);
832         if (rev == NULL) {
833                 free_buffer(disp, ev->region.base, ev->region.length);
834                 goto unlock;
835         }
836
837         /*
838          * At this point, rev contains the event we want to fill in, and
839          * resp contains the information on the place to send it to.
840          * Send the event off.
841          */
842         isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
843         isc_buffer_add(&rev->buffer, ev->n);
844         rev->result = ISC_R_SUCCESS;
845         rev->id = id;
846         rev->addr = ev->address;
847         rev->pktinfo = ev->pktinfo;
848         rev->attributes = ev->attributes;
849         if (queue_response) {
850                 ISC_LIST_APPEND(resp->items, rev, ev_link);
851         } else {
852                 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
853                                DNS_EVENT_DISPATCH,
854                                resp->action, resp->arg, resp, NULL, NULL);
855                 request_log(disp, resp, LVL(90),
856                             "[a] Sent event %p buffer %p len %d to task %p",
857                             rev, rev->buffer.base, rev->buffer.length,
858                             resp->task);
859                 resp->item_out = ISC_TRUE;
860                 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
861         }
862  unlock:
863         UNLOCK(&qid->lock);
864
865         /*
866          * Restart recv() to get the next packet.
867          */
868  restart:
869         startrecv(disp);
870
871         UNLOCK(&disp->lock);
872
873         isc_event_free(&ev_in);
874 }
875
876 /*
877  * General flow:
878  *
879  * If I/O result == CANCELED, EOF, or error, notify everyone as the
880  * various queues drain.
881  *
882  * If query, restart.
883  *
884  * If response:
885  *      Allocate event, fill in details.
886  *              If cannot allocate, restart.
887  *      find target.  If not found, restart.
888  *      if event queue is not empty, queue.  else, send.
889  *      restart.
890  */
891 static void
892 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
893         dns_dispatch_t *disp = ev_in->ev_arg;
894         dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
895         dns_messageid_t id;
896         isc_result_t dres;
897         unsigned int flags;
898         dns_dispentry_t *resp;
899         dns_dispatchevent_t *rev;
900         unsigned int bucket;
901         isc_boolean_t killit;
902         isc_boolean_t queue_response;
903         dns_qid_t *qid;
904         int level;
905         char buf[ISC_SOCKADDR_FORMATSIZE];
906
907         UNUSED(task);
908
909         REQUIRE(VALID_DISPATCH(disp));
910
911         qid = disp->qid;
912
913         dispatch_log(disp, LVL(90),
914                      "got TCP packet: requests %d, buffers %d, recvs %d",
915                      disp->requests, disp->tcpbuffers, disp->recv_pending);
916
917         LOCK(&disp->lock);
918
919         INSIST(disp->recv_pending != 0);
920         disp->recv_pending = 0;
921
922         if (disp->refcount == 0) {
923                 /*
924                  * This dispatcher is shutting down.  Force cancelation.
925                  */
926                 tcpmsg->result = ISC_R_CANCELED;
927         }
928
929         if (tcpmsg->result != ISC_R_SUCCESS) {
930                 switch (tcpmsg->result) {
931                 case ISC_R_CANCELED:
932                         break;
933                         
934                 case ISC_R_EOF:
935                         dispatch_log(disp, LVL(90), "shutting down on EOF");
936                         do_cancel(disp);
937                         break;
938
939                 case ISC_R_CONNECTIONRESET:
940                         level = ISC_LOG_INFO;
941                         goto logit;
942
943                 default:
944                         level = ISC_LOG_ERROR;
945                 logit:
946                         isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
947                         dispatch_log(disp, level, "shutting down due to TCP "
948                                      "receive error: %s: %s", buf,
949                                      isc_result_totext(tcpmsg->result));
950                         do_cancel(disp);
951                         break;
952                 }
953
954                 /*
955                  * The event is statically allocated in the tcpmsg
956                  * structure, and destroy_disp() frees the tcpmsg, so we must
957                  * free the event *before* calling destroy_disp().
958                  */
959                 isc_event_free(&ev_in);
960
961                 disp->shutting_down = 1;
962                 disp->shutdown_why = tcpmsg->result;
963
964                 /*
965                  * If the recv() was canceled pass the word on.
966                  */
967                 killit = destroy_disp_ok(disp);
968                 UNLOCK(&disp->lock);
969                 if (killit)
970                         isc_task_send(disp->task, &disp->ctlevent);
971                 return;
972         }
973
974         dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
975                      tcpmsg->result,
976                      tcpmsg->buffer.length, tcpmsg->buffer.base);
977
978         /*
979          * Peek into the buffer to see what we can see.
980          */
981         dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
982         if (dres != ISC_R_SUCCESS) {
983                 dispatch_log(disp, LVL(10), "got garbage packet");
984                 goto restart;
985         }
986
987         dispatch_log(disp, LVL(92),
988                      "got valid DNS message header, /QR %c, id %u",
989                      ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
990
991         /*
992          * Allocate an event to send to the query or response client, and
993          * allocate a new buffer for our use.
994          */
995
996         /*
997          * Look at flags.  If query, drop it. If response,
998          * look to see where it goes.
999          */
1000         queue_response = ISC_FALSE;
1001         if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1002                 /*
1003                  * Query.
1004                  */
1005                 goto restart;
1006         }
1007
1008         /*
1009          * Response.
1010          */
1011         bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1012         LOCK(&qid->lock);
1013         resp = bucket_search(qid, &tcpmsg->address, id, disp->localport,
1014                              bucket);
1015         dispatch_log(disp, LVL(90),
1016                      "search for response in bucket %d: %s",
1017                      bucket, (resp == NULL ? "not found" : "found"));
1018
1019         if (resp == NULL)
1020                 goto unlock;
1021         queue_response = resp->item_out;
1022         rev = allocate_event(disp);
1023         if (rev == NULL)
1024                 goto unlock;
1025
1026         /*
1027          * At this point, rev contains the event we want to fill in, and
1028          * resp contains the information on the place to send it to.
1029          * Send the event off.
1030          */
1031         dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1032         disp->tcpbuffers++;
1033         rev->result = ISC_R_SUCCESS;
1034         rev->id = id;
1035         rev->addr = tcpmsg->address;
1036         if (queue_response) {
1037                 ISC_LIST_APPEND(resp->items, rev, ev_link);
1038         } else {
1039                 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1040                                resp->action, resp->arg, resp, NULL, NULL);
1041                 request_log(disp, resp, LVL(90),
1042                             "[b] Sent event %p buffer %p len %d to task %p",
1043                             rev, rev->buffer.base, rev->buffer.length,
1044                             resp->task);
1045                 resp->item_out = ISC_TRUE;
1046                 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1047         }
1048  unlock:
1049         UNLOCK(&qid->lock);
1050
1051         /*
1052          * Restart recv() to get the next packet.
1053          */
1054  restart:
1055         startrecv(disp);
1056
1057         UNLOCK(&disp->lock);
1058
1059         isc_event_free(&ev_in);
1060 }
1061
1062 /*
1063  * disp must be locked.
1064  */
1065 static void
1066 startrecv(dns_dispatch_t *disp) {
1067         isc_result_t res;
1068         isc_region_t region;
1069
1070         if (disp->shutting_down == 1)
1071                 return;
1072
1073         if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1074                 return;
1075
1076         if (disp->recv_pending != 0)
1077                 return;
1078
1079         if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1080                 return;
1081
1082         switch (disp->socktype) {
1083                 /*
1084                  * UDP reads are always maximal.
1085                  */
1086         case isc_sockettype_udp:
1087                 region.length = disp->mgr->buffersize;
1088                 region.base = allocate_udp_buffer(disp);
1089                 if (region.base == NULL)
1090                         return;
1091                 res = isc_socket_recv(disp->socket, &region, 1,
1092                                       disp->task, udp_recv, disp);
1093                 if (res != ISC_R_SUCCESS) {
1094                         free_buffer(disp, region.base, region.length);
1095                         disp->shutdown_why = res;
1096                         disp->shutting_down = 1;
1097                         do_cancel(disp);
1098                         return;
1099                 }
1100                 INSIST(disp->recv_pending == 0);
1101                 disp->recv_pending = 1;
1102                 break;
1103
1104         case isc_sockettype_tcp:
1105                 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task,
1106                                              tcp_recv, disp);
1107                 if (res != ISC_R_SUCCESS) {
1108                         disp->shutdown_why = res;
1109                         disp->shutting_down = 1;
1110                         do_cancel(disp);
1111                         return;
1112                 }
1113                 INSIST(disp->recv_pending == 0);
1114                 disp->recv_pending = 1;
1115                 break;
1116         default:
1117                 INSIST(0);
1118                 break;
1119         }
1120 }
1121
1122 /*
1123  * Mgr must be locked when calling this function.
1124  */
1125 static isc_boolean_t
1126 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1127         mgr_log(mgr, LVL(90),
1128                 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1129                 "epool=%d, rpool=%d, dpool=%d",
1130                 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1131                 isc_mempool_getallocated(mgr->epool),
1132                 isc_mempool_getallocated(mgr->rpool),
1133                 isc_mempool_getallocated(mgr->dpool));
1134         if (!MGR_IS_SHUTTINGDOWN(mgr))
1135                 return (ISC_FALSE);
1136         if (!ISC_LIST_EMPTY(mgr->list))
1137                 return (ISC_FALSE);
1138         if (isc_mempool_getallocated(mgr->epool) != 0)
1139                 return (ISC_FALSE);
1140         if (isc_mempool_getallocated(mgr->rpool) != 0)
1141                 return (ISC_FALSE);
1142         if (isc_mempool_getallocated(mgr->dpool) != 0)
1143                 return (ISC_FALSE);
1144
1145         return (ISC_TRUE);
1146 }
1147
1148 /*
1149  * Mgr must be unlocked when calling this function.
1150  */
1151 static void
1152 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1153         isc_mem_t *mctx;
1154         dns_dispatchmgr_t *mgr;
1155
1156         mgr = *mgrp;
1157         *mgrp = NULL;
1158
1159         mctx = mgr->mctx;
1160
1161         mgr->magic = 0;
1162         mgr->mctx = NULL;
1163         DESTROYLOCK(&mgr->lock);
1164         mgr->state = 0;
1165
1166         DESTROYLOCK(&mgr->arc4_lock);
1167
1168         isc_mempool_destroy(&mgr->epool);
1169         isc_mempool_destroy(&mgr->rpool);
1170         isc_mempool_destroy(&mgr->dpool);
1171         isc_mempool_destroy(&mgr->bpool);
1172
1173         DESTROYLOCK(&mgr->pool_lock);
1174
1175         if (mgr->entropy != NULL)
1176                 isc_entropy_detach(&mgr->entropy);
1177         if (mgr->qid != NULL)
1178                 qid_destroy(mctx, &mgr->qid);
1179
1180         DESTROYLOCK(&mgr->buffer_lock);
1181
1182         if (mgr->blackhole != NULL)
1183                 dns_acl_detach(&mgr->blackhole);
1184
1185         if (mgr->portlist != NULL)
1186                 dns_portlist_detach(&mgr->portlist);
1187
1188         isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1189         isc_mem_detach(&mctx);
1190 }
1191
1192 static isc_result_t
1193 create_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1194               unsigned int options, isc_socket_t **sockp)
1195 {
1196         isc_socket_t *sock;
1197         isc_result_t result;
1198
1199         sock = NULL;
1200         result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1201                                    isc_sockettype_udp, &sock);
1202         if (result != ISC_R_SUCCESS)
1203                 return (result);
1204
1205 #ifndef ISC_ALLOW_MAPPED
1206         isc_socket_ipv6only(sock, ISC_TRUE);
1207 #endif
1208         result = isc_socket_bind(sock, local, options);
1209         if (result != ISC_R_SUCCESS) {
1210                 isc_socket_detach(&sock);
1211                 return (result);
1212         }
1213
1214         *sockp = sock;
1215         return (ISC_R_SUCCESS);
1216 }
1217
1218 /*
1219  * Publics.
1220  */
1221
1222 isc_result_t
1223 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1224                        dns_dispatchmgr_t **mgrp)
1225 {
1226         dns_dispatchmgr_t *mgr;
1227         isc_result_t result;
1228
1229         REQUIRE(mctx != NULL);
1230         REQUIRE(mgrp != NULL && *mgrp == NULL);
1231
1232         mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1233         if (mgr == NULL)
1234                 return (ISC_R_NOMEMORY);
1235
1236         mgr->mctx = NULL;
1237         isc_mem_attach(mctx, &mgr->mctx);
1238
1239         mgr->blackhole = NULL;
1240         mgr->portlist = NULL;
1241
1242         result = isc_mutex_init(&mgr->lock);
1243         if (result != ISC_R_SUCCESS)
1244                 goto deallocate;
1245
1246         result = isc_mutex_init(&mgr->arc4_lock);
1247         if (result != ISC_R_SUCCESS)
1248                 goto kill_lock;
1249
1250         result = isc_mutex_init(&mgr->buffer_lock);
1251         if (result != ISC_R_SUCCESS)
1252                 goto kill_arc4_lock;
1253
1254         result = isc_mutex_init(&mgr->pool_lock);
1255         if (result != ISC_R_SUCCESS)
1256                 goto kill_buffer_lock;
1257
1258         mgr->epool = NULL;
1259         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1260                                &mgr->epool) != ISC_R_SUCCESS) {
1261                 result = ISC_R_NOMEMORY;
1262                 goto kill_pool_lock;
1263         }
1264
1265         mgr->rpool = NULL;
1266         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1267                                &mgr->rpool) != ISC_R_SUCCESS) {
1268                 result = ISC_R_NOMEMORY;
1269                 goto kill_epool;
1270         }
1271
1272         mgr->dpool = NULL;
1273         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
1274                                &mgr->dpool) != ISC_R_SUCCESS) {
1275                 result = ISC_R_NOMEMORY;
1276                 goto kill_rpool;
1277         }
1278
1279         isc_mempool_setname(mgr->epool, "dispmgr_epool");
1280         isc_mempool_setfreemax(mgr->epool, 1024);
1281         isc_mempool_associatelock(mgr->epool, &mgr->pool_lock);
1282
1283         isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
1284         isc_mempool_setfreemax(mgr->rpool, 1024);
1285         isc_mempool_associatelock(mgr->rpool, &mgr->pool_lock);
1286
1287         isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
1288         isc_mempool_setfreemax(mgr->dpool, 1024);
1289         isc_mempool_associatelock(mgr->dpool, &mgr->pool_lock);
1290
1291         mgr->buffers = 0;
1292         mgr->buffersize = 0;
1293         mgr->maxbuffers = 0;
1294         mgr->bpool = NULL;
1295         mgr->entropy = NULL;
1296         mgr->qid = NULL;
1297         mgr->state = 0;
1298         ISC_LIST_INIT(mgr->list);
1299         mgr->magic = DNS_DISPATCHMGR_MAGIC;
1300
1301         if (entropy != NULL)
1302                 isc_entropy_attach(entropy, &mgr->entropy);
1303
1304         dispatch_arc4init(&mgr->arc4ctx);
1305
1306         *mgrp = mgr;
1307         return (ISC_R_SUCCESS);
1308
1309  kill_rpool:
1310         isc_mempool_destroy(&mgr->rpool);
1311  kill_epool:
1312         isc_mempool_destroy(&mgr->epool);
1313  kill_pool_lock:
1314         DESTROYLOCK(&mgr->pool_lock);
1315  kill_buffer_lock:
1316         DESTROYLOCK(&mgr->buffer_lock);
1317  kill_arc4_lock:
1318         DESTROYLOCK(&mgr->arc4_lock);
1319  kill_lock:
1320         DESTROYLOCK(&mgr->lock);
1321  deallocate:
1322         isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1323         isc_mem_detach(&mctx);
1324
1325         return (result);
1326 }
1327
1328 void
1329 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1330         REQUIRE(VALID_DISPATCHMGR(mgr));
1331         if (mgr->blackhole != NULL)
1332                 dns_acl_detach(&mgr->blackhole);
1333         dns_acl_attach(blackhole, &mgr->blackhole);
1334 }
1335
1336 dns_acl_t *
1337 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
1338         REQUIRE(VALID_DISPATCHMGR(mgr));
1339         return (mgr->blackhole);
1340 }
1341
1342 void
1343 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
1344                                  dns_portlist_t *portlist)
1345 {
1346         REQUIRE(VALID_DISPATCHMGR(mgr));
1347         if (mgr->portlist != NULL)
1348                 dns_portlist_detach(&mgr->portlist);
1349         if (portlist != NULL)
1350                 dns_portlist_attach(portlist, &mgr->portlist);
1351 }
1352
1353 dns_portlist_t *
1354 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
1355         REQUIRE(VALID_DISPATCHMGR(mgr));
1356         return (mgr->portlist);
1357 }
1358
1359 static isc_result_t
1360 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
1361                         unsigned int buffersize, unsigned int maxbuffers,
1362                         unsigned int buckets, unsigned int increment)
1363 {
1364         isc_result_t result;
1365
1366         REQUIRE(VALID_DISPATCHMGR(mgr));
1367         REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
1368         REQUIRE(maxbuffers > 0);
1369         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
1370         REQUIRE(increment > buckets);
1371
1372         /*
1373          * Keep some number of items around.  This should be a config
1374          * option.  For now, keep 8, but later keep at least two even
1375          * if the caller wants less.  This allows us to ensure certain
1376          * things, like an event can be "freed" and the next allocation
1377          * will always succeed.
1378          *
1379          * Note that if limits are placed on anything here, we use one
1380          * event internally, so the actual limit should be "wanted + 1."
1381          *
1382          * XXXMLG
1383          */
1384
1385         if (maxbuffers < 8)
1386                 maxbuffers = 8;
1387
1388         LOCK(&mgr->buffer_lock);
1389         if (mgr->bpool != NULL) {
1390                 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1391                 mgr->maxbuffers = maxbuffers;
1392                 UNLOCK(&mgr->buffer_lock);
1393                 return (ISC_R_SUCCESS);
1394         }
1395
1396         if (isc_mempool_create(mgr->mctx, buffersize,
1397                                &mgr->bpool) != ISC_R_SUCCESS) {
1398                 UNLOCK(&mgr->buffer_lock);
1399                 return (ISC_R_NOMEMORY);
1400         }
1401
1402         isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
1403         isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1404         isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock);
1405
1406         result = qid_allocate(mgr, buckets, increment, &mgr->qid);
1407         if (result != ISC_R_SUCCESS)
1408                 goto cleanup;
1409
1410         mgr->buffersize = buffersize;
1411         mgr->maxbuffers = maxbuffers;
1412         UNLOCK(&mgr->buffer_lock);
1413         return (ISC_R_SUCCESS);
1414
1415  cleanup:
1416         isc_mempool_destroy(&mgr->bpool);
1417         UNLOCK(&mgr->buffer_lock);
1418         return (ISC_R_NOMEMORY);
1419 }
1420
1421 void
1422 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
1423         dns_dispatchmgr_t *mgr;
1424         isc_boolean_t killit;
1425
1426         REQUIRE(mgrp != NULL);
1427         REQUIRE(VALID_DISPATCHMGR(*mgrp));
1428
1429         mgr = *mgrp;
1430         *mgrp = NULL;
1431
1432         LOCK(&mgr->lock);
1433         mgr->state |= MGR_SHUTTINGDOWN;
1434
1435         killit = destroy_mgr_ok(mgr);
1436         UNLOCK(&mgr->lock);
1437
1438         mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
1439
1440         if (killit)
1441                 destroy_mgr(&mgr);
1442 }
1443
1444 static isc_boolean_t
1445 blacklisted(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
1446             isc_sockaddr_t *sockaddrp)
1447 {
1448         isc_sockaddr_t sockaddr;
1449         isc_result_t result;
1450
1451         REQUIRE(sock != NULL || sockaddrp != NULL);
1452
1453         if (mgr->portlist == NULL)
1454                 return (ISC_FALSE);
1455
1456         if (sock != NULL) {
1457                 sockaddrp = &sockaddr;
1458                 result = isc_socket_getsockname(sock, sockaddrp);
1459                 if (result != ISC_R_SUCCESS)
1460                         return (ISC_FALSE);
1461         }
1462
1463         if (mgr->portlist != NULL &&
1464             dns_portlist_match(mgr->portlist, isc_sockaddr_pf(sockaddrp),
1465                                isc_sockaddr_getport(sockaddrp)))
1466                 return (ISC_TRUE);
1467         return (ISC_FALSE);
1468 }
1469
/* True when _a1 and _a2 agree on every bit selected by _mask. */
#define ATTRMATCH(_a1, _a2, _mask) ((((_a1) ^ (_a2)) & (_mask)) == 0)
1471
1472 static isc_boolean_t
1473 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
1474         isc_sockaddr_t sockaddr;
1475         isc_result_t result;
1476
1477         if (addr == NULL)
1478                 return (ISC_TRUE);
1479
1480         /*
1481          * Don't match wildcard ports against newly blacklisted ports.
1482          */
1483         if (disp->mgr->portlist != NULL &&
1484             isc_sockaddr_getport(addr) == 0 &&
1485             isc_sockaddr_getport(&disp->local) == 0 &&
1486             blacklisted(disp->mgr, disp->socket, NULL))
1487                 return (ISC_FALSE);
1488
1489         /*
1490          * Check if we match the binding <address,port>.
1491          * Wildcard ports match/fail here.
1492          */
1493         if (isc_sockaddr_equal(&disp->local, addr))
1494                 return (ISC_TRUE);
1495         if (isc_sockaddr_getport(addr) == 0)
1496                 return (ISC_FALSE);
1497
1498         /*
1499          * Check if we match a bound wildcard port <address,port>.
1500          */
1501         if (!isc_sockaddr_eqaddr(&disp->local, addr))
1502                 return (ISC_FALSE);
1503         result = isc_socket_getsockname(disp->socket, &sockaddr);
1504         if (result != ISC_R_SUCCESS)
1505                 return (ISC_FALSE);
1506
1507         return (isc_sockaddr_equal(&sockaddr, addr));
1508 }
1509
1510 /*
1511  * Requires mgr be locked.
1512  *
1513  * No dispatcher can be locked by this thread when calling this function.
1514  *
1515  *
1516  * NOTE:
1517  *      If a matching dispatcher is found, it is locked after this function
1518  *      returns, and must be unlocked by the caller.
1519  */
1520 static isc_result_t
1521 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
1522               unsigned int attributes, unsigned int mask,
1523               dns_dispatch_t **dispp)
1524 {
1525         dns_dispatch_t *disp;
1526         isc_result_t result;
1527
1528         /*
1529          * Make certain that we will not match a private dispatch.
1530          */
1531         attributes &= ~DNS_DISPATCHATTR_PRIVATE;
1532         mask |= DNS_DISPATCHATTR_PRIVATE;
1533
1534         disp = ISC_LIST_HEAD(mgr->list);
1535         while (disp != NULL) {
1536                 LOCK(&disp->lock);
1537                 if ((disp->shutting_down == 0)
1538                     && ATTRMATCH(disp->attributes, attributes, mask)
1539                     && local_addr_match(disp, local))
1540                         break;
1541                 UNLOCK(&disp->lock);
1542                 disp = ISC_LIST_NEXT(disp, link);
1543         }
1544
1545         if (disp == NULL) {
1546                 result = ISC_R_NOTFOUND;
1547                 goto out;
1548         }
1549
1550         *dispp = disp;
1551         result = ISC_R_SUCCESS;
1552  out:
1553
1554         return (result);
1555 }
1556
1557 static isc_result_t
1558 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
1559              unsigned int increment, dns_qid_t **qidp)
1560 {
1561         dns_qid_t *qid;
1562         unsigned int i;
1563         isc_result_t result;
1564
1565         REQUIRE(VALID_DISPATCHMGR(mgr));
1566         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
1567         REQUIRE(increment > buckets);
1568         REQUIRE(qidp != NULL && *qidp == NULL);
1569
1570         qid = isc_mem_get(mgr->mctx, sizeof(*qid));
1571         if (qid == NULL)
1572                 return (ISC_R_NOMEMORY);
1573
1574         qid->qid_table = isc_mem_get(mgr->mctx,
1575                                      buckets * sizeof(dns_displist_t));
1576         if (qid->qid_table == NULL) {
1577                 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
1578                 return (ISC_R_NOMEMORY);
1579         }
1580
1581         result = isc_mutex_init(&qid->lock);
1582         if (result != ISC_R_SUCCESS) {
1583                 isc_mem_put(mgr->mctx, qid->qid_table,
1584                             buckets * sizeof(dns_displist_t));
1585                 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
1586                 return (result);
1587         }
1588
1589         for (i = 0; i < buckets; i++)
1590                 ISC_LIST_INIT(qid->qid_table[i]);
1591
1592         qid->qid_nbuckets = buckets;
1593         qid->qid_increment = increment;
1594         qid->magic = QID_MAGIC;
1595         *qidp = qid;
1596         return (ISC_R_SUCCESS);
1597 }
1598
1599 static void
1600 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
1601         dns_qid_t *qid;
1602
1603         REQUIRE(qidp != NULL);
1604         qid = *qidp;
1605
1606         REQUIRE(VALID_QID(qid));
1607
1608         *qidp = NULL;
1609         qid->magic = 0;
1610         isc_mem_put(mctx, qid->qid_table,
1611                     qid->qid_nbuckets * sizeof(dns_displist_t));
1612         DESTROYLOCK(&qid->lock);
1613         isc_mem_put(mctx, qid, sizeof(*qid));
1614 }
1615
1616 /*
1617  * Allocate and set important limits.
1618  */
1619 static isc_result_t
1620 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
1621                   dns_dispatch_t **dispp)
1622 {
1623         dns_dispatch_t *disp;
1624         isc_result_t result;
1625
1626         REQUIRE(VALID_DISPATCHMGR(mgr));
1627         REQUIRE(dispp != NULL && *dispp == NULL);
1628
1629         /*
1630          * Set up the dispatcher, mostly.  Don't bother setting some of
1631          * the options that are controlled by tcp vs. udp, etc.
1632          */
1633
1634         disp = isc_mempool_get(mgr->dpool);
1635         if (disp == NULL)
1636                 return (ISC_R_NOMEMORY);
1637
1638         disp->magic = 0;
1639         disp->mgr = mgr;
1640         disp->maxrequests = maxrequests;
1641         disp->attributes = 0;
1642         ISC_LINK_INIT(disp, link);
1643         disp->refcount = 1;
1644         disp->recv_pending = 0;
1645         memset(&disp->local, 0, sizeof(disp->local));
1646         disp->localport = 0;
1647         disp->shutting_down = 0;
1648         disp->shutdown_out = 0;
1649         disp->connected = 0;
1650         disp->tcpmsg_valid = 0;
1651         disp->shutdown_why = ISC_R_UNEXPECTED;
1652         disp->requests = 0;
1653         disp->tcpbuffers = 0;
1654         disp->qid = NULL;
1655
1656         result = isc_mutex_init(&disp->lock);
1657         if (result != ISC_R_SUCCESS)
1658                 goto deallocate;
1659
1660         disp->failsafe_ev = allocate_event(disp);
1661         if (disp->failsafe_ev == NULL) {
1662                 result = ISC_R_NOMEMORY;
1663                 goto kill_lock;
1664         }
1665
1666         disp->magic = DISPATCH_MAGIC;
1667
1668         *dispp = disp;
1669         return (ISC_R_SUCCESS);
1670
1671         /*
1672          * error returns
1673          */
1674  kill_lock:
1675         DESTROYLOCK(&disp->lock);
1676  deallocate:
1677         isc_mempool_put(mgr->dpool, disp);
1678
1679         return (result);
1680 }
1681
1682
1683 /*
1684  * MUST be unlocked, and not used by anthing.
1685  */
1686 static void
1687 dispatch_free(dns_dispatch_t **dispp)
1688 {
1689         dns_dispatch_t *disp;
1690         dns_dispatchmgr_t *mgr;
1691
1692         REQUIRE(VALID_DISPATCH(*dispp));
1693         disp = *dispp;
1694         *dispp = NULL;
1695
1696         mgr = disp->mgr;
1697         REQUIRE(VALID_DISPATCHMGR(mgr));
1698
1699         if (disp->tcpmsg_valid) {
1700                 dns_tcpmsg_invalidate(&disp->tcpmsg);
1701                 disp->tcpmsg_valid = 0;
1702         }
1703
1704         INSIST(disp->tcpbuffers == 0);
1705         INSIST(disp->requests == 0);
1706         INSIST(disp->recv_pending == 0);
1707
1708         isc_mempool_put(mgr->epool, disp->failsafe_ev);
1709         disp->failsafe_ev = NULL;
1710
1711         if (disp->qid != NULL)
1712                 qid_destroy(mgr->mctx, &disp->qid);
1713         disp->mgr = NULL;
1714         DESTROYLOCK(&disp->lock);
1715         disp->magic = 0;
1716         isc_mempool_put(mgr->dpool, disp);
1717 }
1718
1719 isc_result_t
1720 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
1721                        isc_taskmgr_t *taskmgr, unsigned int buffersize,
1722                        unsigned int maxbuffers, unsigned int maxrequests,
1723                        unsigned int buckets, unsigned int increment,
1724                        unsigned int attributes, dns_dispatch_t **dispp)
1725 {
1726         isc_result_t result;
1727         dns_dispatch_t *disp;
1728
1729         UNUSED(maxbuffers);
1730         UNUSED(buffersize);
1731
1732         REQUIRE(VALID_DISPATCHMGR(mgr));
1733         REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
1734         REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
1735         REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
1736
1737         attributes |= DNS_DISPATCHATTR_PRIVATE;  /* XXXMLG */
1738
1739         LOCK(&mgr->lock);
1740
1741         /*
1742          * dispatch_allocate() checks mgr for us.
1743          * qid_allocate() checks buckets and increment for us.
1744          */
1745         disp = NULL;
1746         result = dispatch_allocate(mgr, maxrequests, &disp);
1747         if (result != ISC_R_SUCCESS) {
1748                 UNLOCK(&mgr->lock);
1749                 return (result);
1750         }
1751
1752         result = qid_allocate(mgr, buckets, increment, &disp->qid);
1753         if (result != ISC_R_SUCCESS)
1754                 goto deallocate_dispatch;
1755
1756         disp->socktype = isc_sockettype_tcp;
1757         disp->socket = NULL;
1758         isc_socket_attach(sock, &disp->socket);
1759
1760         disp->task = NULL;
1761         result = isc_task_create(taskmgr, 0, &disp->task);
1762         if (result != ISC_R_SUCCESS)
1763                 goto kill_socket;
1764
1765         disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
1766                                             DNS_EVENT_DISPATCHCONTROL,
1767                                             destroy_disp, disp,
1768                                             sizeof(isc_event_t));
1769         if (disp->ctlevent == NULL) {
1770                 result = ISC_R_NOMEMORY;
1771                 goto kill_task;
1772         }
1773
1774         isc_task_setname(disp->task, "tcpdispatch", disp);
1775
1776         dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
1777         disp->tcpmsg_valid = 1;
1778
1779         disp->attributes = attributes;
1780
1781         /*
1782          * Append it to the dispatcher list.
1783          */
1784         ISC_LIST_APPEND(mgr->list, disp, link);
1785         UNLOCK(&mgr->lock);
1786
1787         mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
1788         dispatch_log(disp, LVL(90), "created task %p", disp->task);
1789
1790         *dispp = disp;
1791
1792         return (ISC_R_SUCCESS);
1793
1794         /*
1795          * Error returns.
1796          */
1797  kill_task:
1798         isc_task_detach(&disp->task);
1799  kill_socket:
1800         isc_socket_detach(&disp->socket);
1801  deallocate_dispatch:
1802         dispatch_free(&disp);
1803
1804         UNLOCK(&mgr->lock);
1805
1806         return (result);
1807 }
1808
1809 isc_result_t
1810 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
1811                     isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
1812                     unsigned int buffersize,
1813                     unsigned int maxbuffers, unsigned int maxrequests,
1814                     unsigned int buckets, unsigned int increment,
1815                     unsigned int attributes, unsigned int mask,
1816                     dns_dispatch_t **dispp)
1817 {
1818         isc_result_t result;
1819         dns_dispatch_t *disp = NULL;
1820
1821         REQUIRE(VALID_DISPATCHMGR(mgr));
1822         REQUIRE(sockmgr != NULL);
1823         REQUIRE(localaddr != NULL);
1824         REQUIRE(taskmgr != NULL);
1825         REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
1826         REQUIRE(maxbuffers > 0);
1827         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
1828         REQUIRE(increment > buckets);
1829         REQUIRE(dispp != NULL && *dispp == NULL);
1830         REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
1831
1832         result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
1833                                         buckets, increment);
1834         if (result != ISC_R_SUCCESS)
1835                 return (result);
1836
1837         LOCK(&mgr->lock);
1838
1839         if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) {
1840                 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
1841                 goto createudp;
1842         }
1843
1844         /*
1845          * First, see if we have a dispatcher that matches.
1846          */
1847         disp = NULL;
1848         result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
1849         if (result == ISC_R_SUCCESS) {
1850                 disp->refcount++;
1851
1852                 if (disp->maxrequests < maxrequests)
1853                         disp->maxrequests = maxrequests;
1854
1855                 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 &&
1856                     (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1857                 {
1858                         disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
1859                         if (disp->recv_pending != 0)
1860                                 isc_socket_cancel(disp->socket, disp->task,
1861                                                   ISC_SOCKCANCEL_RECV);
1862                 }
1863
1864                 UNLOCK(&disp->lock);
1865                 UNLOCK(&mgr->lock);
1866
1867                 *dispp = disp;
1868
1869                 return (ISC_R_SUCCESS);
1870         }
1871
1872  createudp:
1873         /*
1874          * Nope, create one.
1875          */
1876         result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
1877                                     maxrequests, attributes, &disp);
1878         if (result != ISC_R_SUCCESS) {
1879                 UNLOCK(&mgr->lock);
1880                 return (result);
1881         }
1882
1883         UNLOCK(&mgr->lock);
1884         *dispp = disp;
1885         return (ISC_R_SUCCESS);
1886 }
1887
1888 /*
1889  * mgr should be locked.
1890  */
1891
1892 #ifndef DNS_DISPATCH_HELD
1893 #define DNS_DISPATCH_HELD 20U
1894 #endif
1895
1896 static isc_result_t
1897 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
1898                    isc_taskmgr_t *taskmgr,
1899                    isc_sockaddr_t *localaddr,
1900                    unsigned int maxrequests,
1901                    unsigned int attributes,
1902                    dns_dispatch_t **dispp)
1903 {
1904         isc_result_t result;
1905         dns_dispatch_t *disp;
1906         isc_socket_t *sock = NULL;
1907         isc_socket_t *held[DNS_DISPATCH_HELD];
1908         unsigned int i = 0, j = 0, k = 0;
1909         isc_sockaddr_t localaddr_bound;
1910         in_port_t localport = 0;
1911
1912         /*
1913          * dispatch_allocate() checks mgr for us.
1914          */
1915         disp = NULL;
1916         result = dispatch_allocate(mgr, maxrequests, &disp);
1917         if (result != ISC_R_SUCCESS)
1918                 return (result);
1919
1920         /*
1921          * Try to allocate a socket that is not on the blacklist.
1922          * Hold up to DNS_DISPATCH_HELD sockets to prevent the OS
1923          * from returning the same port to us too quickly.
1924          */
1925         memset(held, 0, sizeof(held));
1926         localaddr_bound = *localaddr;
1927  getsocket:
1928         if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) {
1929                 in_port_t prt;
1930
1931                 /* XXX: should the range be configurable? */
1932                 prt = 1024 + dispatch_arc4uniformrandom(mgr, 65535 - 1023);
1933                 isc_sockaddr_setport(&localaddr_bound, prt);
1934                 if (blacklisted(mgr, NULL, &localaddr_bound)) {
1935                         if (++k == 1024)
1936                                 attributes &= ~DNS_DISPATCHATTR_RANDOMPORT;
1937                         goto getsocket;
1938                 }
1939                 result = create_socket(sockmgr, &localaddr_bound, 0, &sock);
1940                 if (result == ISC_R_ADDRINUSE) {
1941                         if (++k == 1024)
1942                                 attributes &= ~DNS_DISPATCHATTR_RANDOMPORT;
1943                         goto getsocket;
1944                 }
1945                 localport = prt;
1946         } else
1947                 result = create_socket(sockmgr, localaddr,
1948                                        ISC_SOCKET_REUSEADDRESS, &sock);
1949         if (result != ISC_R_SUCCESS)
1950                 goto deallocate_dispatch;
1951         if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) == 0 &&
1952             isc_sockaddr_getport(localaddr) == 0 &&
1953             blacklisted(mgr, sock, NULL))
1954         {
1955                 if (held[i] != NULL)
1956                         isc_socket_detach(&held[i]);
1957                 held[i++] = sock;
1958                 sock = NULL;
1959                 if (i == DNS_DISPATCH_HELD)
1960                         i = 0;
1961                 if (j++ == 0xffffU) {
1962                         mgr_log(mgr, ISC_LOG_ERROR, "avoid-v%s-udp-ports: "
1963                                 "unable to allocate a non-blacklisted port",
1964                                 isc_sockaddr_pf(localaddr) == AF_INET ?
1965                                         "4" : "6");
1966                         result = ISC_R_FAILURE;
1967                         goto deallocate_dispatch;
1968                 }
1969                 goto getsocket;
1970         }
1971
1972         disp->socktype = isc_sockettype_udp;
1973         disp->socket = sock;
1974         disp->local = *localaddr;
1975         disp->localport = localport;
1976
1977         disp->task = NULL;
1978         result = isc_task_create(taskmgr, 0, &disp->task);
1979         if (result != ISC_R_SUCCESS)
1980                 goto kill_socket;
1981
1982         disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
1983                                             DNS_EVENT_DISPATCHCONTROL,
1984                                             destroy_disp, disp,
1985                                             sizeof(isc_event_t));
1986         if (disp->ctlevent == NULL) {
1987                 result = ISC_R_NOMEMORY;
1988                 goto kill_task;
1989         }
1990
1991         isc_task_setname(disp->task, "udpdispatch", disp);
1992
1993         attributes &= ~DNS_DISPATCHATTR_TCP;
1994         attributes |= DNS_DISPATCHATTR_UDP;
1995         disp->attributes = attributes;
1996
1997         /*
1998          * Append it to the dispatcher list.
1999          */
2000         ISC_LIST_APPEND(mgr->list, disp, link);
2001
2002         mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
2003         dispatch_log(disp, LVL(90), "created task %p", disp->task);
2004         dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
2005
2006         *dispp = disp;
2007
2008         goto cleanheld;
2009
2010         /*
2011          * Error returns.
2012          */
2013  kill_task:
2014         isc_task_detach(&disp->task);
2015  kill_socket:
2016         isc_socket_detach(&disp->socket);
2017  deallocate_dispatch:
2018         dispatch_free(&disp);
2019  cleanheld:
2020         for (i = 0; i < DNS_DISPATCH_HELD; i++)
2021                 if (held[i] != NULL)
2022                         isc_socket_detach(&held[i]);
2023         return (result);
2024 }
2025
2026 void
2027 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
2028         REQUIRE(VALID_DISPATCH(disp));
2029         REQUIRE(dispp != NULL && *dispp == NULL);
2030
2031         LOCK(&disp->lock);
2032         disp->refcount++;
2033         UNLOCK(&disp->lock);
2034
2035         *dispp = disp;
2036 }
2037
2038 /*
2039  * It is important to lock the manager while we are deleting the dispatch,
2040  * since dns_dispatch_getudp will call dispatch_find, which returns to
2041  * the caller a dispatch but does not attach to it until later.  _getudp
2042  * locks the manager, however, so locking it here will keep us from attaching
2043  * to a dispatcher that is in the process of going away.
2044  */
2045 void
2046 dns_dispatch_detach(dns_dispatch_t **dispp) {
2047         dns_dispatch_t *disp;
2048         isc_boolean_t killit;
2049
2050         REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
2051
2052         disp = *dispp;
2053         *dispp = NULL;
2054
2055         LOCK(&disp->lock);
2056
2057         INSIST(disp->refcount > 0);
2058         disp->refcount--;
2059         killit = ISC_FALSE;
2060         if (disp->refcount == 0) {
2061                 if (disp->recv_pending > 0)
2062                         isc_socket_cancel(disp->socket, disp->task,
2063                                           ISC_SOCKCANCEL_RECV);
2064                 disp->shutting_down = 1;
2065         }
2066
2067         dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
2068
2069         killit = destroy_disp_ok(disp);
2070         UNLOCK(&disp->lock);
2071         if (killit)
2072                 isc_task_send(disp->task, &disp->ctlevent);
2073 }
2074
2075 isc_result_t
2076 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
2077                          isc_task_t *task, isc_taskaction_t action, void *arg,
2078                          dns_messageid_t *idp, dns_dispentry_t **resp)
2079 {
2080         dns_dispentry_t *res;
2081         unsigned int bucket;
2082         dns_messageid_t id;
2083         int i;
2084         isc_boolean_t ok;
2085         dns_qid_t *qid;
2086
2087         REQUIRE(VALID_DISPATCH(disp));
2088         REQUIRE(task != NULL);
2089         REQUIRE(dest != NULL);
2090         REQUIRE(resp != NULL && *resp == NULL);
2091         REQUIRE(idp != NULL);
2092
2093         LOCK(&disp->lock);
2094
2095         if (disp->shutting_down == 1) {
2096                 UNLOCK(&disp->lock);
2097                 return (ISC_R_SHUTTINGDOWN);
2098         }
2099
2100         if (disp->requests >= disp->maxrequests) {
2101                 UNLOCK(&disp->lock);
2102                 return (ISC_R_QUOTA);
2103         }
2104
2105         /*
2106          * Try somewhat hard to find an unique ID.
2107          */
2108         id = (dns_messageid_t)dispatch_arc4random(disp->mgr);
2109         qid = DNS_QID(disp);
2110         LOCK(&qid->lock);
2111         bucket = dns_hash(qid, dest, id, disp->localport);
2112         ok = ISC_FALSE;
2113         for (i = 0; i < 64; i++) {
2114                 if (bucket_search(qid, dest, id, disp->localport, bucket) ==
2115                     NULL) {
2116                         ok = ISC_TRUE;
2117                         break;
2118                 }
2119                 id += qid->qid_increment;
2120                 id &= 0x0000ffff;
2121                 bucket = dns_hash(qid, dest, id, disp->localport);
2122         }
2123
2124         if (!ok) {
2125                 UNLOCK(&qid->lock);
2126                 UNLOCK(&disp->lock);
2127                 return (ISC_R_NOMORE);
2128         }
2129
2130         res = isc_mempool_get(disp->mgr->rpool);
2131         if (res == NULL) {
2132                 UNLOCK(&qid->lock);
2133                 UNLOCK(&disp->lock);
2134                 return (ISC_R_NOMEMORY);
2135         }
2136
2137         disp->refcount++;
2138         disp->requests++;
2139         res->task = NULL;
2140         isc_task_attach(task, &res->task);
2141         res->disp = disp;
2142         res->id = id;
2143         res->port = disp->localport;
2144         res->bucket = bucket;
2145         res->host = *dest;
2146         res->action = action;
2147         res->arg = arg;
2148         res->item_out = ISC_FALSE;
2149         ISC_LIST_INIT(res->items);
2150         ISC_LINK_INIT(res, link);
2151         res->magic = RESPONSE_MAGIC;
2152         ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
2153         UNLOCK(&qid->lock);
2154
2155         request_log(disp, res, LVL(90),
2156                     "attached to task %p", res->task);
2157
2158         if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
2159             ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0))
2160                 startrecv(disp);
2161
2162         UNLOCK(&disp->lock);
2163
2164         *idp = id;
2165         *resp = res;
2166
2167         return (ISC_R_SUCCESS);
2168 }
2169
2170 void
2171 dns_dispatch_starttcp(dns_dispatch_t *disp) {
2172
2173         REQUIRE(VALID_DISPATCH(disp));
2174
2175         dispatch_log(disp, LVL(90), "starttcp %p", disp->task);
2176
2177         LOCK(&disp->lock);
2178         disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
2179         startrecv(disp);
2180         UNLOCK(&disp->lock);
2181 }
2182
2183 void
2184 dns_dispatch_removeresponse(dns_dispentry_t **resp,
2185                             dns_dispatchevent_t **sockevent)
2186 {
2187         dns_dispatchmgr_t *mgr;
2188         dns_dispatch_t *disp;
2189         dns_dispentry_t *res;
2190         dns_dispatchevent_t *ev;
2191         unsigned int bucket;
2192         isc_boolean_t killit;
2193         unsigned int n;
2194         isc_eventlist_t events;
2195         dns_qid_t *qid;
2196
2197         REQUIRE(resp != NULL);
2198         REQUIRE(VALID_RESPONSE(*resp));
2199
2200         res = *resp;
2201         *resp = NULL;
2202
2203         disp = res->disp;
2204         REQUIRE(VALID_DISPATCH(disp));
2205         mgr = disp->mgr;
2206         REQUIRE(VALID_DISPATCHMGR(mgr));
2207
2208         qid = DNS_QID(disp);
2209
2210         if (sockevent != NULL) {
2211                 REQUIRE(*sockevent != NULL);
2212                 ev = *sockevent;
2213                 *sockevent = NULL;
2214         } else {
2215                 ev = NULL;
2216         }
2217
2218         LOCK(&disp->lock);
2219
2220         INSIST(disp->requests > 0);
2221         disp->requests--;
2222         INSIST(disp->refcount > 0);
2223         disp->refcount--;
2224         killit = ISC_FALSE;
2225         if (disp->refcount == 0) {
2226                 if (disp->recv_pending > 0)
2227                         isc_socket_cancel(disp->socket, disp->task,
2228                                           ISC_SOCKCANCEL_RECV);
2229                 disp->shutting_down = 1;
2230         }
2231
2232         bucket = res->bucket;
2233
2234         LOCK(&qid->lock);
2235         ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
2236         UNLOCK(&qid->lock);
2237
2238         if (ev == NULL && res->item_out) {
2239                 /*
2240                  * We've posted our event, but the caller hasn't gotten it
2241                  * yet.  Take it back.
2242                  */
2243                 ISC_LIST_INIT(events);
2244                 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
2245                                     NULL, &events);
2246                 /*
2247                  * We had better have gotten it back.
2248                  */
2249                 INSIST(n == 1);
2250                 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
2251         }
2252
2253         if (ev != NULL) {
2254                 REQUIRE(res->item_out == ISC_TRUE);
2255                 res->item_out = ISC_FALSE;
2256                 if (ev->buffer.base != NULL)
2257                         free_buffer(disp, ev->buffer.base, ev->buffer.length);
2258                 free_event(disp, ev);
2259         }
2260
2261         request_log(disp, res, LVL(90), "detaching from task %p", res->task);
2262         isc_task_detach(&res->task);
2263
2264         /*
2265          * Free any buffered requests as well
2266          */
2267         ev = ISC_LIST_HEAD(res->items);
2268         while (ev != NULL) {
2269                 ISC_LIST_UNLINK(res->items, ev, ev_link);
2270                 if (ev->buffer.base != NULL)
2271                         free_buffer(disp, ev->buffer.base, ev->buffer.length);
2272                 free_event(disp, ev);
2273                 ev = ISC_LIST_HEAD(res->items);
2274         }
2275         res->magic = 0;
2276         isc_mempool_put(disp->mgr->rpool, res);
2277         if (disp->shutting_down == 1)
2278                 do_cancel(disp);
2279         else
2280                 startrecv(disp);
2281
2282         killit = destroy_disp_ok(disp);
2283         UNLOCK(&disp->lock);
2284         if (killit)
2285                 isc_task_send(disp->task, &disp->ctlevent);
2286 }
2287
2288 static void
2289 do_cancel(dns_dispatch_t *disp) {
2290         dns_dispatchevent_t *ev;
2291         dns_dispentry_t *resp;
2292         dns_qid_t *qid;
2293
2294         if (disp->shutdown_out == 1)
2295                 return;
2296
2297         qid = DNS_QID(disp);
2298
2299         /*
2300          * Search for the first response handler without packets outstanding.
2301          */
2302         LOCK(&qid->lock);
2303         for (resp = linear_first(qid);
2304              resp != NULL && resp->item_out != ISC_FALSE;
2305              /* Empty. */)
2306                 resp = linear_next(qid, resp);
2307         /*
2308          * No one to send the cancel event to, so nothing to do.
2309          */
2310         if (resp == NULL)
2311                 goto unlock;
2312
2313         /*
2314          * Send the shutdown failsafe event to this resp.
2315          */
2316         ev = disp->failsafe_ev;
2317         ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
2318                        resp->action, resp->arg, resp, NULL, NULL);
2319         ev->result = disp->shutdown_why;
2320         ev->buffer.base = NULL;
2321         ev->buffer.length = 0;
2322         disp->shutdown_out = 1;
2323         request_log(disp, resp, LVL(10),
2324                     "cancel: failsafe event %p -> task %p",
2325                     ev, resp->task);
2326         resp->item_out = ISC_TRUE;
2327         isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
2328  unlock:
2329         UNLOCK(&qid->lock);
2330 }
2331
2332 isc_socket_t *
2333 dns_dispatch_getsocket(dns_dispatch_t *disp) {
2334         REQUIRE(VALID_DISPATCH(disp));
2335
2336         return (disp->socket);
2337 }
2338
2339 isc_result_t
2340 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
2341
2342         REQUIRE(VALID_DISPATCH(disp));
2343         REQUIRE(addrp != NULL);
2344
2345         if (disp->socktype == isc_sockettype_udp) {
2346                 *addrp = disp->local;
2347                 return (ISC_R_SUCCESS);
2348         }
2349         return (ISC_R_NOTIMPLEMENTED);
2350 }
2351
2352 void
2353 dns_dispatch_cancel(dns_dispatch_t *disp) {
2354         REQUIRE(VALID_DISPATCH(disp));
2355
2356         LOCK(&disp->lock);
2357
2358         if (disp->shutting_down == 1) {
2359                 UNLOCK(&disp->lock);
2360                 return;
2361         }
2362
2363         disp->shutdown_why = ISC_R_CANCELED;
2364         disp->shutting_down = 1;
2365         do_cancel(disp);
2366
2367         UNLOCK(&disp->lock);
2368
2369         return;
2370 }
2371
2372 void
2373 dns_dispatch_changeattributes(dns_dispatch_t *disp,
2374                               unsigned int attributes, unsigned int mask)
2375 {
2376         REQUIRE(VALID_DISPATCH(disp));
2377
2378         /* XXXMLG
2379          * Should check for valid attributes here!
2380          */
2381
2382         LOCK(&disp->lock);
2383
2384         if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
2385                 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
2386                     (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
2387                         disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
2388                         startrecv(disp);
2389                 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
2390                            == 0 &&
2391                            (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
2392                         disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2393                         if (disp->recv_pending != 0)
2394                                 isc_socket_cancel(disp->socket, disp->task,
2395                                                   ISC_SOCKCANCEL_RECV);
2396                 }
2397         }
2398
2399         disp->attributes &= ~mask;
2400         disp->attributes |= (attributes & mask);
2401         UNLOCK(&disp->lock);
2402 }
2403
2404 void
2405 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
2406         void *buf;
2407         isc_socketevent_t *sevent, *newsevent;
2408
2409         REQUIRE(VALID_DISPATCH(disp));
2410         REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
2411         REQUIRE(event != NULL);
2412
2413         sevent = (isc_socketevent_t *)event;
2414
2415         INSIST(sevent->n <= disp->mgr->buffersize);
2416         newsevent = (isc_socketevent_t *)
2417                     isc_event_allocate(disp->mgr->mctx, NULL,
2418                                       DNS_EVENT_IMPORTRECVDONE, udp_recv,
2419                                       disp, sizeof(isc_socketevent_t));
2420         if (newsevent == NULL)
2421                 return;
2422
2423         buf = allocate_udp_buffer(disp);
2424         if (buf == NULL) {
2425                 isc_event_free(ISC_EVENT_PTR(&newsevent));
2426                 return;
2427         }
2428         memcpy(buf, sevent->region.base, sevent->n);
2429         newsevent->region.base = buf;
2430         newsevent->region.length = disp->mgr->buffersize;
2431         newsevent->n = sevent->n;
2432         newsevent->result = sevent->result;
2433         newsevent->address = sevent->address;
2434         newsevent->timestamp = sevent->timestamp;
2435         newsevent->pktinfo = sevent->pktinfo;
2436         newsevent->attributes = sevent->attributes;
2437         
2438         isc_task_send(disp->task, ISC_EVENT_PTR(&newsevent));
2439 }
2440
#if 0
/*
 * Debugging aid (compiled out): print every dispatcher on mgr->list
 * together with its formatted local address.
 */
void
dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
        dns_dispatch_t *disp;
        char foo[1024];

        for (disp = ISC_LIST_HEAD(mgr->list);
             disp != NULL;
             disp = ISC_LIST_NEXT(disp, link))
        {
                isc_sockaddr_format(&disp->local, foo, sizeof(foo));
                printf("\tdispatch %p, addr %s\n", disp, foo);
        }
}
#endif