]> CyberLeo.Net >> Repos - FreeBSD/releng/9.3.git/blob - contrib/bind9/lib/dns/dispatch.c
Copy stable/9 to releng/9.3 as part of the 9.3-RELEASE cycle.
[FreeBSD/releng/9.3.git] / contrib / bind9 / lib / dns / dispatch.c
1 /*
2  * Copyright (C) 2004-2009, 2011-2014  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: dispatch.c,v 1.175 2011/11/29 01:03:47 marka Exp $ */
19
20 /*! \file */
21
22 #include <config.h>
23
24 #include <stdlib.h>
25 #include <sys/types.h>
26 #include <unistd.h>
27 #include <stdlib.h>
28
29 #include <isc/entropy.h>
30 #include <isc/mem.h>
31 #include <isc/mutex.h>
32 #include <isc/portset.h>
33 #include <isc/print.h>
34 #include <isc/random.h>
35 #include <isc/socket.h>
36 #include <isc/stats.h>
37 #include <isc/string.h>
38 #include <isc/task.h>
39 #include <isc/time.h>
40 #include <isc/util.h>
41
42 #include <dns/acl.h>
43 #include <dns/dispatch.h>
44 #include <dns/events.h>
45 #include <dns/log.h>
46 #include <dns/message.h>
47 #include <dns/portlist.h>
48 #include <dns/stats.h>
49 #include <dns/tcpmsg.h>
50 #include <dns/types.h>
51
/*
 * List-head types (built with ISC_LIST) and short typedef names for the
 * file-local dispsocket and dispportentry structures.
 */
52 typedef ISC_LIST(dns_dispentry_t)       dns_displist_t;
53
54 typedef struct dispsocket               dispsocket_t;
55 typedef ISC_LIST(dispsocket_t)          dispsocketlist_t;
56
57 typedef struct dispportentry            dispportentry_t;
58 typedef ISC_LIST(dispportentry_t)       dispportlist_t;
59
60 /* ARC4 random generator state */
61 typedef struct arc4ctx {
62         isc_uint8_t     i;              /*%< first index into s[] */
63         isc_uint8_t     j;              /*%< second index into s[] */
64         isc_uint8_t     s[256];         /*%< permutation table; initialised to 0..255 */
65         int             count;          /*%< countdown; dispatch_random() re-stirs at <= 0 */
66         isc_entropy_t   *entropy;       /*%< entropy source for ARC4 */
67         isc_mutex_t     *lock;          /*%< optional; dispatch_random() takes it when non-NULL */
68 } arc4ctx_t;
69
/*
 * Hash tables of outstanding entries and of dispatch sockets.  Both tables
 * are indexed by a bucket number below qid_nbuckets (see dns_hash() and
 * socket_search()).
 */
70 typedef struct dns_qid {
71         unsigned int    magic;          /*%< QID_MAGIC when valid */
72         unsigned int    qid_nbuckets;   /*%< hash table size */
73         unsigned int    qid_increment;  /*%< id increment on collision */
74         isc_mutex_t     lock;           /*%< also held while a port table is modified */
75         dns_displist_t  *qid_table;     /*%< the table itself */
76         dispsocketlist_t *sock_table;   /*%< socket table */
77 } dns_qid_t;
78
79 struct dns_dispatchmgr {
80         /* Unlocked. */
81         unsigned int                    magic;
82         isc_mem_t                      *mctx;
83         dns_acl_t                      *blackhole;
84         dns_portlist_t                 *portlist;
85         isc_stats_t                    *stats;   /*%< may be NULL; inc_stats() then does nothing */
86         isc_entropy_t                  *entropy; /*%< entropy source */
87
88         /* Locked by "lock". */
89         isc_mutex_t                     lock;
90         unsigned int                    state;    /*%< MGR_SHUTTINGDOWN flag bits */
91         ISC_LIST(dns_dispatch_t)        list;     /*%< dispatches, chained by their 'link' */
92
93         /* Locked by arc4_lock. */
94         isc_mutex_t                     arc4_lock;
95         arc4ctx_t                       arc4ctx;    /*%< ARC4 context for QID */
96
97         /* Locked by buffer_lock. */
98         dns_qid_t                       *qid;
99         isc_mutex_t                     buffer_lock;
100         unsigned int                    buffers;    /*%< allocated buffers */
101         unsigned int                    buffersize; /*%< size of each buffer */
102         unsigned int                    maxbuffers; /*%< max buffers */
103
104         /* Locked internally. */
105         isc_mutex_t                     depool_lock;
106         isc_mempool_t                  *depool; /*%< pool for dispatch events */
107         isc_mutex_t                     rpool_lock;
108         isc_mempool_t                  *rpool;  /*%< pool for replies */
109         isc_mutex_t                     dpool_lock;
110         isc_mempool_t                  *dpool;  /*%< dispatch allocations */
111         isc_mutex_t                     bpool_lock;
112         isc_mempool_t                  *bpool;  /*%< pool for buffers */
113         isc_mutex_t                     spool_lock;
114         isc_mempool_t                  *spool;  /*%< pool for dispsocks */
115
116         /*%
117          * Locked by qid->lock if qid exists; otherwise, can be used without
118          * being locked.
119          * Memory footprint considerations: this is a simple implementation of
120          * available ports, i.e., an ordered array of the actual port numbers.
121          * This will require about 256KB of memory in the worst case (128KB for
122          * each of IPv4 and IPv6).  We could reduce it by representing it in a
123          * more sophisticated way such as a list (or array) of ranges that are
124          * searched to identify a specific port.  Our decision here is that the
125          * saved memory isn't worth the implementation complexity, considering
126          * the fact that the whole BIND9 process (which is mainly named) already
127          * requires a pretty large memory footprint.  We may, however, have to
128          * revisit the decision when we want to use it as a separate module for
129          * an environment where the memory requirement is more severe.
130          */
131         in_port_t       *v4ports;       /*%< available ports for IPv4 */
132         unsigned int    nv4ports;       /*%< # of available ports for IPv4 */
133         in_port_t       *v6ports;       /*%< available ports for IPv6 */
134         unsigned int    nv6ports;       /*%< # of available ports for IPv6 */
135 };
136
/*% Bit in dns_dispatchmgr.state, and a predicate that tests it. */
137 #define MGR_SHUTTINGDOWN                0x00000001U
138 #define MGR_IS_SHUTTINGDOWN(l)  (((l)->state & MGR_SHUTTINGDOWN) != 0)
139
/*% True when the DNS_DISPATCHATTR_PRIVATE bit is set in (d)->attributes. */
140 #define IS_PRIVATE(d)   (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
141
/*
 * One entry in a qid's qid_table.
 * NOTE(review): presumably one per outstanding request/response -- confirm
 * against the code that creates these entries.
 */
142 struct dns_dispentry {
143         unsigned int                    magic;  /* RESPONSE_MAGIC when valid */
144         dns_dispatch_t                 *disp;
145         dns_messageid_t                 id;
146         in_port_t                       port;
147         unsigned int                    bucket; /* index of this entry's qid_table bucket */
148         isc_sockaddr_t                  host;   /* address printed as the peer by request_log() */
149         isc_task_t                     *task;
150         isc_taskaction_t                action;
151         void                           *arg;
152         isc_boolean_t                   item_out;
153         dispsocket_t                    *dispsocket;
154         ISC_LIST(dns_dispatchevent_t)   items;
155         ISC_LINK(dns_dispentry_t)       link;   /* chains entries within one qid_table bucket */
156 };
157
158 /*%
159  * Maximum number of dispatch sockets that can be pooled for reuse.  The
160  * appropriate value may vary, but experiments have shown that a busy caching
161  * server may need more than 1000 sockets open concurrently.  The maximum
162  * allowable number of dispatch sockets (per manager) will be set to twice
163  * this value.  May be overridden at compile time.
164  */
165 #ifndef DNS_DISPATCH_POOLSOCKS
166 #define DNS_DISPATCH_POOLSOCKS                  2048
167 #endif
168
169 /*%
170  * Quota to control the number of dispatch sockets.  If a dispatch has more
171  * sockets than the quota, new queries will purge the oldest ones, so that
172  * a massive number of outstanding queries won't prevent subsequent queries
173  * (especially if the older ones take a longer time and result in a timeout).
174  * May be overridden at compile time.
175  */
176 #ifndef DNS_DISPATCH_SOCKSQUOTA
177 #define DNS_DISPATCH_SOCKSQUOTA                 3072
178 #endif
178
/*
 * Per-socket state for a dispatch that uses separate sockets; looked up by
 * destination address and local port in socket_search().
 */
179 struct dispsocket {
180         unsigned int                    magic;  /* DISPSOCK_MAGIC when valid */
181         isc_socket_t                    *socket;
182         dns_dispatch_t                  *disp;
183         isc_sockaddr_t                  host;   /* compared with the destination in socket_search() */
184         in_port_t                       localport; /* XXX: should be removed later */
185         dispportentry_t                 *portentry; /* may be NULL; its 'port' is matched in socket_search() */
186         dns_dispentry_t                 *resp;
187         isc_task_t                      *task;
188         ISC_LINK(dispsocket_t)          link;   /* link on a dispatch's socket list (e.g. inactivesockets) */
189         unsigned int                    bucket;
190         ISC_LINK(dispsocket_t)          blink;  /* chains sockets within one qid sock_table bucket */
191 };
192
193 /*%
194  * A port table entry.  We remember every port we first open in a table with a
195  * reference counter so that we can 'reuse' the same port (with different
196  * destination addresses) using the SO_REUSEADDR socket option.
197  */
198 struct dispportentry {
199         in_port_t                       port;   /*%< port number; also selects the table bucket */
200         unsigned int                    refs;   /*%< starts at 1; entry is freed when it drops to 0 */
201         ISC_LINK(struct dispportentry)  link;   /*%< chains entries within one port_table bucket */
202 };
203
/*%
 * Modulus used to pick a dispatch's port_table bucket: an entry for port P
 * lives in port_table[P % DNS_DISPATCH_PORTTABLESIZE].  May be overridden at
 * compile time.
 */
204 #ifndef DNS_DISPATCH_PORTTABLESIZE
205 #define DNS_DISPATCH_PORTTABLESIZE      1024
206 #endif
207
/* NOTE(review): presumably a sentinel meaning "not in any bucket" -- confirm against its users. */
208 #define INVALID_BUCKET          (0xffffdead)
209
210 /*%
211  * Number of tasks for each dispatch that uses separate sockets for different
212  * transactions.  This must be a power of 2 as it will divide 32 bit numbers
213  * to get a uniformly random task selection.  See get_dispsocket().
214  */
215 #define MAX_INTERNAL_TASKS      64
216
217 struct dns_dispatch {
218         /* Unlocked. */
219         unsigned int            magic;          /*%< magic */
220         dns_dispatchmgr_t      *mgr;            /*%< dispatch manager */
221         int                     ntasks;         /*%< number of task[] slots in use */
222         /*%
223          * internal task buckets.  We use multiple tasks to distribute various
224          * socket events well when using separate dispatch sockets.  We use the
225          * 1st task (task[0]) for internal control events.
226          */
227         isc_task_t             *task[MAX_INTERNAL_TASKS];
228         isc_socket_t           *socket;         /*%< isc socket attached to */
229         isc_sockaddr_t          local;          /*%< local address */
230         in_port_t               localport;      /*%< local UDP port */
231         unsigned int            maxrequests;    /*%< max requests */
232         isc_event_t            *ctlevent;
233
234         isc_mutex_t             sepool_lock;
235         isc_mempool_t          *sepool;         /*%< pool for socket events */
236
237         /*% Locked by mgr->lock. */
238         ISC_LINK(dns_dispatch_t) link;
239
240         /* Locked by "lock". */
241         isc_mutex_t             lock;           /*%< locks all below */
242         isc_sockettype_t        socktype;
243         unsigned int            attributes;
244         unsigned int            refcount;       /*%< number of users */
245         dns_dispatchevent_t    *failsafe_ev;    /*%< failsafe cancel event */
246         unsigned int            shutting_down : 1,
247                                 shutdown_out : 1,
248                                 connected : 1,
249                                 tcpmsg_valid : 1,
250                                 recv_pending : 1; /*%< is a recv() pending? */
251         isc_result_t            shutdown_why;
252         ISC_LIST(dispsocket_t)  activesockets;  /*%< must be empty before destruction */
253         ISC_LIST(dispsocket_t)  inactivesockets; /*%< drained and destroyed in destroy_disp() */
254         unsigned int            nsockets;
255         unsigned int            requests;       /*%< how many requests we have */
256         unsigned int            tcpbuffers;     /*%< allocated buffers */
257         dns_tcpmsg_t            tcpmsg;         /*%< for tcp streams */
258         dns_qid_t               *qid;
259         arc4ctx_t               arc4ctx;        /*%< for QID/UDP port num */
260         dispportlist_t          *port_table;    /*%< hold ports 'owned' by us */
261         isc_mempool_t           *portpool;      /*%< port table entries  */
262 };
263
/*
 * Magic numbers for the structures used in this file, each paired with a
 * validity-check macro built on ISC_MAGIC_VALID.
 */
264 #define QID_MAGIC               ISC_MAGIC('Q', 'i', 'd', ' ')
265 #define VALID_QID(e)            ISC_MAGIC_VALID((e), QID_MAGIC)
266
267 #define RESPONSE_MAGIC          ISC_MAGIC('D', 'r', 's', 'p')
268 #define VALID_RESPONSE(e)       ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
269
270 #define DISPSOCK_MAGIC          ISC_MAGIC('D', 's', 'o', 'c')
271 #define VALID_DISPSOCK(e)       ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
272
273 #define DISPATCH_MAGIC          ISC_MAGIC('D', 'i', 's', 'p')
274 #define VALID_DISPATCH(e)       ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
275
276 #define DNS_DISPATCHMGR_MAGIC   ISC_MAGIC('D', 'M', 'g', 'r')
277 #define VALID_DISPATCHMGR(e)    ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
278
/*
 * Select the qid table / ARC4 context that applies to 'disp':
 *  - DNS_QID:      a TCP dispatch uses its own qid, anything else uses the
 *                  manager's.
 *  - DISP_ARC4CTX: a UDP dispatch uses its own ARC4 context, anything else
 *                  uses the manager's.
 * The whole conditional is parenthesized so that the macros expand safely
 * inside larger expressions (e.g. "DNS_QID(disp)->lock").
 */
#define DNS_QID(disp) (((disp)->socktype == isc_sockettype_tcp) ? \
                       (disp)->qid : (disp)->mgr->qid)
#define DISP_ARC4CTX(disp) (((disp)->socktype == isc_sockettype_udp) ? \
                        (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
283
284 /*%
285  * Locking a query port buffer is a bit tricky.  We access the buffer without
286  * locking until qid is created.  Technically, there is a possibility of race
287  * between the creation of qid and access to the port buffer; in practice,
288  * however, this should be safe because qid isn't created until the first
289  * dispatch is created and there should be no contending situation until then.
290  */
/*
 * Lock/unlock the manager's qid lock, but only once a qid exists.  Each macro
 * is wrapped in do { } while (0) so that it behaves as a single statement and
 * cannot capture an "else" that follows it at the call site.
 */
#define PORTBUFLOCK(mgr) \
        do { \
                if ((mgr)->qid != NULL) \
                        LOCK(&((mgr)->qid->lock)); \
        } while (0)
#define PORTBUFUNLOCK(mgr) \
        do { \
                if ((mgr)->qid != NULL) \
                        UNLOCK((&(mgr)->qid->lock)); \
        } while (0)
293
294 /*
295  * Statics: forward declarations of this file's static helper functions.
296  */
297 static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
298                                      dns_messageid_t, in_port_t, unsigned int);
299 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
300 static void destroy_disp(isc_task_t *task, isc_event_t *event);
301 static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
302 static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
303 static void udp_exrecv(isc_task_t *, isc_event_t *);
304 static void udp_shrecv(isc_task_t *, isc_event_t *);
305 static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
306 static void tcp_recv(isc_task_t *, isc_event_t *);
307 static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
308 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
309                              in_port_t);
310 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
311 static void *allocate_udp_buffer(dns_dispatch_t *disp);
312 static inline void free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
313 static inline dns_dispatchevent_t *allocate_devent(dns_dispatch_t *disp);
314 static void do_cancel(dns_dispatch_t *disp);
/* Note: the first parameter of linear_first()/linear_next() is a qid, despite the name 'disp'. */
315 static dns_dispentry_t *linear_first(dns_qid_t *disp);
316 static dns_dispentry_t *linear_next(dns_qid_t *disp,
317                                     dns_dispentry_t *resp);
318 static void dispatch_free(dns_dispatch_t **dispp);
319 static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
320                                   dns_dispatch_t *disp,
321                                   isc_socketmgr_t *sockmgr,
322                                   isc_sockaddr_t *localaddr,
323                                   isc_socket_t **sockp,
324                                   isc_socket_t *dup_socket);
325 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
326                                        isc_socketmgr_t *sockmgr,
327                                        isc_taskmgr_t *taskmgr,
328                                        isc_sockaddr_t *localaddr,
329                                        unsigned int maxrequests,
330                                        unsigned int attributes,
331                                        dns_dispatch_t **dispp,
332                                        isc_socket_t *dup_socket);
333 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
334 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
335 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
336                                  unsigned int increment, dns_qid_t **qidp,
337                                  isc_boolean_t needaddrtable);
338 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
339 static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
340                                 unsigned int options, isc_socket_t **sockp,
341                                 isc_socket_t *dup_socket);
342 static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
343                                    isc_sockaddr_t *sockaddrp);
344
345 #define LVL(x) ISC_LOG_DEBUG(x) /* shorthand for the debug log level 'x' */
346
347 static void
348 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
349      ISC_FORMAT_PRINTF(3, 4);
350
351 static void
352 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
353         char msgbuf[2048];
354         va_list ap;
355
356         if (! isc_log_wouldlog(dns_lctx, level))
357                 return;
358
359         va_start(ap, fmt);
360         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
361         va_end(ap);
362
363         isc_log_write(dns_lctx,
364                       DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
365                       level, "dispatchmgr %p: %s", mgr, msgbuf);
366 }
367
368 static inline void
369 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
370         if (mgr->stats != NULL)
371                 isc_stats_increment(mgr->stats, counter);
372 }
373
374 static void
375 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
376      ISC_FORMAT_PRINTF(3, 4);
377
378 static void
379 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
380         char msgbuf[2048];
381         va_list ap;
382
383         if (! isc_log_wouldlog(dns_lctx, level))
384                 return;
385
386         va_start(ap, fmt);
387         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
388         va_end(ap);
389
390         isc_log_write(dns_lctx,
391                       DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
392                       level, "dispatch %p: %s", disp, msgbuf);
393 }
394
395 static void
396 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
397             int level, const char *fmt, ...)
398      ISC_FORMAT_PRINTF(4, 5);
399
400 static void
401 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
402             int level, const char *fmt, ...)
403 {
404         char msgbuf[2048];
405         char peerbuf[256];
406         va_list ap;
407
408         if (! isc_log_wouldlog(dns_lctx, level))
409                 return;
410
411         va_start(ap, fmt);
412         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
413         va_end(ap);
414
415         if (VALID_RESPONSE(resp)) {
416                 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
417                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
418                               DNS_LOGMODULE_DISPATCH, level,
419                               "dispatch %p response %p %s: %s", disp, resp,
420                               peerbuf, msgbuf);
421         } else {
422                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
423                               DNS_LOGMODULE_DISPATCH, level,
424                               "dispatch %p req/resp %p: %s", disp, resp,
425                               msgbuf);
426         }
427 }
428
429 /*%
430  * ARC4 random number generator derived from OpenBSD.
431  * Only dispatch_random() and dispatch_uniformrandom() are expected
432  * to be called from general dispatch routines; the rest of them are subroutines
433  * for these two.
434  *
435  * The original copyright follows:
436  * Copyright (c) 1996, David Mazieres <dm@uun.org>
437  * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
438  *
439  * Permission to use, copy, modify, and distribute this software for any
440  * purpose with or without fee is hereby granted, provided that the above
441  * copyright notice and this permission notice appear in all copies.
442  *
443  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
444  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
445  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
446  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
447  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
448  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
449  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
450  */
451 #ifdef BIND9
452 static void
453 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
454                     isc_mutex_t *lock)
455 {
456         int n;
457         for (n = 0; n < 256; n++)
458                 actx->s[n] = n;
459         actx->i = 0;
460         actx->j = 0;
461         actx->count = 0;
462         actx->entropy = entropy; /* don't have to attach */
463         actx->lock = lock;
464 }
465
466 static void
467 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
468         int n;
469         isc_uint8_t si;
470
471         actx->i--;
472         for (n = 0; n < 256; n++) {
473                 actx->i = (actx->i + 1);
474                 si = actx->s[actx->i];
475                 actx->j = (actx->j + si + dat[n % datlen]);
476                 actx->s[actx->i] = actx->s[actx->j];
477                 actx->s[actx->j] = si;
478         }
479         actx->j = actx->i;
480 }
481
482 static inline isc_uint8_t
483 dispatch_arc4get8(arc4ctx_t *actx) {
484         isc_uint8_t si, sj;
485
486         actx->i = (actx->i + 1);
487         si = actx->s[actx->i];
488         actx->j = (actx->j + si);
489         sj = actx->s[actx->j];
490         actx->s[actx->i] = sj;
491         actx->s[actx->j] = si;
492
493         return (actx->s[(si + sj) & 0xff]);
494 }
495
496 static inline isc_uint16_t
497 dispatch_arc4get16(arc4ctx_t *actx) {
498         isc_uint16_t val;
499
500         val = dispatch_arc4get8(actx) << 8;
501         val |= dispatch_arc4get8(actx);
502
503         return (val);
504 }
505
506 static void
507 dispatch_arc4stir(arc4ctx_t *actx) {
508         int i;
509         union {
510                 unsigned char rnd[128];
511                 isc_uint32_t rnd32[32];
512         } rnd;
513         isc_result_t result;
514
515         if (actx->entropy != NULL) {
516                 /*
517                  * We accept any quality of random data to avoid blocking.
518                  */
519                 result = isc_entropy_getdata(actx->entropy, rnd.rnd,
520                                              sizeof(rnd), NULL, 0);
521                 RUNTIME_CHECK(result == ISC_R_SUCCESS);
522         } else {
523                 for (i = 0; i < 32; i++)
524                         isc_random_get(&rnd.rnd32[i]);
525         }
526         dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
527
528         /*
529          * Discard early keystream, as per recommendations in:
530          * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
531          */
532         for (i = 0; i < 256; i++)
533                 (void)dispatch_arc4get8(actx);
534
535         /*
536          * Derived from OpenBSD's implementation.  The rationale is not clear,
537          * but should be conservative enough in safety, and reasonably large
538          * for efficiency.
539          */
540         actx->count = 1600000;
541 }
542
543 static isc_uint16_t
544 dispatch_random(arc4ctx_t *actx) {
545         isc_uint16_t result;
546
547         if (actx->lock != NULL)
548                 LOCK(actx->lock);
549
550         actx->count -= sizeof(isc_uint16_t);
551         if (actx->count <= 0)
552                 dispatch_arc4stir(actx);
553         result = dispatch_arc4get16(actx);
554
555         if (actx->lock != NULL)
556                 UNLOCK(actx->lock);
557
558         return (result);
559 }
560 #else
561 /*
562  * For general purpose library, we don't have to be too strict about the
563  * quality of random values.  Performance doesn't matter much, either.
564  * So we simply use the isc_random module to keep the library as small as
565  * possible.
566  */
567
568 static void
569 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
570                     isc_mutex_t *lock)
571 {
572         UNUSED(actx);
573         UNUSED(entropy);
574         UNUSED(lock);
575
576         return;
577 }
578
579 static isc_uint16_t
580 dispatch_random(arc4ctx_t *actx) {
581         isc_uint32_t r;
582
583         UNUSED(actx);
584
585         isc_random_get(&r);
586         return (r & 0xffff);
587 }
588 #endif  /* BIND9 */
589
590 static isc_uint16_t
591 dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
592         isc_uint16_t min, r;
593
594         if (upper_bound < 2)
595                 return (0);
596
597         /*
598          * Ensure the range of random numbers [min, 0xffff] be a multiple of
599          * upper_bound and contain at least a half of the 16 bit range.
600          */
601
602         if (upper_bound > 0x8000)
603                 min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
604         else
605                 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
606
607         /*
608          * This could theoretically loop forever but each retry has
609          * p > 0.5 (worst case, usually far better) of selecting a
610          * number inside the range we need, so it should rarely need
611          * to re-roll.
612          */
613         for (;;) {
614                 r = dispatch_random(actx);
615                 if (r >= min)
616                         break;
617         }
618
619         return (r % upper_bound);
620 }
621
622 /*
623  * Return a hash of the destination and message id.
624  */
625 static isc_uint32_t
626 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
627          in_port_t port)
628 {
629         unsigned int ret;
630
631         ret = isc_sockaddr_hash(dest, ISC_TRUE);
632         ret ^= (id << 16) | port;
633         ret %= qid->qid_nbuckets;
634
635         INSIST(ret < qid->qid_nbuckets);
636
637         return (ret);
638 }
639
640 /*
641  * Find the first entry in 'qid'.  Returns NULL if there are no entries.
642  */
643 static dns_dispentry_t *
644 linear_first(dns_qid_t *qid) {
645         dns_dispentry_t *ret;
646         unsigned int bucket;
647
648         bucket = 0;
649
650         while (bucket < qid->qid_nbuckets) {
651                 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
652                 if (ret != NULL)
653                         return (ret);
654                 bucket++;
655         }
656
657         return (NULL);
658 }
659
660 /*
661  * Find the next entry after 'resp' in 'qid'.  Return NULL if there are
662  * no more entries.
663  */
664 static dns_dispentry_t *
665 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
666         dns_dispentry_t *ret;
667         unsigned int bucket;
668
669         ret = ISC_LIST_NEXT(resp, link);
670         if (ret != NULL)
671                 return (ret);
672
673         bucket = resp->bucket;
674         bucket++;
675         while (bucket < qid->qid_nbuckets) {
676                 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
677                 if (ret != NULL)
678                         return (ret);
679                 bucket++;
680         }
681
682         return (NULL);
683 }
684
685 /*
686  * The dispatch must be locked.
687  */
688 static isc_boolean_t
689 destroy_disp_ok(dns_dispatch_t *disp)
690 {
691         if (disp->refcount != 0)
692                 return (ISC_FALSE);
693
694         if (disp->recv_pending != 0)
695                 return (ISC_FALSE);
696
697         if (!ISC_LIST_EMPTY(disp->activesockets))
698                 return (ISC_FALSE);
699
700         if (disp->shutting_down == 0)
701                 return (ISC_FALSE);
702
703         return (ISC_TRUE);
704 }
705
706 /*
707  * Called when refcount reaches 0 (and safe to destroy).
708  *
709  * The dispatcher must be locked.
710  * The manager must not be locked.
711  */
712 static void
713 destroy_disp(isc_task_t *task, isc_event_t *event) {
714         dns_dispatch_t *disp;
715         dns_dispatchmgr_t *mgr;
716         isc_boolean_t killmgr;
717         dispsocket_t *dispsocket;
718         int i;
719
720         INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
721
722         UNUSED(task);
723
724         disp = event->ev_arg;
725         mgr = disp->mgr;
726
727         LOCK(&mgr->lock);
728         ISC_LIST_UNLINK(mgr->list, disp, link);
729
730         dispatch_log(disp, LVL(90),
731                      "shutting down; detaching from sock %p, task %p",
732                      disp->socket, disp->task[0]); /* XXXX */
733
734         if (disp->sepool != NULL) {
735                 isc_mempool_destroy(&disp->sepool);
736                 (void)isc_mutex_destroy(&disp->sepool_lock);
737         }
738
739         if (disp->socket != NULL)
740                 isc_socket_detach(&disp->socket);
741         while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
742                 ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
743                 destroy_dispsocket(disp, &dispsocket);
744         }
745         for (i = 0; i < disp->ntasks; i++)
746                 isc_task_detach(&disp->task[i]);
747         isc_event_free(&event);
748
749         dispatch_free(&disp);
750
751         killmgr = destroy_mgr_ok(mgr);
752         UNLOCK(&mgr->lock);
753         if (killmgr)
754                 destroy_mgr(&mgr);
755 }
756
757 /*%
758  * Manipulate port table per dispatch: find an entry for a given port number,
759  * create a new entry, and decrement a given entry with possible clean-up.
760  */
761 static dispportentry_t *
762 port_search(dns_dispatch_t *disp, in_port_t port) {
763         dispportentry_t *portentry;
764
765         REQUIRE(disp->port_table != NULL);
766
767         portentry = ISC_LIST_HEAD(disp->port_table[port %
768                                                    DNS_DISPATCH_PORTTABLESIZE]);
769         while (portentry != NULL) {
770                 if (portentry->port == port)
771                         return (portentry);
772                 portentry = ISC_LIST_NEXT(portentry, link);
773         }
774
775         return (NULL);
776 }
777
778 static dispportentry_t *
779 new_portentry(dns_dispatch_t *disp, in_port_t port) {
780         dispportentry_t *portentry;
781         dns_qid_t *qid;
782
783         REQUIRE(disp->port_table != NULL);
784
785         portentry = isc_mempool_get(disp->portpool);
786         if (portentry == NULL)
787                 return (portentry);
788
789         portentry->port = port;
790         portentry->refs = 1;
791         ISC_LINK_INIT(portentry, link);
792         qid = DNS_QID(disp);
793         LOCK(&qid->lock);
794         ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
795                         portentry, link);
796         UNLOCK(&qid->lock);
797
798         return (portentry);
799 }
800
801 /*%
802  * The caller must not hold the qid->lock.
803  */
804 static void
805 deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
806         dispportentry_t *portentry = *portentryp;
807         dns_qid_t *qid;
808
809         REQUIRE(disp->port_table != NULL);
810         REQUIRE(portentry != NULL && portentry->refs > 0);
811
812         qid = DNS_QID(disp);
813         LOCK(&qid->lock);
814         portentry->refs--;
815
816         if (portentry->refs == 0) {
817                 ISC_LIST_UNLINK(disp->port_table[portentry->port %
818                                                  DNS_DISPATCH_PORTTABLESIZE],
819                                 portentry, link);
820                 isc_mempool_put(disp->portpool, portentry);
821         }
822         UNLOCK(&qid->lock);
823
824         *portentryp = NULL;
825 }
826
827 /*%
828  * Find a dispsocket for socket address 'dest', and port number 'port'.
829  * Return NULL if no such entry exists.
830  */
831 static dispsocket_t *
832 socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
833               unsigned int bucket)
834 {
835         dispsocket_t *dispsock;
836
837         REQUIRE(VALID_QID(qid));
838         REQUIRE(bucket < qid->qid_nbuckets);
839
840         dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
841
842         while (dispsock != NULL) {
843                 if (dispsock->portentry != NULL &&
844                     dispsock->portentry->port == port &&
845                     isc_sockaddr_equal(dest, &dispsock->host))
846                         return (dispsock);
847                 dispsock = ISC_LIST_NEXT(dispsock, blink);
848         }
849
850         return (NULL);
851 }
852
853 /*%
854  * Make a new socket for a single dispatch with a random port number.
855  * The caller must hold the disp->lock
856  */
857 static isc_result_t
858 get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
859                isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp,
860                in_port_t *portp)
861 {
862         int i;
863         isc_uint32_t r;
864         dns_dispatchmgr_t *mgr = disp->mgr;
865         isc_socket_t *sock = NULL;
866         isc_result_t result = ISC_R_FAILURE;
867         in_port_t port;
868         isc_sockaddr_t localaddr;
869         unsigned int bucket = 0;
870         dispsocket_t *dispsock;
871         unsigned int nports;
872         in_port_t *ports;
873         unsigned int bindoptions;
874         dispportentry_t *portentry = NULL;
875         dns_qid_t *qid;
876
877         if (isc_sockaddr_pf(&disp->local) == AF_INET) {
878                 nports = disp->mgr->nv4ports;
879                 ports = disp->mgr->v4ports;
880         } else {
881                 nports = disp->mgr->nv6ports;
882                 ports = disp->mgr->v6ports;
883         }
884         if (nports == 0)
885                 return (ISC_R_ADDRNOTAVAIL);
886
887         dispsock = ISC_LIST_HEAD(disp->inactivesockets);
888         if (dispsock != NULL) {
889                 ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
890                 sock = dispsock->socket;
891                 dispsock->socket = NULL;
892         } else {
893                 dispsock = isc_mempool_get(mgr->spool);
894                 if (dispsock == NULL)
895                         return (ISC_R_NOMEMORY);
896
897                 disp->nsockets++;
898                 dispsock->socket = NULL;
899                 dispsock->disp = disp;
900                 dispsock->resp = NULL;
901                 dispsock->portentry = NULL;
902                 isc_random_get(&r);
903                 dispsock->task = NULL;
904                 isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
905                 ISC_LINK_INIT(dispsock, link);
906                 ISC_LINK_INIT(dispsock, blink);
907                 dispsock->magic = DISPSOCK_MAGIC;
908         }
909
910         /*
911          * Pick up a random UDP port and open a new socket with it.  Avoid
912          * choosing ports that share the same destination because it will be
913          * very likely to fail in bind(2) or connect(2).
914          */
915         localaddr = disp->local;
916         qid = DNS_QID(disp);
917
918         for (i = 0; i < 64; i++) {
919                 port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
920                                                         nports)];
921                 isc_sockaddr_setport(&localaddr, port);
922
923                 LOCK(&qid->lock);
924                 bucket = dns_hash(qid, dest, 0, port);
925                 if (socket_search(qid, dest, port, bucket) != NULL) {
926                         UNLOCK(&qid->lock);
927                         continue;
928                 }
929                 UNLOCK(&qid->lock);
930                 bindoptions = 0;
931                 portentry = port_search(disp, port);
932
933                 if (portentry != NULL)
934                         bindoptions |= ISC_SOCKET_REUSEADDRESS;
935                 result = open_socket(sockmgr, &localaddr, bindoptions, &sock,
936                                      NULL);
937                 if (result == ISC_R_SUCCESS) {
938                         if (portentry == NULL) {
939                                 portentry = new_portentry(disp, port);
940                                 if (portentry == NULL) {
941                                         result = ISC_R_NOMEMORY;
942                                         break;
943                                 }
944                         } else {
945                                 LOCK(&qid->lock);
946                                 portentry->refs++;
947                                 UNLOCK(&qid->lock);
948                         }
949                         break;
950                 } else if (result == ISC_R_NOPERM) {
951                         char buf[ISC_SOCKADDR_FORMATSIZE];
952                         isc_sockaddr_format(&localaddr, buf, sizeof(buf));
953                         dispatch_log(disp, ISC_LOG_WARNING,
954                                      "open_socket(%s) -> %s: continuing",
955                                      buf, isc_result_totext(result));
956                 } else if (result != ISC_R_ADDRINUSE)
957                         break;
958         }
959
960         if (result == ISC_R_SUCCESS) {
961                 dispsock->socket = sock;
962                 dispsock->host = *dest;
963                 dispsock->portentry = portentry;
964                 dispsock->bucket = bucket;
965                 LOCK(&qid->lock);
966                 ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
967                 UNLOCK(&qid->lock);
968                 *dispsockp = dispsock;
969                 *portp = port;
970         } else {
971                 /*
972                  * We could keep it in the inactive list, but since this should
973                  * be an exceptional case and might be resource shortage, we'd
974                  * rather destroy it.
975                  */
976                 if (sock != NULL)
977                         isc_socket_detach(&sock);
978                 destroy_dispsocket(disp, &dispsock);
979         }
980
981         return (result);
982 }
983
984 /*%
985  * Destroy a dedicated dispatch socket.
986  */
987 static void
988 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
989         dispsocket_t *dispsock;
990         dns_qid_t *qid;
991
992         /*
993          * The dispatch must be locked.
994          */
995
996         REQUIRE(dispsockp != NULL && *dispsockp != NULL);
997         dispsock = *dispsockp;
998         REQUIRE(!ISC_LINK_LINKED(dispsock, link));
999
1000         disp->nsockets--;
1001         dispsock->magic = 0;
1002         if (dispsock->portentry != NULL)
1003                 deref_portentry(disp, &dispsock->portentry);
1004         if (dispsock->socket != NULL)
1005                 isc_socket_detach(&dispsock->socket);
1006         if (ISC_LINK_LINKED(dispsock, blink)) {
1007                 qid = DNS_QID(disp);
1008                 LOCK(&qid->lock);
1009                 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1010                                 blink);
1011                 UNLOCK(&qid->lock);
1012         }
1013         if (dispsock->task != NULL)
1014                 isc_task_detach(&dispsock->task);
1015         isc_mempool_put(disp->mgr->spool, dispsock);
1016
1017         *dispsockp = NULL;
1018 }
1019
1020 /*%
1021  * Deactivate a dedicated dispatch socket.  Move it to the inactive list for
1022  * future reuse unless the total number of sockets are exceeding the maximum.
1023  */
1024 static void
1025 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1026         isc_result_t result;
1027         dns_qid_t *qid;
1028
1029         /*
1030          * The dispatch must be locked.
1031          */
1032         ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
1033         if (dispsock->resp != NULL) {
1034                 INSIST(dispsock->resp->dispsocket == dispsock);
1035                 dispsock->resp->dispsocket = NULL;
1036         }
1037
1038         INSIST(dispsock->portentry != NULL);
1039         deref_portentry(disp, &dispsock->portentry);
1040
1041 #ifdef BIND9
1042         if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
1043                 destroy_dispsocket(disp, &dispsock);
1044         else {
1045                 result = isc_socket_close(dispsock->socket);
1046
1047                 qid = DNS_QID(disp);
1048                 LOCK(&qid->lock);
1049                 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1050                                 blink);
1051                 UNLOCK(&qid->lock);
1052
1053                 if (result == ISC_R_SUCCESS)
1054                         ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
1055                 else {
1056                         /*
1057                          * If the underlying system does not allow this
1058                          * optimization, destroy this temporary structure (and
1059                          * create a new one for a new transaction).
1060                          */
1061                         INSIST(result == ISC_R_NOTIMPLEMENTED);
1062                         destroy_dispsocket(disp, &dispsock);
1063                 }
1064         }
1065 #else
1066         /* This kind of optimization isn't necessary for normal use */
1067         UNUSED(qid);
1068         UNUSED(result);
1069
1070         destroy_dispsocket(disp, &dispsock);
1071 #endif
1072 }
1073
1074 /*
1075  * Find an entry for query ID 'id', socket address 'dest', and port number
1076  * 'port'.
1077  * Return NULL if no such entry exists.
1078  */
1079 static dns_dispentry_t *
1080 entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
1081              in_port_t port, unsigned int bucket)
1082 {
1083         dns_dispentry_t *res;
1084
1085         REQUIRE(VALID_QID(qid));
1086         REQUIRE(bucket < qid->qid_nbuckets);
1087
1088         res = ISC_LIST_HEAD(qid->qid_table[bucket]);
1089
1090         while (res != NULL) {
1091                 if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
1092                     res->port == port) {
1093                         return (res);
1094                 }
1095                 res = ISC_LIST_NEXT(res, link);
1096         }
1097
1098         return (NULL);
1099 }
1100
1101 static void
1102 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
1103         isc_mempool_t *bpool;
1104         INSIST(buf != NULL && len != 0);
1105
1106
1107         switch (disp->socktype) {
1108         case isc_sockettype_tcp:
1109                 INSIST(disp->tcpbuffers > 0);
1110                 disp->tcpbuffers--;
1111                 isc_mem_put(disp->mgr->mctx, buf, len);
1112                 break;
1113         case isc_sockettype_udp:
1114                 LOCK(&disp->mgr->buffer_lock);
1115                 INSIST(disp->mgr->buffers > 0);
1116                 INSIST(len == disp->mgr->buffersize);
1117                 disp->mgr->buffers--;
1118                 bpool = disp->mgr->bpool;
1119                 UNLOCK(&disp->mgr->buffer_lock);
1120                 isc_mempool_put(bpool, buf);
1121                 break;
1122         default:
1123                 INSIST(0);
1124                 break;
1125         }
1126 }
1127
1128 static void *
1129 allocate_udp_buffer(dns_dispatch_t *disp) {
1130         isc_mempool_t *bpool;
1131         void *temp;
1132
1133         LOCK(&disp->mgr->buffer_lock);
1134         bpool = disp->mgr->bpool;
1135         disp->mgr->buffers++;
1136         UNLOCK(&disp->mgr->buffer_lock);
1137
1138         temp = isc_mempool_get(bpool);
1139
1140         if (temp == NULL) {
1141                 LOCK(&disp->mgr->buffer_lock);
1142                 disp->mgr->buffers--;
1143                 UNLOCK(&disp->mgr->buffer_lock);
1144         }
1145
1146         return (temp);
1147 }
1148
1149 static inline void
1150 free_sevent(isc_event_t *ev) {
1151         isc_mempool_t *pool = ev->ev_destroy_arg;
1152         isc_socketevent_t *sev = (isc_socketevent_t *) ev;
1153         isc_mempool_put(pool, sev);
1154 }
1155
1156 static inline isc_socketevent_t *
1157 allocate_sevent(dns_dispatch_t *disp, isc_socket_t *socket,
1158                 isc_eventtype_t type, isc_taskaction_t action, const void *arg)
1159 {
1160         isc_socketevent_t *ev;
1161         void *deconst_arg;
1162
1163         ev = isc_mempool_get(disp->sepool);
1164         if (ev == NULL)
1165                 return (NULL);
1166         DE_CONST(arg, deconst_arg);
1167         ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type,
1168                        action, deconst_arg, socket,
1169                        free_sevent, disp->sepool);
1170         ev->result = ISC_R_UNSET;
1171         ISC_LINK_INIT(ev, ev_link);
1172         ISC_LIST_INIT(ev->bufferlist);
1173         ev->region.base = NULL;
1174         ev->n = 0;
1175         ev->offset = 0;
1176         ev->attributes = 0;
1177
1178         return (ev);
1179 }
1180
1181
1182 static inline void
1183 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
1184         if (disp->failsafe_ev == ev) {
1185                 INSIST(disp->shutdown_out == 1);
1186                 disp->shutdown_out = 0;
1187
1188                 return;
1189         }
1190
1191         isc_mempool_put(disp->mgr->depool, ev);
1192 }
1193
1194 static inline dns_dispatchevent_t *
1195 allocate_devent(dns_dispatch_t *disp) {
1196         dns_dispatchevent_t *ev;
1197
1198         ev = isc_mempool_get(disp->mgr->depool);
1199         if (ev == NULL)
1200                 return (NULL);
1201         ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
1202                        NULL, NULL, NULL, NULL, NULL);
1203
1204         return (ev);
1205 }
1206
1207 static void
1208 udp_exrecv(isc_task_t *task, isc_event_t *ev) {
1209         dispsocket_t *dispsock = ev->ev_arg;
1210
1211         UNUSED(task);
1212
1213         REQUIRE(VALID_DISPSOCK(dispsock));
1214         udp_recv(ev, dispsock->disp, dispsock);
1215 }
1216
1217 static void
1218 udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1219         dns_dispatch_t *disp = ev->ev_arg;
1220
1221         UNUSED(task);
1222
1223         REQUIRE(VALID_DISPATCH(disp));
1224         udp_recv(ev, disp, NULL);
1225 }
1226
1227 /*
1228  * General flow:
1229  *
1230  * If I/O result == CANCELED or error, free the buffer.
1231  *
1232  * If query, free the buffer, restart.
1233  *
1234  * If response:
1235  *      Allocate event, fill in details.
1236  *              If cannot allocate, free buffer, restart.
1237  *      find target.  If not found, free buffer, restart.
1238  *      if event queue is not empty, queue.  else, send.
1239  *      restart.
1240  */
1241 static void
1242 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1243         isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1244         dns_messageid_t id;
1245         isc_result_t dres;
1246         isc_buffer_t source;
1247         unsigned int flags;
1248         dns_dispentry_t *resp = NULL;
1249         dns_dispatchevent_t *rev;
1250         unsigned int bucket;
1251         isc_boolean_t killit;
1252         isc_boolean_t queue_response;
1253         dns_dispatchmgr_t *mgr;
1254         dns_qid_t *qid;
1255         isc_netaddr_t netaddr;
1256         int match;
1257         int result;
1258         isc_boolean_t qidlocked = ISC_FALSE;
1259
1260         LOCK(&disp->lock);
1261
1262         mgr = disp->mgr;
1263         qid = mgr->qid;
1264
1265         dispatch_log(disp, LVL(90),
1266                      "got packet: requests %d, buffers %d, recvs %d",
1267                      disp->requests, disp->mgr->buffers, disp->recv_pending);
1268
1269         if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1270                 /*
1271                  * Unless the receive event was imported from a listening
1272                  * interface, in which case the event type is
1273                  * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1274                  */
1275                 INSIST(disp->recv_pending != 0);
1276                 disp->recv_pending = 0;
1277         }
1278
1279         if (dispsock != NULL &&
1280             (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1281                 /*
1282                  * dispsock->resp can be NULL if this transaction was canceled
1283                  * just after receiving a response.  Since this socket is
1284                  * exclusively used and there should be at most one receive
1285                  * event the canceled event should have been no effect.  So
1286                  * we can (and should) deactivate the socket right now.
1287                  */
1288                 deactivate_dispsocket(disp, dispsock);
1289                 dispsock = NULL;
1290         }
1291
1292         if (disp->shutting_down) {
1293                 /*
1294                  * This dispatcher is shutting down.
1295                  */
1296                 free_buffer(disp, ev->region.base, ev->region.length);
1297
1298                 isc_event_free(&ev_in);
1299                 ev = NULL;
1300
1301                 killit = destroy_disp_ok(disp);
1302                 UNLOCK(&disp->lock);
1303                 if (killit)
1304                         isc_task_send(disp->task[0], &disp->ctlevent);
1305
1306                 return;
1307         }
1308
1309         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1310                 if (dispsock != NULL) {
1311                         resp = dispsock->resp;
1312                         id = resp->id;
1313                         if (ev->result != ISC_R_SUCCESS) {
1314                                 /*
1315                                  * This is most likely a network error on a
1316                                  * connected socket.  It makes no sense to
1317                                  * check the address or parse the packet, but it
1318                                  * will help to return the error to the caller.
1319                                  */
1320                                 goto sendresponse;
1321                         }
1322                 } else {
1323                         free_buffer(disp, ev->region.base, ev->region.length);
1324
1325                         UNLOCK(&disp->lock);
1326                         isc_event_free(&ev_in);
1327                         return;
1328                 }
1329         } else if (ev->result != ISC_R_SUCCESS) {
1330                 free_buffer(disp, ev->region.base, ev->region.length);
1331
1332                 if (ev->result != ISC_R_CANCELED)
1333                         dispatch_log(disp, ISC_LOG_ERROR,
1334                                      "odd socket result in udp_recv(): %s",
1335                                      isc_result_totext(ev->result));
1336
1337                 UNLOCK(&disp->lock);
1338                 isc_event_free(&ev_in);
1339                 return;
1340         }
1341
1342         /*
1343          * If this is from a blackholed address, drop it.
1344          */
1345         isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1346         if (disp->mgr->blackhole != NULL &&
1347             dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1348                           NULL, &match, NULL) == ISC_R_SUCCESS &&
1349             match > 0)
1350         {
1351                 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1352                         char netaddrstr[ISC_NETADDR_FORMATSIZE];
1353                         isc_netaddr_format(&netaddr, netaddrstr,
1354                                            sizeof(netaddrstr));
1355                         dispatch_log(disp, LVL(10),
1356                                      "blackholed packet from %s",
1357                                      netaddrstr);
1358                 }
1359                 free_buffer(disp, ev->region.base, ev->region.length);
1360                 goto restart;
1361         }
1362
1363         /*
1364          * Peek into the buffer to see what we can see.
1365          */
1366         isc_buffer_init(&source, ev->region.base, ev->region.length);
1367         isc_buffer_add(&source, ev->n);
1368         dres = dns_message_peekheader(&source, &id, &flags);
1369         if (dres != ISC_R_SUCCESS) {
1370                 free_buffer(disp, ev->region.base, ev->region.length);
1371                 dispatch_log(disp, LVL(10), "got garbage packet");
1372                 goto restart;
1373         }
1374
1375         dispatch_log(disp, LVL(92),
1376                      "got valid DNS message header, /QR %c, id %u",
1377                      ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1378
1379         /*
1380          * Look at flags.  If query, drop it. If response,
1381          * look to see where it goes.
1382          */
1383         if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1384                 /* query */
1385                 free_buffer(disp, ev->region.base, ev->region.length);
1386                 goto restart;
1387         }
1388
1389         /*
1390          * Search for the corresponding response.  If we are using an exclusive
1391          * socket, we've already identified it and we can skip the search; but
1392          * the ID and the address must match the expected ones.
1393          */
1394         if (resp == NULL) {
1395                 bucket = dns_hash(qid, &ev->address, id, disp->localport);
1396                 LOCK(&qid->lock);
1397                 qidlocked = ISC_TRUE;
1398                 resp = entry_search(qid, &ev->address, id, disp->localport,
1399                                     bucket);
1400                 dispatch_log(disp, LVL(90),
1401                              "search for response in bucket %d: %s",
1402                              bucket, (resp == NULL ? "not found" : "found"));
1403
1404                 if (resp == NULL) {
1405                         inc_stats(mgr, dns_resstatscounter_mismatch);
1406                         free_buffer(disp, ev->region.base, ev->region.length);
1407                         goto unlock;
1408                 }
1409         } else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1410                                                          &resp->host)) {
1411                 dispatch_log(disp, LVL(90),
1412                              "response to an exclusive socket doesn't match");
1413                 inc_stats(mgr, dns_resstatscounter_mismatch);
1414                 free_buffer(disp, ev->region.base, ev->region.length);
1415                 goto unlock;
1416         }
1417
1418         /*
1419          * Now that we have the original dispatch the query was sent
1420          * from check that the address and port the response was
1421          * sent to make sense.
1422          */
1423         if (disp != resp->disp) {
1424                 isc_sockaddr_t a1;
1425                 isc_sockaddr_t a2;
1426
1427                 /*
1428                  * Check that the socket types and ports match.
1429                  */
1430                 if (disp->socktype != resp->disp->socktype ||
1431                     isc_sockaddr_getport(&disp->local) !=
1432                     isc_sockaddr_getport(&resp->disp->local)) {
1433                         free_buffer(disp, ev->region.base, ev->region.length);
1434                         goto unlock;
1435                 }
1436
1437                 /*
1438                  * If each dispatch is bound to a different address
1439                  * then fail.
1440                  *
1441                  * Note under Linux a packet can be sent out via IPv4 socket
1442                  * and the response be received via a IPv6 socket.
1443                  *
1444                  * Requests sent out via IPv6 should always come back in
1445                  * via IPv6.
1446                  */
1447                 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1448                     isc_sockaddr_pf(&disp->local) != PF_INET6) {
1449                         free_buffer(disp, ev->region.base, ev->region.length);
1450                         goto unlock;
1451                 }
1452                 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1453                 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1454                 if (!isc_sockaddr_eqaddr(&disp->local, &resp->disp->local) &&
1455                     !isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1456                     !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1457                         free_buffer(disp, ev->region.base, ev->region.length);
1458                         goto unlock;
1459                 }
1460         }
1461
1462   sendresponse:
1463         queue_response = resp->item_out;
1464         rev = allocate_devent(resp->disp);
1465         if (rev == NULL) {
1466                 free_buffer(disp, ev->region.base, ev->region.length);
1467                 goto unlock;
1468         }
1469
1470         /*
1471          * At this point, rev contains the event we want to fill in, and
1472          * resp contains the information on the place to send it to.
1473          * Send the event off.
1474          */
1475         isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1476         isc_buffer_add(&rev->buffer, ev->n);
1477         rev->result = ev->result;
1478         rev->id = id;
1479         rev->addr = ev->address;
1480         rev->pktinfo = ev->pktinfo;
1481         rev->attributes = ev->attributes;
1482         if (queue_response) {
1483                 ISC_LIST_APPEND(resp->items, rev, ev_link);
1484         } else {
1485                 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1486                                DNS_EVENT_DISPATCH,
1487                                resp->action, resp->arg, resp, NULL, NULL);
1488                 request_log(disp, resp, LVL(90),
1489                             "[a] Sent event %p buffer %p len %d to task %p",
1490                             rev, rev->buffer.base, rev->buffer.length,
1491                             resp->task);
1492                 resp->item_out = ISC_TRUE;
1493                 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1494         }
1495  unlock:
1496         if (qidlocked)
1497                 UNLOCK(&qid->lock);
1498
1499         /*
1500          * Restart recv() to get the next packet.
1501          */
1502  restart:
1503         result = startrecv(disp, dispsock);
1504         if (result != ISC_R_SUCCESS && dispsock != NULL) {
1505                 /*
1506                  * XXX: wired. There seems to be no recovery process other than
1507                  * deactivate this socket anyway (since we cannot start
1508                  * receiving, we won't be able to receive a cancel event
1509                  * from the user).
1510                  */
1511                 deactivate_dispsocket(disp, dispsock);
1512         }
1513         UNLOCK(&disp->lock);
1514
1515         isc_event_free(&ev_in);
1516 }
1517
1518 /*
1519  * General flow:
1520  *
1521  * If I/O result == CANCELED, EOF, or error, notify everyone as the
1522  * various queues drain.
1523  *
1524  * If query, restart.
1525  *
1526  * If response:
1527  *      Allocate event, fill in details.
1528  *              If cannot allocate, restart.
1529  *      find target.  If not found, restart.
1530  *      if event queue is not empty, queue.  else, send.
1531  *      restart.
1532  */
1533 static void
1534 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1535         dns_dispatch_t *disp = ev_in->ev_arg;
1536         dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1537         dns_messageid_t id;
1538         isc_result_t dres;
1539         unsigned int flags;
1540         dns_dispentry_t *resp;
1541         dns_dispatchevent_t *rev;
1542         unsigned int bucket;
1543         isc_boolean_t killit;
1544         isc_boolean_t queue_response;
1545         dns_qid_t *qid;
1546         int level;
1547         char buf[ISC_SOCKADDR_FORMATSIZE];
1548
1549         UNUSED(task);
1550
1551         REQUIRE(VALID_DISPATCH(disp));
1552
1553         qid = disp->qid;
1554
1555         dispatch_log(disp, LVL(90),
1556                      "got TCP packet: requests %d, buffers %d, recvs %d",
1557                      disp->requests, disp->tcpbuffers, disp->recv_pending);
1558
1559         LOCK(&disp->lock);
1560
1561         INSIST(disp->recv_pending != 0);
1562         disp->recv_pending = 0;
1563
1564         if (disp->refcount == 0) {
1565                 /*
1566                  * This dispatcher is shutting down.  Force cancelation.
1567                  */
1568                 tcpmsg->result = ISC_R_CANCELED;
1569         }
1570
1571         if (tcpmsg->result != ISC_R_SUCCESS) {
1572                 switch (tcpmsg->result) {
1573                 case ISC_R_CANCELED:
1574                         break;
1575
1576                 case ISC_R_EOF:
1577                         dispatch_log(disp, LVL(90), "shutting down on EOF");
1578                         do_cancel(disp);
1579                         break;
1580
1581                 case ISC_R_CONNECTIONRESET:
1582                         level = ISC_LOG_INFO;
1583                         goto logit;
1584
1585                 default:
1586                         level = ISC_LOG_ERROR;
1587                 logit:
1588                         isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1589                         dispatch_log(disp, level, "shutting down due to TCP "
1590                                      "receive error: %s: %s", buf,
1591                                      isc_result_totext(tcpmsg->result));
1592                         do_cancel(disp);
1593                         break;
1594                 }
1595
1596                 /*
1597                  * The event is statically allocated in the tcpmsg
1598                  * structure, and destroy_disp() frees the tcpmsg, so we must
1599                  * free the event *before* calling destroy_disp().
1600                  */
1601                 isc_event_free(&ev_in);
1602
1603                 disp->shutting_down = 1;
1604                 disp->shutdown_why = tcpmsg->result;
1605
1606                 /*
1607                  * If the recv() was canceled pass the word on.
1608                  */
1609                 killit = destroy_disp_ok(disp);
1610                 UNLOCK(&disp->lock);
1611                 if (killit)
1612                         isc_task_send(disp->task[0], &disp->ctlevent);
1613                 return;
1614         }
1615
1616         dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1617                      tcpmsg->result,
1618                      tcpmsg->buffer.length, tcpmsg->buffer.base);
1619
1620         /*
1621          * Peek into the buffer to see what we can see.
1622          */
1623         dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1624         if (dres != ISC_R_SUCCESS) {
1625                 dispatch_log(disp, LVL(10), "got garbage packet");
1626                 goto restart;
1627         }
1628
1629         dispatch_log(disp, LVL(92),
1630                      "got valid DNS message header, /QR %c, id %u",
1631                      ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1632
1633         /*
1634          * Allocate an event to send to the query or response client, and
1635          * allocate a new buffer for our use.
1636          */
1637
1638         /*
1639          * Look at flags.  If query, drop it. If response,
1640          * look to see where it goes.
1641          */
1642         if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1643                 /*
1644                  * Query.
1645                  */
1646                 goto restart;
1647         }
1648
1649         /*
1650          * Response.
1651          */
1652         bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1653         LOCK(&qid->lock);
1654         resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1655         dispatch_log(disp, LVL(90),
1656                      "search for response in bucket %d: %s",
1657                      bucket, (resp == NULL ? "not found" : "found"));
1658
1659         if (resp == NULL)
1660                 goto unlock;
1661         queue_response = resp->item_out;
1662         rev = allocate_devent(disp);
1663         if (rev == NULL)
1664                 goto unlock;
1665
1666         /*
1667          * At this point, rev contains the event we want to fill in, and
1668          * resp contains the information on the place to send it to.
1669          * Send the event off.
1670          */
1671         dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1672         disp->tcpbuffers++;
1673         rev->result = ISC_R_SUCCESS;
1674         rev->id = id;
1675         rev->addr = tcpmsg->address;
1676         if (queue_response) {
1677                 ISC_LIST_APPEND(resp->items, rev, ev_link);
1678         } else {
1679                 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1680                                resp->action, resp->arg, resp, NULL, NULL);
1681                 request_log(disp, resp, LVL(90),
1682                             "[b] Sent event %p buffer %p len %d to task %p",
1683                             rev, rev->buffer.base, rev->buffer.length,
1684                             resp->task);
1685                 resp->item_out = ISC_TRUE;
1686                 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1687         }
1688  unlock:
1689         UNLOCK(&qid->lock);
1690
1691         /*
1692          * Restart recv() to get the next packet.
1693          */
1694  restart:
1695         (void)startrecv(disp, NULL);
1696
1697         UNLOCK(&disp->lock);
1698
1699         isc_event_free(&ev_in);
1700 }
1701
1702 /*
1703  * disp must be locked.
1704  */
1705 static isc_result_t
1706 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1707         isc_result_t res;
1708         isc_region_t region;
1709         isc_socket_t *socket;
1710
1711         if (disp->shutting_down == 1)
1712                 return (ISC_R_SUCCESS);
1713
1714         if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1715                 return (ISC_R_SUCCESS);
1716
1717         if (disp->recv_pending != 0 && dispsock == NULL)
1718                 return (ISC_R_SUCCESS);
1719
1720         if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1721                 return (ISC_R_NOMEMORY);
1722
1723         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1724             dispsock == NULL)
1725                 return (ISC_R_SUCCESS);
1726
1727         if (dispsock != NULL)
1728                 socket = dispsock->socket;
1729         else
1730                 socket = disp->socket;
1731         INSIST(socket != NULL);
1732
1733         switch (disp->socktype) {
1734                 /*
1735                  * UDP reads are always maximal.
1736                  */
1737         case isc_sockettype_udp:
1738                 region.length = disp->mgr->buffersize;
1739                 region.base = allocate_udp_buffer(disp);
1740                 if (region.base == NULL)
1741                         return (ISC_R_NOMEMORY);
1742                 if (dispsock != NULL) {
1743                         isc_task_t *dt = dispsock->task;
1744                         isc_socketevent_t *sev =
1745                                 allocate_sevent(disp, socket,
1746                                                 ISC_SOCKEVENT_RECVDONE,
1747                                                 udp_exrecv, dispsock);
1748                         if (sev == NULL) {
1749                                 free_buffer(disp, region.base, region.length);
1750                                 return (ISC_R_NOMEMORY);
1751                         }
1752
1753                         res = isc_socket_recv2(socket, &region, 1, dt, sev, 0);
1754                         if (res != ISC_R_SUCCESS) {
1755                                 free_buffer(disp, region.base, region.length);
1756                                 return (res);
1757                         }
1758                 } else {
1759                         isc_task_t *dt = disp->task[0];
1760                         isc_socketevent_t *sev =
1761                                 allocate_sevent(disp, socket,
1762                                                 ISC_SOCKEVENT_RECVDONE,
1763                                                 udp_shrecv, disp);
1764                         if (sev == NULL) {
1765                                 free_buffer(disp, region.base, region.length);
1766                                 return (ISC_R_NOMEMORY);
1767                         }
1768
1769                         res = isc_socket_recv2(socket, &region, 1, dt, sev, 0);
1770                         if (res != ISC_R_SUCCESS) {
1771                                 free_buffer(disp, region.base, region.length);
1772                                 disp->shutdown_why = res;
1773                                 disp->shutting_down = 1;
1774                                 do_cancel(disp);
1775                                 return (ISC_R_SUCCESS); /* recover by cancel */
1776                         }
1777                         INSIST(disp->recv_pending == 0);
1778                         disp->recv_pending = 1;
1779                 }
1780                 break;
1781
1782         case isc_sockettype_tcp:
1783                 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1784                                              tcp_recv, disp);
1785                 if (res != ISC_R_SUCCESS) {
1786                         disp->shutdown_why = res;
1787                         disp->shutting_down = 1;
1788                         do_cancel(disp);
1789                         return (ISC_R_SUCCESS); /* recover by cancel */
1790                 }
1791                 INSIST(disp->recv_pending == 0);
1792                 disp->recv_pending = 1;
1793                 break;
1794         default:
1795                 INSIST(0);
1796                 break;
1797         }
1798
1799         return (ISC_R_SUCCESS);
1800 }
1801
1802 /*
1803  * Mgr must be locked when calling this function.
1804  */
1805 static isc_boolean_t
1806 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1807         mgr_log(mgr, LVL(90),
1808                 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1809                 "depool=%d, rpool=%d, dpool=%d",
1810                 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1811                 isc_mempool_getallocated(mgr->depool),
1812                 isc_mempool_getallocated(mgr->rpool),
1813                 isc_mempool_getallocated(mgr->dpool));
1814         if (!MGR_IS_SHUTTINGDOWN(mgr))
1815                 return (ISC_FALSE);
1816         if (!ISC_LIST_EMPTY(mgr->list))
1817                 return (ISC_FALSE);
1818         if (isc_mempool_getallocated(mgr->depool) != 0)
1819                 return (ISC_FALSE);
1820         if (isc_mempool_getallocated(mgr->rpool) != 0)
1821                 return (ISC_FALSE);
1822         if (isc_mempool_getallocated(mgr->dpool) != 0)
1823                 return (ISC_FALSE);
1824
1825         return (ISC_TRUE);
1826 }
1827
1828 /*
1829  * Mgr must be unlocked when calling this function.
1830  */
1831 static void
1832 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1833         isc_mem_t *mctx;
1834         dns_dispatchmgr_t *mgr;
1835
1836         mgr = *mgrp;
1837         *mgrp = NULL;
1838
1839         mctx = mgr->mctx;
1840
1841         mgr->magic = 0;
1842         mgr->mctx = NULL;
1843         DESTROYLOCK(&mgr->lock);
1844         mgr->state = 0;
1845
1846         DESTROYLOCK(&mgr->arc4_lock);
1847
1848         isc_mempool_destroy(&mgr->depool);
1849         isc_mempool_destroy(&mgr->rpool);
1850         isc_mempool_destroy(&mgr->dpool);
1851         if (mgr->bpool != NULL)
1852                 isc_mempool_destroy(&mgr->bpool);
1853         if (mgr->spool != NULL)
1854                 isc_mempool_destroy(&mgr->spool);
1855
1856         DESTROYLOCK(&mgr->spool_lock);
1857         DESTROYLOCK(&mgr->bpool_lock);
1858         DESTROYLOCK(&mgr->dpool_lock);
1859         DESTROYLOCK(&mgr->rpool_lock);
1860         DESTROYLOCK(&mgr->depool_lock);
1861
1862 #ifdef BIND9
1863         if (mgr->entropy != NULL)
1864                 isc_entropy_detach(&mgr->entropy);
1865 #endif /* BIND9 */
1866         if (mgr->qid != NULL)
1867                 qid_destroy(mctx, &mgr->qid);
1868
1869         DESTROYLOCK(&mgr->buffer_lock);
1870
1871         if (mgr->blackhole != NULL)
1872                 dns_acl_detach(&mgr->blackhole);
1873
1874         if (mgr->stats != NULL)
1875                 isc_stats_detach(&mgr->stats);
1876
1877         if (mgr->v4ports != NULL) {
1878                 isc_mem_put(mctx, mgr->v4ports,
1879                             mgr->nv4ports * sizeof(in_port_t));
1880         }
1881         if (mgr->v6ports != NULL) {
1882                 isc_mem_put(mctx, mgr->v6ports,
1883                             mgr->nv6ports * sizeof(in_port_t));
1884         }
1885         isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1886         isc_mem_detach(&mctx);
1887 }
1888
1889 static isc_result_t
1890 open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1891             unsigned int options, isc_socket_t **sockp,
1892             isc_socket_t *dup_socket)
1893 {
1894         isc_socket_t *sock;
1895         isc_result_t result;
1896
1897         sock = *sockp;
1898         if (sock != NULL) {
1899 #ifdef BIND9
1900                 result = isc_socket_open(sock);
1901                 if (result != ISC_R_SUCCESS)
1902                         return (result);
1903 #else
1904                 INSIST(0);
1905 #endif
1906         } else if (dup_socket != NULL) {
1907                 result = isc_socket_dup(dup_socket, &sock);
1908                 if (result != ISC_R_SUCCESS)
1909                         return (result);
1910
1911                 isc_socket_setname(sock, "dispatcher", NULL);
1912                 *sockp = sock;
1913                 return (ISC_R_SUCCESS);
1914         } else {
1915                 result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1916                                         isc_sockettype_udp, &sock);
1917                 if (result != ISC_R_SUCCESS)
1918                         return (result);
1919         }
1920
1921         isc_socket_setname(sock, "dispatcher", NULL);
1922
1923 #ifndef ISC_ALLOW_MAPPED
1924         isc_socket_ipv6only(sock, ISC_TRUE);
1925 #endif
1926         result = isc_socket_bind(sock, local, options);
1927         if (result != ISC_R_SUCCESS) {
1928                 if (*sockp == NULL)
1929                         isc_socket_detach(&sock);
1930                 else {
1931 #ifdef BIND9
1932                         isc_socket_close(sock);
1933 #else
1934                         INSIST(0);
1935 #endif
1936                 }
1937                 return (result);
1938         }
1939
1940         *sockp = sock;
1941         return (ISC_R_SUCCESS);
1942 }
1943
1944 /*%
1945  * Create a temporary port list to set the initial default set of dispatch
1946  * ports: [1024, 65535].  This is almost meaningless as the application will
1947  * normally set the ports explicitly, but is provided to fill some minor corner
1948  * cases.
1949  */
1950 static isc_result_t
1951 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1952         isc_result_t result;
1953
1954         result = isc_portset_create(mctx, portsetp);
1955         if (result != ISC_R_SUCCESS)
1956                 return (result);
1957         isc_portset_addrange(*portsetp, 1024, 65535);
1958
1959         return (ISC_R_SUCCESS);
1960 }
1961
1962 /*
1963  * Publics.
1964  */
1965
1966 isc_result_t
1967 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1968                        dns_dispatchmgr_t **mgrp)
1969 {
1970         dns_dispatchmgr_t *mgr;
1971         isc_result_t result;
1972         isc_portset_t *v4portset = NULL;
1973         isc_portset_t *v6portset = NULL;
1974
1975         REQUIRE(mctx != NULL);
1976         REQUIRE(mgrp != NULL && *mgrp == NULL);
1977
1978         mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1979         if (mgr == NULL)
1980                 return (ISC_R_NOMEMORY);
1981
1982         mgr->mctx = NULL;
1983         isc_mem_attach(mctx, &mgr->mctx);
1984
1985         mgr->blackhole = NULL;
1986         mgr->stats = NULL;
1987
1988         result = isc_mutex_init(&mgr->lock);
1989         if (result != ISC_R_SUCCESS)
1990                 goto deallocate;
1991
1992         result = isc_mutex_init(&mgr->arc4_lock);
1993         if (result != ISC_R_SUCCESS)
1994                 goto kill_lock;
1995
1996         result = isc_mutex_init(&mgr->buffer_lock);
1997         if (result != ISC_R_SUCCESS)
1998                 goto kill_arc4_lock;
1999
2000         result = isc_mutex_init(&mgr->depool_lock);
2001         if (result != ISC_R_SUCCESS)
2002                 goto kill_buffer_lock;
2003
2004         result = isc_mutex_init(&mgr->rpool_lock);
2005         if (result != ISC_R_SUCCESS)
2006                 goto kill_depool_lock;
2007
2008         result = isc_mutex_init(&mgr->dpool_lock);
2009         if (result != ISC_R_SUCCESS)
2010                 goto kill_rpool_lock;
2011
2012         result = isc_mutex_init(&mgr->bpool_lock);
2013         if (result != ISC_R_SUCCESS)
2014                 goto kill_dpool_lock;
2015
2016         result = isc_mutex_init(&mgr->spool_lock);
2017         if (result != ISC_R_SUCCESS)
2018                 goto kill_bpool_lock;
2019
2020         mgr->depool = NULL;
2021         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
2022                                &mgr->depool) != ISC_R_SUCCESS) {
2023                 result = ISC_R_NOMEMORY;
2024                 goto kill_spool_lock;
2025         }
2026
2027         mgr->rpool = NULL;
2028         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
2029                                &mgr->rpool) != ISC_R_SUCCESS) {
2030                 result = ISC_R_NOMEMORY;
2031                 goto kill_depool;
2032         }
2033
2034         mgr->dpool = NULL;
2035         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
2036                                &mgr->dpool) != ISC_R_SUCCESS) {
2037                 result = ISC_R_NOMEMORY;
2038                 goto kill_rpool;
2039         }
2040
2041         isc_mempool_setname(mgr->depool, "dispmgr_depool");
2042         isc_mempool_setmaxalloc(mgr->depool, 32768);
2043         isc_mempool_setfreemax(mgr->depool, 32768);
2044         isc_mempool_associatelock(mgr->depool, &mgr->depool_lock);
2045         isc_mempool_setfillcount(mgr->depool, 256);
2046
2047         isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
2048         isc_mempool_setmaxalloc(mgr->rpool, 32768);
2049         isc_mempool_setfreemax(mgr->rpool, 32768);
2050         isc_mempool_associatelock(mgr->rpool, &mgr->rpool_lock);
2051         isc_mempool_setfillcount(mgr->rpool, 256);
2052
2053         isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
2054         isc_mempool_setmaxalloc(mgr->dpool, 32768);
2055         isc_mempool_setfreemax(mgr->dpool, 32768);
2056         isc_mempool_associatelock(mgr->dpool, &mgr->dpool_lock);
2057         isc_mempool_setfillcount(mgr->dpool, 256);
2058
2059         mgr->buffers = 0;
2060         mgr->buffersize = 0;
2061         mgr->maxbuffers = 0;
2062         mgr->bpool = NULL;
2063         mgr->spool = NULL;
2064         mgr->entropy = NULL;
2065         mgr->qid = NULL;
2066         mgr->state = 0;
2067         ISC_LIST_INIT(mgr->list);
2068         mgr->v4ports = NULL;
2069         mgr->v6ports = NULL;
2070         mgr->nv4ports = 0;
2071         mgr->nv6ports = 0;
2072         mgr->magic = DNS_DISPATCHMGR_MAGIC;
2073
2074         result = create_default_portset(mctx, &v4portset);
2075         if (result == ISC_R_SUCCESS) {
2076                 result = create_default_portset(mctx, &v6portset);
2077                 if (result == ISC_R_SUCCESS) {
2078                         result = dns_dispatchmgr_setavailports(mgr,
2079                                                                v4portset,
2080                                                                v6portset);
2081                 }
2082         }
2083         if (v4portset != NULL)
2084                 isc_portset_destroy(mctx, &v4portset);
2085         if (v6portset != NULL)
2086                 isc_portset_destroy(mctx, &v6portset);
2087         if (result != ISC_R_SUCCESS)
2088                 goto kill_dpool;
2089
2090 #ifdef BIND9
2091         if (entropy != NULL)
2092                 isc_entropy_attach(entropy, &mgr->entropy);
2093 #else
2094         UNUSED(entropy);
2095 #endif
2096
2097         dispatch_initrandom(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
2098
2099         *mgrp = mgr;
2100         return (ISC_R_SUCCESS);
2101
2102  kill_dpool:
2103         isc_mempool_destroy(&mgr->dpool);
2104  kill_rpool:
2105         isc_mempool_destroy(&mgr->rpool);
2106  kill_depool:
2107         isc_mempool_destroy(&mgr->depool);
2108  kill_spool_lock:
2109         DESTROYLOCK(&mgr->spool_lock);
2110  kill_bpool_lock:
2111         DESTROYLOCK(&mgr->bpool_lock);
2112  kill_dpool_lock:
2113         DESTROYLOCK(&mgr->dpool_lock);
2114  kill_rpool_lock:
2115         DESTROYLOCK(&mgr->rpool_lock);
2116  kill_depool_lock:
2117         DESTROYLOCK(&mgr->depool_lock);
2118  kill_buffer_lock:
2119         DESTROYLOCK(&mgr->buffer_lock);
2120  kill_arc4_lock:
2121         DESTROYLOCK(&mgr->arc4_lock);
2122  kill_lock:
2123         DESTROYLOCK(&mgr->lock);
2124  deallocate:
2125         isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
2126         isc_mem_detach(&mctx);
2127
2128         return (result);
2129 }
2130
2131 void
2132 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
2133         REQUIRE(VALID_DISPATCHMGR(mgr));
2134         if (mgr->blackhole != NULL)
2135                 dns_acl_detach(&mgr->blackhole);
2136         dns_acl_attach(blackhole, &mgr->blackhole);
2137 }
2138
2139 dns_acl_t *
2140 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
2141         REQUIRE(VALID_DISPATCHMGR(mgr));
2142         return (mgr->blackhole);
2143 }
2144
2145 void
2146 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
2147                                  dns_portlist_t *portlist)
2148 {
2149         REQUIRE(VALID_DISPATCHMGR(mgr));
2150         UNUSED(portlist);
2151
2152         /* This function is deprecated: use dns_dispatchmgr_setavailports(). */
2153         return;
2154 }
2155
2156 dns_portlist_t *
2157 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
2158         REQUIRE(VALID_DISPATCHMGR(mgr));
2159         return (NULL);          /* this function is deprecated */
2160 }
2161
2162 isc_result_t
2163 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
2164                               isc_portset_t *v6portset)
2165 {
2166         in_port_t *v4ports, *v6ports, p;
2167         unsigned int nv4ports, nv6ports, i4, i6;
2168
2169         REQUIRE(VALID_DISPATCHMGR(mgr));
2170
2171         nv4ports = isc_portset_nports(v4portset);
2172         nv6ports = isc_portset_nports(v6portset);
2173
2174         v4ports = NULL;
2175         if (nv4ports != 0) {
2176                 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
2177                 if (v4ports == NULL)
2178                         return (ISC_R_NOMEMORY);
2179         }
2180         v6ports = NULL;
2181         if (nv6ports != 0) {
2182                 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
2183                 if (v6ports == NULL) {
2184                         if (v4ports != NULL) {
2185                                 isc_mem_put(mgr->mctx, v4ports,
2186                                             sizeof(in_port_t) *
2187                                             isc_portset_nports(v4portset));
2188                         }
2189                         return (ISC_R_NOMEMORY);
2190                 }
2191         }
2192
2193         p = 0;
2194         i4 = 0;
2195         i6 = 0;
2196         do {
2197                 if (isc_portset_isset(v4portset, p)) {
2198                         INSIST(i4 < nv4ports);
2199                         v4ports[i4++] = p;
2200                 }
2201                 if (isc_portset_isset(v6portset, p)) {
2202                         INSIST(i6 < nv6ports);
2203                         v6ports[i6++] = p;
2204                 }
2205         } while (p++ < 65535);
2206         INSIST(i4 == nv4ports && i6 == nv6ports);
2207
2208         PORTBUFLOCK(mgr);
2209         if (mgr->v4ports != NULL) {
2210                 isc_mem_put(mgr->mctx, mgr->v4ports,
2211                             mgr->nv4ports * sizeof(in_port_t));
2212         }
2213         mgr->v4ports = v4ports;
2214         mgr->nv4ports = nv4ports;
2215
2216         if (mgr->v6ports != NULL) {
2217                 isc_mem_put(mgr->mctx, mgr->v6ports,
2218                             mgr->nv6ports * sizeof(in_port_t));
2219         }
2220         mgr->v6ports = v6ports;
2221         mgr->nv6ports = nv6ports;
2222         PORTBUFUNLOCK(mgr);
2223
2224         return (ISC_R_SUCCESS);
2225 }
2226
2227 static isc_result_t
2228 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
2229                        unsigned int buffersize, unsigned int maxbuffers,
2230                        unsigned int maxrequests, unsigned int buckets,
2231                        unsigned int increment)
2232 {
2233         isc_result_t result;
2234
2235         REQUIRE(VALID_DISPATCHMGR(mgr));
2236         REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2237         REQUIRE(maxbuffers > 0);
2238         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2239         REQUIRE(increment > buckets);
2240
2241         /*
2242          * Keep some number of items around.  This should be a config
2243          * option.  For now, keep 8, but later keep at least two even
2244          * if the caller wants less.  This allows us to ensure certain
2245          * things, like an event can be "freed" and the next allocation
2246          * will always succeed.
2247          *
2248          * Note that if limits are placed on anything here, we use one
2249          * event internally, so the actual limit should be "wanted + 1."
2250          *
2251          * XXXMLG
2252          */
2253
2254         if (maxbuffers < 8)
2255                 maxbuffers = 8;
2256
2257         LOCK(&mgr->buffer_lock);
2258
2259         /* Create or adjust buffer pool */
2260         if (mgr->bpool != NULL) {
2261                 /*
2262                  * We only increase the maxbuffers to avoid accidental buffer
2263                  * shortage.  Ideally we'd separate the manager-wide maximum
2264                  * from per-dispatch limits and respect the latter within the
2265                  * global limit.  But at this moment that's deemed to be
2266                  * overkilling and isn't worth additional implementation
2267                  * complexity.
2268                  */
2269                 if (maxbuffers > mgr->maxbuffers) {
2270                         isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2271                         isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2272                         mgr->maxbuffers = maxbuffers;
2273                 }
2274         } else {
2275                 result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
2276                 if (result != ISC_R_SUCCESS) {
2277                         UNLOCK(&mgr->buffer_lock);
2278                         return (result);
2279                 }
2280                 isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
2281                 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2282                 isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2283                 isc_mempool_associatelock(mgr->bpool, &mgr->bpool_lock);
2284                 isc_mempool_setfillcount(mgr->bpool, 256);
2285         }
2286
2287         /* Create or adjust socket pool */
2288         if (mgr->spool != NULL) {
2289                 if (maxrequests < DNS_DISPATCH_POOLSOCKS * 2)
2290                   isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
2291                   isc_mempool_setfreemax(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
2292                 UNLOCK(&mgr->buffer_lock);
2293                 return (ISC_R_SUCCESS);
2294         }
2295         result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
2296                                     &mgr->spool);
2297         if (result != ISC_R_SUCCESS) {
2298                 UNLOCK(&mgr->buffer_lock);
2299                 goto cleanup;
2300         }
2301         isc_mempool_setname(mgr->spool, "dispmgr_spool");
2302         isc_mempool_setmaxalloc(mgr->spool, maxrequests);
2303         isc_mempool_setfreemax(mgr->spool, maxrequests);
2304         isc_mempool_associatelock(mgr->spool, &mgr->spool_lock);
2305         isc_mempool_setfillcount(mgr->spool, 256);
2306
2307         result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
2308         if (result != ISC_R_SUCCESS)
2309                 goto cleanup;
2310
2311         mgr->buffersize = buffersize;
2312         mgr->maxbuffers = maxbuffers;
2313         UNLOCK(&mgr->buffer_lock);
2314         return (ISC_R_SUCCESS);
2315
2316  cleanup:
2317         isc_mempool_destroy(&mgr->bpool);
2318         if (mgr->spool != NULL)
2319                 isc_mempool_destroy(&mgr->spool);
2320         UNLOCK(&mgr->buffer_lock);
2321         return (result);
2322 }
2323
2324 void
2325 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
2326         dns_dispatchmgr_t *mgr;
2327         isc_boolean_t killit;
2328
2329         REQUIRE(mgrp != NULL);
2330         REQUIRE(VALID_DISPATCHMGR(*mgrp));
2331
2332         mgr = *mgrp;
2333         *mgrp = NULL;
2334
2335         LOCK(&mgr->lock);
2336         mgr->state |= MGR_SHUTTINGDOWN;
2337
2338         killit = destroy_mgr_ok(mgr);
2339         UNLOCK(&mgr->lock);
2340
2341         mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
2342
2343         if (killit)
2344                 destroy_mgr(&mgr);
2345 }
2346
2347 void
2348 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2349         REQUIRE(VALID_DISPATCHMGR(mgr));
2350         REQUIRE(ISC_LIST_EMPTY(mgr->list));
2351         REQUIRE(mgr->stats == NULL);
2352
2353         isc_stats_attach(stats, &mgr->stats);
2354 }
2355
/*
 * bsearch()-style comparison of two in_port_t values: returns -1, 0 or 1
 * when the port at 'key' is less than, equal to, or greater than the port
 * at 'ent'.
 */
static int
port_cmp(const void *key, const void *ent) {
	const in_port_t wanted = *(const in_port_t *)key;
	const in_port_t entry = *(const in_port_t *)ent;

	/* (a > b) - (a < b) evaluates to exactly -1, 0 or 1. */
	return ((wanted > entry) - (wanted < entry));
}
2368
2369 static isc_boolean_t
2370 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2371               isc_sockaddr_t *sockaddrp)
2372 {
2373         isc_sockaddr_t sockaddr;
2374         isc_result_t result;
2375         in_port_t *ports, port;
2376         unsigned int nports;
2377         isc_boolean_t available = ISC_FALSE;
2378
2379         REQUIRE(sock != NULL || sockaddrp != NULL);
2380
2381         PORTBUFLOCK(mgr);
2382         if (sock != NULL) {
2383                 sockaddrp = &sockaddr;
2384                 result = isc_socket_getsockname(sock, sockaddrp);
2385                 if (result != ISC_R_SUCCESS)
2386                         goto unlock;
2387         }
2388
2389         if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2390                 ports = mgr->v4ports;
2391                 nports = mgr->nv4ports;
2392         } else {
2393                 ports = mgr->v6ports;
2394                 nports = mgr->nv6ports;
2395         }
2396         if (ports == NULL)
2397                 goto unlock;
2398
2399         port = isc_sockaddr_getport(sockaddrp);
2400         if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2401                 available = ISC_TRUE;
2402
2403 unlock:
2404         PORTBUFUNLOCK(mgr);
2405         return (available);
2406 }
2407
/*
 * True when the attribute words _a1 and _a2 agree on every bit selected
 * by _mask (bits outside the mask are ignored).
 */
#define ATTRMATCH(_a1, _a2, _mask) \
	((((_a1) ^ (_a2)) & (_mask)) == 0)
2409
2410 static isc_boolean_t
2411 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2412         isc_sockaddr_t sockaddr;
2413         isc_result_t result;
2414
2415         REQUIRE(disp->socket != NULL);
2416
2417         if (addr == NULL)
2418                 return (ISC_TRUE);
2419
2420         /*
2421          * Don't match wildcard ports unless the port is available in the
2422          * current configuration.
2423          */
2424         if (isc_sockaddr_getport(addr) == 0 &&
2425             isc_sockaddr_getport(&disp->local) == 0 &&
2426             !portavailable(disp->mgr, disp->socket, NULL)) {
2427                 return (ISC_FALSE);
2428         }
2429
2430         /*
2431          * Check if we match the binding <address,port>.
2432          * Wildcard ports match/fail here.
2433          */
2434         if (isc_sockaddr_equal(&disp->local, addr))
2435                 return (ISC_TRUE);
2436         if (isc_sockaddr_getport(addr) == 0)
2437                 return (ISC_FALSE);
2438
2439         /*
2440          * Check if we match a bound wildcard port <address,port>.
2441          */
2442         if (!isc_sockaddr_eqaddr(&disp->local, addr))
2443                 return (ISC_FALSE);
2444         result = isc_socket_getsockname(disp->socket, &sockaddr);
2445         if (result != ISC_R_SUCCESS)
2446                 return (ISC_FALSE);
2447
2448         return (isc_sockaddr_equal(&sockaddr, addr));
2449 }
2450
2451 /*
2452  * Requires mgr be locked.
2453  *
2454  * No dispatcher can be locked by this thread when calling this function.
2455  *
2456  *
2457  * NOTE:
2458  *      If a matching dispatcher is found, it is locked after this function
2459  *      returns, and must be unlocked by the caller.
2460  */
2461 static isc_result_t
2462 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2463               unsigned int attributes, unsigned int mask,
2464               dns_dispatch_t **dispp)
2465 {
2466         dns_dispatch_t *disp;
2467         isc_result_t result;
2468
2469         /*
2470          * Make certain that we will not match a private or exclusive dispatch.
2471          */
2472         attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2473         mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2474
2475         disp = ISC_LIST_HEAD(mgr->list);
2476         while (disp != NULL) {
2477                 LOCK(&disp->lock);
2478                 if ((disp->shutting_down == 0)
2479                     && ATTRMATCH(disp->attributes, attributes, mask)
2480                     && local_addr_match(disp, local))
2481                         break;
2482                 UNLOCK(&disp->lock);
2483                 disp = ISC_LIST_NEXT(disp, link);
2484         }
2485
2486         if (disp == NULL) {
2487                 result = ISC_R_NOTFOUND;
2488                 goto out;
2489         }
2490
2491         *dispp = disp;
2492         result = ISC_R_SUCCESS;
2493  out:
2494
2495         return (result);
2496 }
2497
2498 static isc_result_t
2499 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2500              unsigned int increment, dns_qid_t **qidp,
2501              isc_boolean_t needsocktable)
2502 {
2503         dns_qid_t *qid;
2504         unsigned int i;
2505         isc_result_t result;
2506
2507         REQUIRE(VALID_DISPATCHMGR(mgr));
2508         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2509         REQUIRE(increment > buckets);
2510         REQUIRE(qidp != NULL && *qidp == NULL);
2511
2512         qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2513         if (qid == NULL)
2514                 return (ISC_R_NOMEMORY);
2515
2516         qid->qid_table = isc_mem_get(mgr->mctx,
2517                                      buckets * sizeof(dns_displist_t));
2518         if (qid->qid_table == NULL) {
2519                 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2520                 return (ISC_R_NOMEMORY);
2521         }
2522
2523         qid->sock_table = NULL;
2524         if (needsocktable) {
2525                 qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2526                                               sizeof(dispsocketlist_t));
2527                 if (qid->sock_table == NULL) {
2528                         isc_mem_put(mgr->mctx, qid->qid_table,
2529                                     buckets * sizeof(dns_displist_t));
2530                         isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2531                         return (ISC_R_NOMEMORY);
2532                 }
2533         }
2534
2535         result = isc_mutex_init(&qid->lock);
2536         if (result != ISC_R_SUCCESS) {
2537                 if (qid->sock_table != NULL) {
2538                         isc_mem_put(mgr->mctx, qid->sock_table,
2539                                     buckets * sizeof(dispsocketlist_t));
2540                 }
2541                 isc_mem_put(mgr->mctx, qid->qid_table,
2542                             buckets * sizeof(dns_displist_t));
2543                 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2544                 return (result);
2545         }
2546
2547         for (i = 0; i < buckets; i++) {
2548                 ISC_LIST_INIT(qid->qid_table[i]);
2549                 if (qid->sock_table != NULL)
2550                         ISC_LIST_INIT(qid->sock_table[i]);
2551         }
2552
2553         qid->qid_nbuckets = buckets;
2554         qid->qid_increment = increment;
2555         qid->magic = QID_MAGIC;
2556         *qidp = qid;
2557         return (ISC_R_SUCCESS);
2558 }
2559
2560 static void
2561 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2562         dns_qid_t *qid;
2563
2564         REQUIRE(qidp != NULL);
2565         qid = *qidp;
2566
2567         REQUIRE(VALID_QID(qid));
2568
2569         *qidp = NULL;
2570         qid->magic = 0;
2571         isc_mem_put(mctx, qid->qid_table,
2572                     qid->qid_nbuckets * sizeof(dns_displist_t));
2573         if (qid->sock_table != NULL) {
2574                 isc_mem_put(mctx, qid->sock_table,
2575                             qid->qid_nbuckets * sizeof(dispsocketlist_t));
2576         }
2577         DESTROYLOCK(&qid->lock);
2578         isc_mem_put(mctx, qid, sizeof(*qid));
2579 }
2580
2581 /*
2582  * Allocate and set important limits.
2583  */
2584 static isc_result_t
2585 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2586                   dns_dispatch_t **dispp)
2587 {
2588         dns_dispatch_t *disp;
2589         isc_result_t result;
2590
2591         REQUIRE(VALID_DISPATCHMGR(mgr));
2592         REQUIRE(dispp != NULL && *dispp == NULL);
2593
2594         /*
2595          * Set up the dispatcher, mostly.  Don't bother setting some of
2596          * the options that are controlled by tcp vs. udp, etc.
2597          */
2598
2599         disp = isc_mempool_get(mgr->dpool);
2600         if (disp == NULL)
2601                 return (ISC_R_NOMEMORY);
2602
2603         disp->magic = 0;
2604         disp->mgr = mgr;
2605         disp->maxrequests = maxrequests;
2606         disp->attributes = 0;
2607         ISC_LINK_INIT(disp, link);
2608         disp->refcount = 1;
2609         disp->recv_pending = 0;
2610         memset(&disp->local, 0, sizeof(disp->local));
2611         disp->localport = 0;
2612         disp->shutting_down = 0;
2613         disp->shutdown_out = 0;
2614         disp->connected = 0;
2615         disp->tcpmsg_valid = 0;
2616         disp->shutdown_why = ISC_R_UNEXPECTED;
2617         disp->requests = 0;
2618         disp->tcpbuffers = 0;
2619         disp->qid = NULL;
2620         ISC_LIST_INIT(disp->activesockets);
2621         ISC_LIST_INIT(disp->inactivesockets);
2622         disp->nsockets = 0;
2623         dispatch_initrandom(&disp->arc4ctx, mgr->entropy, NULL);
2624         disp->port_table = NULL;
2625         disp->portpool = NULL;
2626
2627         result = isc_mutex_init(&disp->lock);
2628         if (result != ISC_R_SUCCESS)
2629                 goto deallocate;
2630
2631         disp->failsafe_ev = allocate_devent(disp);
2632         if (disp->failsafe_ev == NULL) {
2633                 result = ISC_R_NOMEMORY;
2634                 goto kill_lock;
2635         }
2636
2637         disp->magic = DISPATCH_MAGIC;
2638
2639         *dispp = disp;
2640         return (ISC_R_SUCCESS);
2641
2642         /*
2643          * error returns
2644          */
2645  kill_lock:
2646         DESTROYLOCK(&disp->lock);
2647  deallocate:
2648         isc_mempool_put(mgr->dpool, disp);
2649
2650         return (result);
2651 }
2652
2653
2654 /*
2655  * MUST be unlocked, and not used by anything.
2656  */
2657 static void
2658 dispatch_free(dns_dispatch_t **dispp) {
2659         dns_dispatch_t *disp;
2660         dns_dispatchmgr_t *mgr;
2661         int i;
2662
2663         REQUIRE(VALID_DISPATCH(*dispp));
2664         disp = *dispp;
2665         *dispp = NULL;
2666
2667         mgr = disp->mgr;
2668         REQUIRE(VALID_DISPATCHMGR(mgr));
2669
2670         if (disp->tcpmsg_valid) {
2671                 dns_tcpmsg_invalidate(&disp->tcpmsg);
2672                 disp->tcpmsg_valid = 0;
2673         }
2674
2675         INSIST(disp->tcpbuffers == 0);
2676         INSIST(disp->requests == 0);
2677         INSIST(disp->recv_pending == 0);
2678         INSIST(ISC_LIST_EMPTY(disp->activesockets));
2679         INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2680
2681         isc_mempool_put(mgr->depool, disp->failsafe_ev);
2682         disp->failsafe_ev = NULL;
2683
2684         if (disp->qid != NULL)
2685                 qid_destroy(mgr->mctx, &disp->qid);
2686
2687         if (disp->port_table != NULL) {
2688                 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2689                         INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2690                 isc_mem_put(mgr->mctx, disp->port_table,
2691                             sizeof(disp->port_table[0]) *
2692                             DNS_DISPATCH_PORTTABLESIZE);
2693         }
2694
2695         if (disp->portpool != NULL)
2696                 isc_mempool_destroy(&disp->portpool);
2697
2698         disp->mgr = NULL;
2699         DESTROYLOCK(&disp->lock);
2700         disp->magic = 0;
2701         isc_mempool_put(mgr->dpool, disp);
2702 }
2703
2704 isc_result_t
2705 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2706                        isc_taskmgr_t *taskmgr, unsigned int buffersize,
2707                        unsigned int maxbuffers, unsigned int maxrequests,
2708                        unsigned int buckets, unsigned int increment,
2709                        unsigned int attributes, dns_dispatch_t **dispp)
2710 {
2711         isc_result_t result;
2712         dns_dispatch_t *disp;
2713
2714         UNUSED(maxbuffers);
2715         UNUSED(buffersize);
2716
2717         REQUIRE(VALID_DISPATCHMGR(mgr));
2718         REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2719         REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2720         REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2721
2722         attributes |= DNS_DISPATCHATTR_PRIVATE;  /* XXXMLG */
2723
2724         LOCK(&mgr->lock);
2725
2726         /*
2727          * dispatch_allocate() checks mgr for us.
2728          * qid_allocate() checks buckets and increment for us.
2729          */
2730         disp = NULL;
2731         result = dispatch_allocate(mgr, maxrequests, &disp);
2732         if (result != ISC_R_SUCCESS) {
2733                 UNLOCK(&mgr->lock);
2734                 return (result);
2735         }
2736
2737         result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2738         if (result != ISC_R_SUCCESS)
2739                 goto deallocate_dispatch;
2740
2741         disp->socktype = isc_sockettype_tcp;
2742         disp->socket = NULL;
2743         isc_socket_attach(sock, &disp->socket);
2744
2745         disp->sepool = NULL;
2746
2747         disp->ntasks = 1;
2748         disp->task[0] = NULL;
2749         result = isc_task_create(taskmgr, 0, &disp->task[0]);
2750         if (result != ISC_R_SUCCESS)
2751                 goto kill_socket;
2752
2753         disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2754                                             DNS_EVENT_DISPATCHCONTROL,
2755                                             destroy_disp, disp,
2756                                             sizeof(isc_event_t));
2757         if (disp->ctlevent == NULL) {
2758                 result = ISC_R_NOMEMORY;
2759                 goto kill_task;
2760         }
2761
2762         isc_task_setname(disp->task[0], "tcpdispatch", disp);
2763
2764         dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2765         disp->tcpmsg_valid = 1;
2766
2767         disp->attributes = attributes;
2768
2769         /*
2770          * Append it to the dispatcher list.
2771          */
2772         ISC_LIST_APPEND(mgr->list, disp, link);
2773         UNLOCK(&mgr->lock);
2774
2775         mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2776         dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2777
2778         *dispp = disp;
2779
2780         return (ISC_R_SUCCESS);
2781
2782         /*
2783          * Error returns.
2784          */
2785  kill_task:
2786         isc_task_detach(&disp->task[0]);
2787  kill_socket:
2788         isc_socket_detach(&disp->socket);
2789  deallocate_dispatch:
2790         dispatch_free(&disp);
2791
2792         UNLOCK(&mgr->lock);
2793
2794         return (result);
2795 }
2796
2797 isc_result_t
2798 dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2799                     isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2800                     unsigned int buffersize,
2801                     unsigned int maxbuffers, unsigned int maxrequests,
2802                     unsigned int buckets, unsigned int increment,
2803                     unsigned int attributes, unsigned int mask,
2804                     dns_dispatch_t **dispp, dns_dispatch_t *dup_dispatch)
2805 {
2806         isc_result_t result;
2807         dns_dispatch_t *disp = NULL;
2808
2809         REQUIRE(VALID_DISPATCHMGR(mgr));
2810         REQUIRE(sockmgr != NULL);
2811         REQUIRE(localaddr != NULL);
2812         REQUIRE(taskmgr != NULL);
2813         REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2814         REQUIRE(maxbuffers > 0);
2815         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2816         REQUIRE(increment > buckets);
2817         REQUIRE(dispp != NULL && *dispp == NULL);
2818         REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2819
2820         result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2821                                         maxrequests, buckets, increment);
2822         if (result != ISC_R_SUCCESS)
2823                 return (result);
2824
2825         LOCK(&mgr->lock);
2826
2827         if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2828                 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2829                 goto createudp;
2830         }
2831
2832         /*
2833          * See if we have a dispatcher that matches.
2834          */
2835         if (dup_dispatch == NULL) {
2836                 result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2837                 if (result == ISC_R_SUCCESS) {
2838                         disp->refcount++;
2839
2840                         if (disp->maxrequests < maxrequests)
2841                                 disp->maxrequests = maxrequests;
2842
2843                         if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0
2844                             && (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2845                         {
2846                                 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2847                                 if (disp->recv_pending != 0)
2848                                         isc_socket_cancel(disp->socket,
2849                                                           disp->task[0],
2850                                                           ISC_SOCKCANCEL_RECV);
2851                         }
2852
2853                         UNLOCK(&disp->lock);
2854                         UNLOCK(&mgr->lock);
2855
2856                         *dispp = disp;
2857
2858                         return (ISC_R_SUCCESS);
2859                 }
2860         }
2861
2862  createudp:
2863         /*
2864          * Nope, create one.
2865          */
2866         result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2867                                     maxrequests, attributes, &disp,
2868                                     dup_dispatch == NULL
2869                                             ? NULL
2870                                             : dup_dispatch->socket);
2871
2872         if (result != ISC_R_SUCCESS) {
2873                 UNLOCK(&mgr->lock);
2874                 return (result);
2875         }
2876
2877         UNLOCK(&mgr->lock);
2878         *dispp = disp;
2879
2880         return (ISC_R_SUCCESS);
2881 }
2882
2883 isc_result_t
2884 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2885                     isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2886                     unsigned int buffersize,
2887                     unsigned int maxbuffers, unsigned int maxrequests,
2888                     unsigned int buckets, unsigned int increment,
2889                     unsigned int attributes, unsigned int mask,
2890                     dns_dispatch_t **dispp)
2891 {
2892         return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr,
2893                                         buffersize, maxbuffers, maxrequests,
2894                                         buckets, increment, attributes,
2895                                         mask, dispp, NULL));
2896 }
2897
/*
 * mgr should be locked.
 */

/*
 * Size of the ring of rejected sockets that get_udpsocket() keeps open
 * while it asks the kernel for another port.  May be overridden at build
 * time.
 */
#ifndef DNS_DISPATCH_HELD
#define DNS_DISPATCH_HELD 20U
#endif

/*
 * Open a UDP socket for 'disp' bound to 'localaddr' and return it in
 * '*sockp'.
 *
 * If 'localaddr' carries a specific port, a single attempt is made with
 * ISC_SOCKET_REUSEADDRESS, passing 'dup_socket' through to open_socket().
 *
 * If the port is 0:
 *   1. Up to 1024 ports are drawn at random from the manager's configured
 *      port list for the address family and tried in turn.  ISC_R_NOPERM
 *      and ISC_R_ADDRINUSE cause another draw; any other result (success
 *      or failure) ends the search and is returned, with the drawn port
 *      recorded in disp->localport.
 *   2. If all 1024 draws were rejected, sockets are opened with port 0
 *      (so the kernel picks) until portavailable() accepts one, giving up
 *      after 0xffff attempts with ISC_R_FAILURE.
 *
 * 'dup_socket' is only used in the specific-port case.
 *
 * Requires:
 *      'sockp' != NULL and '*sockp' == NULL.
 *
 * Returns:
 *      ISC_R_SUCCESS, ISC_R_ADDRNOTAVAIL when no ports are configured for
 *      the address family, ISC_R_FAILURE when no acceptable port could be
 *      obtained, or a failure code from open_socket().
 */
static isc_result_t
get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
              isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
              isc_socket_t **sockp, isc_socket_t *dup_socket)
{
        unsigned int i, j;
        isc_socket_t *held[DNS_DISPATCH_HELD];
        isc_sockaddr_t localaddr_bound;
        isc_socket_t *sock = NULL;
        isc_result_t result = ISC_R_SUCCESS;
        isc_boolean_t anyport;

        INSIST(sockp != NULL && *sockp == NULL);

        /* Work on a copy so the caller's address keeps its port of 0. */
        localaddr_bound = *localaddr;
        anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);

        if (anyport) {
                unsigned int nports;
                in_port_t *ports;

                /*
                 * If no port is specified, we first try to pick up a random
                 * port by ourselves.
                 */
                if (isc_sockaddr_pf(localaddr) == AF_INET) {
                        nports = disp->mgr->nv4ports;
                        ports = disp->mgr->v4ports;
                } else {
                        nports = disp->mgr->nv6ports;
                        ports = disp->mgr->v6ports;
                }
                if (nports == 0)
                        return (ISC_R_ADDRNOTAVAIL);

                for (i = 0; i < 1024; i++) {
                        in_port_t prt;

                        prt = ports[dispatch_uniformrandom(
                                        DISP_ARC4CTX(disp),
                                        nports)];
                        isc_sockaddr_setport(&localaddr_bound, prt);
                        result = open_socket(sockmgr, &localaddr_bound,
                                             0, &sock, NULL);
                        /*
                         * Continue if the port chosen is already in use
                         * or the OS has reserved it.
                         */
                        if (result == ISC_R_NOPERM ||
                            result == ISC_R_ADDRINUSE)
                                continue;
                        /*
                         * Any other result, success or not, is final and
                         * is handed straight back to the caller.
                         */
                        disp->localport = prt;
                        *sockp = sock;
                        return (result);
                }

                /*
                 * If this fails 1024 times, we then ask the kernel for
                 * choosing one.
                 */
        } else {
                /* Allow to reuse address for non-random ports. */
                result = open_socket(sockmgr, localaddr,
                                     ISC_SOCKET_REUSEADDRESS, &sock,
                                     dup_socket);

                if (result == ISC_R_SUCCESS)
                        *sockp = sock;

                return (result);
        }

        /*
         * Kernel-chosen port.  Sockets rejected by portavailable() are
         * parked in the 'held' ring (oldest entry released when its slot
         * is reused) rather than closed at once -- presumably so the
         * kernel does not hand the same port straight back; NOTE(review):
         * confirm the intent.
         */
        memset(held, 0, sizeof(held));
        i = 0;

        for (j = 0; j < 0xffffU; j++) {
                result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
                if (result != ISC_R_SUCCESS)
                        goto end;
                else if (portavailable(mgr, sock, NULL))
                        break;
                if (held[i] != NULL)
                        isc_socket_detach(&held[i]);
                held[i++] = sock;
                sock = NULL;
                if (i == DNS_DISPATCH_HELD)
                        i = 0;
        }
        if (j == 0xffffU) {
                /* Every attempt was rejected; 'sock' is NULL here. */
                mgr_log(mgr, ISC_LOG_ERROR,
                        "avoid-v%s-udp-ports: unable to allocate "
                        "an available port",
                        isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
                result = ISC_R_FAILURE;
                goto end;
        }
        *sockp = sock;

end:
        /* Release whatever rejected sockets are still parked. */
        for (i = 0; i < DNS_DISPATCH_HELD; i++) {
                if (held[i] != NULL)
                        isc_socket_detach(&held[i]);
        }

        return (result);
}
3012
3013 static isc_result_t
3014 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
3015                    isc_taskmgr_t *taskmgr,
3016                    isc_sockaddr_t *localaddr,
3017                    unsigned int maxrequests,
3018                    unsigned int attributes,
3019                    dns_dispatch_t **dispp,
3020                    isc_socket_t *dup_socket)
3021 {
3022         isc_result_t result;
3023         dns_dispatch_t *disp;
3024         isc_socket_t *sock = NULL;
3025         int i = 0;
3026
3027         /*
3028          * dispatch_allocate() checks mgr for us.
3029          */
3030         disp = NULL;
3031         result = dispatch_allocate(mgr, maxrequests, &disp);
3032         if (result != ISC_R_SUCCESS)
3033                 return (result);
3034
3035         if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
3036                 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock,
3037                                        dup_socket);
3038                 if (result != ISC_R_SUCCESS)
3039                         goto deallocate_dispatch;
3040
3041                 if (isc_log_wouldlog(dns_lctx, 90)) {
3042                         char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3043
3044                         isc_sockaddr_format(localaddr, addrbuf,
3045                                             ISC_SOCKADDR_FORMATSIZE);
3046                         mgr_log(mgr, LVL(90), "dns_dispatch_createudp: Created"
3047                                 " UDP dispatch for %s with socket fd %d\n",
3048                                 addrbuf, isc_socket_getfd(sock));
3049                 }
3050
3051         } else {
3052                 isc_sockaddr_t sa_any;
3053
3054                 /*
3055                  * For dispatches using exclusive sockets with a specific
3056                  * source address, we only check if the specified address is
3057                  * available on the system.  Query sockets will be created later
3058                  * on demand.
3059                  */
3060                 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
3061                 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
3062                         result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
3063                         if (sock != NULL)
3064                                 isc_socket_detach(&sock);
3065                         if (result != ISC_R_SUCCESS)
3066                                 goto deallocate_dispatch;
3067                 }
3068
3069                 disp->port_table = isc_mem_get(mgr->mctx,
3070                                                sizeof(disp->port_table[0]) *
3071                                                DNS_DISPATCH_PORTTABLESIZE);
3072                 if (disp->port_table == NULL)
3073                         goto deallocate_dispatch;
3074                 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
3075                         ISC_LIST_INIT(disp->port_table[i]);
3076
3077                 result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
3078                                             &disp->portpool);
3079                 if (result != ISC_R_SUCCESS)
3080                         goto deallocate_dispatch;
3081                 isc_mempool_setname(disp->portpool, "disp_portpool");
3082                 isc_mempool_setfreemax(disp->portpool, 128);
3083         }
3084         disp->socktype = isc_sockettype_udp;
3085         disp->socket = sock;
3086         disp->local = *localaddr;
3087
3088         if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3089                 disp->ntasks = MAX_INTERNAL_TASKS;
3090         else
3091                 disp->ntasks = 1;
3092         for (i = 0; i < disp->ntasks; i++) {
3093                 disp->task[i] = NULL;
3094                 result = isc_task_create(taskmgr, 0, &disp->task[i]);
3095                 if (result != ISC_R_SUCCESS) {
3096                         while (--i >= 0) {
3097                                 isc_task_shutdown(disp->task[i]);
3098                                 isc_task_detach(&disp->task[i]);
3099                         }
3100                         goto kill_socket;
3101                 }
3102                 isc_task_setname(disp->task[i], "udpdispatch", disp);
3103         }
3104
3105         disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
3106                                             DNS_EVENT_DISPATCHCONTROL,
3107                                             destroy_disp, disp,
3108                                             sizeof(isc_event_t));
3109         if (disp->ctlevent == NULL) {
3110                 result = ISC_R_NOMEMORY;
3111                 goto kill_task;
3112         }
3113
3114         disp->sepool = NULL;
3115         if (isc_mempool_create(mgr->mctx, sizeof(isc_socketevent_t),
3116                                &disp->sepool) != ISC_R_SUCCESS)
3117         {
3118                 result = ISC_R_NOMEMORY;
3119                 goto kill_ctlevent;
3120         }
3121
3122         result = isc_mutex_init(&disp->sepool_lock);
3123         if (result != ISC_R_SUCCESS)
3124                 goto kill_sepool;
3125
3126         isc_mempool_setname(disp->sepool, "disp_sepool");
3127         isc_mempool_setmaxalloc(disp->sepool, 32768);
3128         isc_mempool_setfreemax(disp->sepool, 32768);
3129         isc_mempool_associatelock(disp->sepool, &disp->sepool_lock);
3130         isc_mempool_setfillcount(disp->sepool, 16);
3131
3132         attributes &= ~DNS_DISPATCHATTR_TCP;
3133         attributes |= DNS_DISPATCHATTR_UDP;
3134         disp->attributes = attributes;
3135
3136         /*
3137          * Append it to the dispatcher list.
3138          */
3139         ISC_LIST_APPEND(mgr->list, disp, link);
3140
3141         mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
3142         dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
3143         if (disp->socket != NULL)
3144                 dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
3145
3146         *dispp = disp;
3147
3148         return (result);
3149
3150         /*
3151          * Error returns.
3152          */
3153  kill_sepool:
3154         isc_mempool_destroy(&disp->sepool);
3155  kill_ctlevent:
3156         isc_event_free(&disp->ctlevent);
3157  kill_task:
3158         for (i = 0; i < disp->ntasks; i++)
3159                 isc_task_detach(&disp->task[i]);
3160  kill_socket:
3161         if (disp->socket != NULL)
3162                 isc_socket_detach(&disp->socket);
3163  deallocate_dispatch:
3164         dispatch_free(&disp);
3165
3166         return (result);
3167 }
3168
3169 void
3170 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
3171         REQUIRE(VALID_DISPATCH(disp));
3172         REQUIRE(dispp != NULL && *dispp == NULL);
3173
3174         LOCK(&disp->lock);
3175         disp->refcount++;
3176         UNLOCK(&disp->lock);
3177
3178         *dispp = disp;
3179 }
3180
3181 /*
3182  * It is important to lock the manager while we are deleting the dispatch,
3183  * since dns_dispatch_getudp will call dispatch_find, which returns to
3184  * the caller a dispatch but does not attach to it until later.  _getudp
3185  * locks the manager, however, so locking it here will keep us from attaching
3186  * to a dispatcher that is in the process of going away.
3187  */
3188 void
3189 dns_dispatch_detach(dns_dispatch_t **dispp) {
3190         dns_dispatch_t *disp;
3191         dispsocket_t *dispsock;
3192         isc_boolean_t killit;
3193
3194         REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
3195
3196         disp = *dispp;
3197         *dispp = NULL;
3198
3199         LOCK(&disp->lock);
3200
3201         INSIST(disp->refcount > 0);
3202         disp->refcount--;
3203         if (disp->refcount == 0) {
3204                 if (disp->recv_pending > 0)
3205                         isc_socket_cancel(disp->socket, disp->task[0],
3206                                           ISC_SOCKCANCEL_RECV);
3207                 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3208                      dispsock != NULL;
3209                      dispsock = ISC_LIST_NEXT(dispsock, link)) {
3210                         isc_socket_cancel(dispsock->socket, dispsock->task,
3211                                           ISC_SOCKCANCEL_RECV);
3212                 }
3213                 disp->shutting_down = 1;
3214         }
3215
3216         dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
3217
3218         killit = destroy_disp_ok(disp);
3219         UNLOCK(&disp->lock);
3220         if (killit)
3221                 isc_task_send(disp->task[0], &disp->ctlevent);
3222 }
3223
3224 isc_result_t
3225 dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3226                           isc_task_t *task, isc_taskaction_t action, void *arg,
3227                           dns_messageid_t *idp, dns_dispentry_t **resp,
3228                           isc_socketmgr_t *sockmgr)
3229 {
3230         dns_dispentry_t *res;
3231         unsigned int bucket;
3232         in_port_t localport = 0;
3233         dns_messageid_t id;
3234         int i;
3235         isc_boolean_t ok;
3236         dns_qid_t *qid;
3237         dispsocket_t *dispsocket = NULL;
3238         isc_result_t result;
3239
3240         REQUIRE(VALID_DISPATCH(disp));
3241         REQUIRE(task != NULL);
3242         REQUIRE(dest != NULL);
3243         REQUIRE(resp != NULL && *resp == NULL);
3244         REQUIRE(idp != NULL);
3245         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3246                 REQUIRE(sockmgr != NULL);
3247
3248         LOCK(&disp->lock);
3249
3250         if (disp->shutting_down == 1) {
3251                 UNLOCK(&disp->lock);
3252                 return (ISC_R_SHUTTINGDOWN);
3253         }
3254
3255         if (disp->requests >= disp->maxrequests) {
3256                 UNLOCK(&disp->lock);
3257                 return (ISC_R_QUOTA);
3258         }
3259
3260         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
3261             disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
3262                 dispsocket_t *oldestsocket;
3263                 dns_dispentry_t *oldestresp;
3264                 dns_dispatchevent_t *rev;
3265
3266                 /*
3267                  * Kill oldest outstanding query if the number of sockets
3268                  * exceeds the quota to keep the room for new queries.
3269                  */
3270                 oldestsocket = ISC_LIST_HEAD(disp->activesockets);
3271                 oldestresp = oldestsocket->resp;
3272                 if (oldestresp != NULL && !oldestresp->item_out) {
3273                         rev = allocate_devent(oldestresp->disp);
3274                         if (rev != NULL) {
3275                                 rev->buffer.base = NULL;
3276                                 rev->result = ISC_R_CANCELED;
3277                                 rev->id = oldestresp->id;
3278                                 ISC_EVENT_INIT(rev, sizeof(*rev), 0,
3279                                                NULL, DNS_EVENT_DISPATCH,
3280                                                oldestresp->action,
3281                                                oldestresp->arg, oldestresp,
3282                                                NULL, NULL);
3283                                 oldestresp->item_out = ISC_TRUE;
3284                                 isc_task_send(oldestresp->task,
3285                                               ISC_EVENT_PTR(&rev));
3286                                 inc_stats(disp->mgr,
3287                                           dns_resstatscounter_dispabort);
3288                         }
3289                 }
3290
3291                 /*
3292                  * Move this entry to the tail so that it won't (easily) be
3293                  * examined before actually being canceled.
3294                  */
3295                 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
3296                 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
3297         }
3298
3299         qid = DNS_QID(disp);
3300
3301         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3302                 /*
3303                  * Get a separate UDP socket with a random port number.
3304                  */
3305                 result = get_dispsocket(disp, dest, sockmgr, &dispsocket,
3306                                         &localport);
3307                 if (result != ISC_R_SUCCESS) {
3308                         UNLOCK(&disp->lock);
3309                         inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
3310                         return (result);
3311                 }
3312         } else {
3313                 localport = disp->localport;
3314         }
3315
3316         /*
3317          * Try somewhat hard to find an unique ID.
3318          */
3319         LOCK(&qid->lock);
3320         id = (dns_messageid_t)dispatch_random(DISP_ARC4CTX(disp));
3321         ok = ISC_FALSE;
3322         i = 0;
3323         do {
3324                 bucket = dns_hash(qid, dest, id, localport);
3325                 if (entry_search(qid, dest, id, localport, bucket) == NULL) {
3326                         ok = ISC_TRUE;
3327                         break;
3328                 }
3329                 id += qid->qid_increment;
3330                 id &= 0x0000ffff;
3331         } while (i++ < 64);
3332         UNLOCK(&qid->lock);
3333
3334         if (!ok) {
3335                 UNLOCK(&disp->lock);
3336                 return (ISC_R_NOMORE);
3337         }
3338
3339         res = isc_mempool_get(disp->mgr->rpool);
3340         if (res == NULL) {
3341                 if (dispsocket != NULL)
3342                         destroy_dispsocket(disp, &dispsocket);
3343                 UNLOCK(&disp->lock);
3344                 return (ISC_R_NOMEMORY);
3345         }
3346
3347         disp->refcount++;
3348         disp->requests++;
3349         res->task = NULL;
3350         isc_task_attach(task, &res->task);
3351         res->disp = disp;
3352         res->id = id;
3353         res->port = localport;
3354         res->bucket = bucket;
3355         res->host = *dest;
3356         res->action = action;
3357         res->arg = arg;
3358         res->dispsocket = dispsocket;
3359         if (dispsocket != NULL)
3360                 dispsocket->resp = res;
3361         res->item_out = ISC_FALSE;
3362         ISC_LIST_INIT(res->items);
3363         ISC_LINK_INIT(res, link);
3364         res->magic = RESPONSE_MAGIC;
3365
3366         LOCK(&qid->lock);
3367         ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3368         UNLOCK(&qid->lock);
3369
3370         request_log(disp, res, LVL(90),
3371                     "attached to task %p", res->task);
3372
3373         if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3374             ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
3375                 result = startrecv(disp, dispsocket);
3376                 if (result != ISC_R_SUCCESS) {
3377                         LOCK(&qid->lock);
3378                         ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3379                         UNLOCK(&qid->lock);
3380
3381                         if (dispsocket != NULL)
3382                                 destroy_dispsocket(disp, &dispsocket);
3383
3384                         disp->refcount--;
3385                         disp->requests--;
3386
3387                         UNLOCK(&disp->lock);
3388                         isc_task_detach(&res->task);
3389                         isc_mempool_put(disp->mgr->rpool, res);
3390                         return (result);
3391                 }
3392         }
3393
3394         if (dispsocket != NULL)
3395                 ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3396
3397         UNLOCK(&disp->lock);
3398
3399         *idp = id;
3400         *resp = res;
3401
3402         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3403                 INSIST(res->dispsocket != NULL);
3404
3405         return (ISC_R_SUCCESS);
3406 }
3407
3408 isc_result_t
3409 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3410                          isc_task_t *task, isc_taskaction_t action, void *arg,
3411                          dns_messageid_t *idp, dns_dispentry_t **resp)
3412 {
3413         REQUIRE(VALID_DISPATCH(disp));
3414         REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3415
3416         return (dns_dispatch_addresponse2(disp, dest, task, action, arg,
3417                                           idp, resp, NULL));
3418 }
3419
3420 void
3421 dns_dispatch_starttcp(dns_dispatch_t *disp) {
3422
3423         REQUIRE(VALID_DISPATCH(disp));
3424
3425         dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3426
3427         LOCK(&disp->lock);
3428         disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
3429         (void)startrecv(disp, NULL);
3430         UNLOCK(&disp->lock);
3431 }
3432
3433 void
3434 dns_dispatch_removeresponse(dns_dispentry_t **resp,
3435                             dns_dispatchevent_t **sockevent)
3436 {
3437         dns_dispatchmgr_t *mgr;
3438         dns_dispatch_t *disp;
3439         dns_dispentry_t *res;
3440         dispsocket_t *dispsock;
3441         dns_dispatchevent_t *ev;
3442         unsigned int bucket;
3443         isc_boolean_t killit;
3444         unsigned int n;
3445         isc_eventlist_t events;
3446         dns_qid_t *qid;
3447
3448         REQUIRE(resp != NULL);
3449         REQUIRE(VALID_RESPONSE(*resp));
3450
3451         res = *resp;
3452         *resp = NULL;
3453
3454         disp = res->disp;
3455         REQUIRE(VALID_DISPATCH(disp));
3456         mgr = disp->mgr;
3457         REQUIRE(VALID_DISPATCHMGR(mgr));
3458
3459         qid = DNS_QID(disp);
3460
3461         if (sockevent != NULL) {
3462                 REQUIRE(*sockevent != NULL);
3463                 ev = *sockevent;
3464                 *sockevent = NULL;
3465         } else {
3466                 ev = NULL;
3467         }
3468
3469         LOCK(&disp->lock);
3470
3471         INSIST(disp->requests > 0);
3472         disp->requests--;
3473         INSIST(disp->refcount > 0);
3474         disp->refcount--;
3475         if (disp->refcount == 0) {
3476                 if (disp->recv_pending > 0)
3477                         isc_socket_cancel(disp->socket, disp->task[0],
3478                                           ISC_SOCKCANCEL_RECV);
3479                 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3480                      dispsock != NULL;
3481                      dispsock = ISC_LIST_NEXT(dispsock, link)) {
3482                         isc_socket_cancel(dispsock->socket, dispsock->task,
3483                                           ISC_SOCKCANCEL_RECV);
3484                 }
3485                 disp->shutting_down = 1;
3486         }
3487
3488         bucket = res->bucket;
3489
3490         LOCK(&qid->lock);
3491         ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3492         UNLOCK(&qid->lock);
3493
3494         if (ev == NULL && res->item_out) {
3495                 /*
3496                  * We've posted our event, but the caller hasn't gotten it
3497                  * yet.  Take it back.
3498                  */
3499                 ISC_LIST_INIT(events);
3500                 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3501                                     NULL, &events);
3502                 /*
3503                  * We had better have gotten it back.
3504                  */
3505                 INSIST(n == 1);
3506                 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
3507         }
3508
3509         if (ev != NULL) {
3510                 REQUIRE(res->item_out == ISC_TRUE);
3511                 res->item_out = ISC_FALSE;
3512                 if (ev->buffer.base != NULL)
3513                         free_buffer(disp, ev->buffer.base, ev->buffer.length);
3514                 free_devent(disp, ev);
3515         }
3516
3517         request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3518         isc_task_detach(&res->task);
3519
3520         if (res->dispsocket != NULL) {
3521                 isc_socket_cancel(res->dispsocket->socket,
3522                                   res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3523                 res->dispsocket->resp = NULL;
3524         }
3525
3526         /*
3527          * Free any buffered requests as well
3528          */
3529         ev = ISC_LIST_HEAD(res->items);
3530         while (ev != NULL) {
3531                 ISC_LIST_UNLINK(res->items, ev, ev_link);
3532                 if (ev->buffer.base != NULL)
3533                         free_buffer(disp, ev->buffer.base, ev->buffer.length);
3534                 free_devent(disp, ev);
3535                 ev = ISC_LIST_HEAD(res->items);
3536         }
3537         res->magic = 0;
3538         isc_mempool_put(disp->mgr->rpool, res);
3539         if (disp->shutting_down == 1)
3540                 do_cancel(disp);
3541         else
3542                 (void)startrecv(disp, NULL);
3543
3544         killit = destroy_disp_ok(disp);
3545         UNLOCK(&disp->lock);
3546         if (killit)
3547                 isc_task_send(disp->task[0], &disp->ctlevent);
3548 }
3549
3550 static void
3551 do_cancel(dns_dispatch_t *disp) {
3552         dns_dispatchevent_t *ev;
3553         dns_dispentry_t *resp;
3554         dns_qid_t *qid;
3555
3556         if (disp->shutdown_out == 1)
3557                 return;
3558
3559         qid = DNS_QID(disp);
3560
3561         /*
3562          * Search for the first response handler without packets outstanding
3563          * unless a specific hander is given.
3564          */
3565         LOCK(&qid->lock);
3566         for (resp = linear_first(qid);
3567              resp != NULL && resp->item_out;
3568              /* Empty. */)
3569                 resp = linear_next(qid, resp);
3570
3571         /*
3572          * No one to send the cancel event to, so nothing to do.
3573          */
3574         if (resp == NULL)
3575                 goto unlock;
3576
3577         /*
3578          * Send the shutdown failsafe event to this resp.
3579          */
3580         ev = disp->failsafe_ev;
3581         ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3582                        resp->action, resp->arg, resp, NULL, NULL);
3583         ev->result = disp->shutdown_why;
3584         ev->buffer.base = NULL;
3585         ev->buffer.length = 0;
3586         disp->shutdown_out = 1;
3587         request_log(disp, resp, LVL(10),
3588                     "cancel: failsafe event %p -> task %p",
3589                     ev, resp->task);
3590         resp->item_out = ISC_TRUE;
3591         isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
3592  unlock:
3593         UNLOCK(&qid->lock);
3594 }
3595
3596 isc_socket_t *
3597 dns_dispatch_getsocket(dns_dispatch_t *disp) {
3598         REQUIRE(VALID_DISPATCH(disp));
3599
3600         return (disp->socket);
3601 }
3602
3603 isc_socket_t *
3604 dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3605         REQUIRE(VALID_RESPONSE(resp));
3606
3607         if (resp->dispsocket != NULL)
3608                 return (resp->dispsocket->socket);
3609         else
3610                 return (NULL);
3611 }
3612
3613 isc_result_t
3614 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3615
3616         REQUIRE(VALID_DISPATCH(disp));
3617         REQUIRE(addrp != NULL);
3618
3619         if (disp->socktype == isc_sockettype_udp) {
3620                 *addrp = disp->local;
3621                 return (ISC_R_SUCCESS);
3622         }
3623         return (ISC_R_NOTIMPLEMENTED);
3624 }
3625
3626 void
3627 dns_dispatch_cancel(dns_dispatch_t *disp) {
3628         REQUIRE(VALID_DISPATCH(disp));
3629
3630         LOCK(&disp->lock);
3631
3632         if (disp->shutting_down == 1) {
3633                 UNLOCK(&disp->lock);
3634                 return;
3635         }
3636
3637         disp->shutdown_why = ISC_R_CANCELED;
3638         disp->shutting_down = 1;
3639         do_cancel(disp);
3640
3641         UNLOCK(&disp->lock);
3642
3643         return;
3644 }
3645
3646 unsigned int
3647 dns_dispatch_getattributes(dns_dispatch_t *disp) {
3648         REQUIRE(VALID_DISPATCH(disp));
3649
3650         /*
3651          * We don't bother locking disp here; it's the caller's responsibility
3652          * to use only non volatile flags.
3653          */
3654         return (disp->attributes);
3655 }
3656
3657 void
3658 dns_dispatch_changeattributes(dns_dispatch_t *disp,
3659                               unsigned int attributes, unsigned int mask)
3660 {
3661         REQUIRE(VALID_DISPATCH(disp));
3662         /* Exclusive attribute can only be set on creation */
3663         REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3664         /* Also, a dispatch with randomport specified cannot start listening */
3665         REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3666                 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3667
3668         /* XXXMLG
3669          * Should check for valid attributes here!
3670          */
3671
3672         LOCK(&disp->lock);
3673
3674         if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3675                 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3676                     (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3677                         disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3678                         (void)startrecv(disp, NULL);
3679                 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3680                            == 0 &&
3681                            (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3682                         disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3683                         if (disp->recv_pending != 0)
3684                                 isc_socket_cancel(disp->socket, disp->task[0],
3685                                                   ISC_SOCKCANCEL_RECV);
3686                 }
3687         }
3688
3689         disp->attributes &= ~mask;
3690         disp->attributes |= (attributes & mask);
3691         UNLOCK(&disp->lock);
3692 }
3693
3694 void
3695 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3696         void *buf;
3697         isc_socketevent_t *sevent, *newsevent;
3698
3699         REQUIRE(VALID_DISPATCH(disp));
3700         REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3701         REQUIRE(event != NULL);
3702
3703         sevent = (isc_socketevent_t *)event;
3704
3705         INSIST(sevent->n <= disp->mgr->buffersize);
3706         newsevent = (isc_socketevent_t *)
3707                     isc_event_allocate(disp->mgr->mctx, NULL,
3708                                       DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3709                                       disp, sizeof(isc_socketevent_t));
3710         if (newsevent == NULL)
3711                 return;
3712
3713         buf = allocate_udp_buffer(disp);
3714         if (buf == NULL) {
3715                 isc_event_free(ISC_EVENT_PTR(&newsevent));
3716                 return;
3717         }
3718         memmove(buf, sevent->region.base, sevent->n);
3719         newsevent->region.base = buf;
3720         newsevent->region.length = disp->mgr->buffersize;
3721         newsevent->n = sevent->n;
3722         newsevent->result = sevent->result;
3723         newsevent->address = sevent->address;
3724         newsevent->timestamp = sevent->timestamp;
3725         newsevent->pktinfo = sevent->pktinfo;
3726         newsevent->attributes = sevent->attributes;
3727
3728         isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
3729 }
3730
3731 dns_dispatch_t *
3732 dns_dispatchset_get(dns_dispatchset_t *dset) {
3733         dns_dispatch_t *disp;
3734
3735         /* check that dispatch set is configured */
3736         if (dset == NULL || dset->ndisp == 0)
3737                 return (NULL);
3738
3739         LOCK(&dset->lock);
3740         disp = dset->dispatches[dset->cur];
3741         dset->cur++;
3742         if (dset->cur == dset->ndisp)
3743                 dset->cur = 0;
3744         UNLOCK(&dset->lock);
3745
3746         return (disp);
3747 }
3748
3749 isc_result_t
3750 dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr,
3751                        isc_taskmgr_t *taskmgr, dns_dispatch_t *source,
3752                        dns_dispatchset_t **dsetp, int n)
3753 {
3754         isc_result_t result;
3755         dns_dispatchset_t *dset;
3756         dns_dispatchmgr_t *mgr;
3757         int i, j;
3758
3759         REQUIRE(VALID_DISPATCH(source));
3760         REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0);
3761         REQUIRE(dsetp != NULL && *dsetp == NULL);
3762
3763         mgr = source->mgr;
3764
3765         dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t));
3766         if (dset == NULL)
3767                 return (ISC_R_NOMEMORY);
3768         memset(dset, 0, sizeof(*dset));
3769
3770         result = isc_mutex_init(&dset->lock);
3771         if (result != ISC_R_SUCCESS)
3772                 goto fail_alloc;
3773
3774         dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n);
3775         if (dset == NULL) {
3776                 result = ISC_R_NOMEMORY;
3777                 goto fail_lock;
3778         }
3779
3780         isc_mem_attach(mctx, &dset->mctx);
3781         dset->ndisp = n;
3782         dset->cur = 0;
3783
3784         dset->dispatches[0] = NULL;
3785         dns_dispatch_attach(source, &dset->dispatches[0]);
3786
3787         LOCK(&mgr->lock);
3788         for (i = 1; i < n; i++) {
3789                 dset->dispatches[i] = NULL;
3790                 result = dispatch_createudp(mgr, sockmgr, taskmgr,
3791                                             &source->local,
3792                                             source->maxrequests,
3793                                             source->attributes,
3794                                             &dset->dispatches[i],
3795                                             source->socket);
3796                 if (result != ISC_R_SUCCESS)
3797                         goto fail;
3798         }
3799
3800         UNLOCK(&mgr->lock);
3801         *dsetp = dset;
3802
3803         return (ISC_R_SUCCESS);
3804
3805  fail:
3806         UNLOCK(&mgr->lock);
3807
3808         for (j = 0; j < i; j++)
3809                 dns_dispatch_detach(&(dset->dispatches[j]));
3810         isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n);
3811         if (dset->mctx == mctx)
3812                 isc_mem_detach(&dset->mctx);
3813
3814  fail_lock:
3815         DESTROYLOCK(&dset->lock);
3816
3817  fail_alloc:
3818         isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t));
3819         return (result);
3820 }
3821
3822 void
3823 dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) {
3824         int i;
3825
3826         REQUIRE(dset != NULL);
3827
3828         for (i = 0; i < dset->ndisp; i++) {
3829                 isc_socket_t *sock;
3830                 sock = dns_dispatch_getsocket(dset->dispatches[i]);
3831                 isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL);
3832         }
3833 }
3834
3835 void
3836 dns_dispatchset_destroy(dns_dispatchset_t **dsetp) {
3837         dns_dispatchset_t *dset;
3838         int i;
3839
3840         REQUIRE(dsetp != NULL && *dsetp != NULL);
3841
3842         dset = *dsetp;
3843         for (i = 0; i < dset->ndisp; i++)
3844                 dns_dispatch_detach(&(dset->dispatches[i]));
3845         isc_mem_put(dset->mctx, dset->dispatches,
3846                     sizeof(dns_dispatch_t *) * dset->ndisp);
3847         DESTROYLOCK(&dset->lock);
3848         isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t));
3849
3850         *dsetp = NULL;
3851 }
3852
#if 0
/*
 * Debugging aid (compiled out): print the pointer and formatted local
 * address of every dispatch on the manager's list to standard output.
 */
void
dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
        dns_dispatch_t *disp;
        char addrtext[1024];

        for (disp = ISC_LIST_HEAD(mgr->list);
             disp != NULL;
             disp = ISC_LIST_NEXT(disp, link)) {
                isc_sockaddr_format(&disp->local, addrtext, sizeof(addrtext));
                printf("\tdispatch %p, addr %s\n", disp, addrtext);
        }
}
#endif