]> CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - contrib/bind9/lib/dns/dispatch.c
Update BIND to 9.9.7.
[FreeBSD/stable/9.git] / contrib / bind9 / lib / dns / dispatch.c
1 /*
2  * Copyright (C) 2004-2009, 2011-2015  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: dispatch.c,v 1.175 2011/11/29 01:03:47 marka Exp $ */
19
20 /*! \file */
21
22 #include <config.h>
23
24 #include <stdlib.h>
25 #include <sys/types.h>
26 #include <unistd.h>
27 #include <stdlib.h>
28
29 #include <isc/entropy.h>
30 #include <isc/mem.h>
31 #include <isc/mutex.h>
32 #include <isc/portset.h>
33 #include <isc/print.h>
34 #include <isc/random.h>
35 #include <isc/socket.h>
36 #include <isc/stats.h>
37 #include <isc/string.h>
38 #include <isc/task.h>
39 #include <isc/time.h>
40 #include <isc/util.h>
41
42 #include <dns/acl.h>
43 #include <dns/dispatch.h>
44 #include <dns/events.h>
45 #include <dns/log.h>
46 #include <dns/message.h>
47 #include <dns/portlist.h>
48 #include <dns/stats.h>
49 #include <dns/tcpmsg.h>
50 #include <dns/types.h>
51
52 typedef ISC_LIST(dns_dispentry_t)       dns_displist_t;
53
54 typedef struct dispsocket               dispsocket_t;
55 typedef ISC_LIST(dispsocket_t)          dispsocketlist_t;
56
57 typedef struct dispportentry            dispportentry_t;
58 typedef ISC_LIST(dispportentry_t)       dispportlist_t;
59
60 /* ARC4 Random generator state */
61 typedef struct arc4ctx {
62         isc_uint8_t     i;
63         isc_uint8_t     j;
64         isc_uint8_t     s[256];
65         int             count;
66         isc_entropy_t   *entropy;       /*%< entropy source for ARC4 */
67         isc_mutex_t     *lock;
68 } arc4ctx_t;
69
70 typedef struct dns_qid {
71         unsigned int    magic;
72         unsigned int    qid_nbuckets;   /*%< hash table size */
73         unsigned int    qid_increment;  /*%< id increment on collision */
74         isc_mutex_t     lock;
75         dns_displist_t  *qid_table;     /*%< the table itself */
76         dispsocketlist_t *sock_table;   /*%< socket table */
77 } dns_qid_t;
78
79 struct dns_dispatchmgr {
80         /* Unlocked. */
81         unsigned int                    magic;
82         isc_mem_t                      *mctx;
83         dns_acl_t                      *blackhole;
84         dns_portlist_t                 *portlist;
85         isc_stats_t                    *stats;
86         isc_entropy_t                  *entropy; /*%< entropy source */
87
88         /* Locked by "lock". */
89         isc_mutex_t                     lock;
90         unsigned int                    state;
91         ISC_LIST(dns_dispatch_t)        list;
92
93         /* Locked by arc4_lock. */
94         isc_mutex_t                     arc4_lock;
95         arc4ctx_t                       arc4ctx;    /*%< ARC4 context for QID */
96
97         /* locked by buffer lock */
98         dns_qid_t                       *qid;
99         isc_mutex_t                     buffer_lock;
100         unsigned int                    buffers;    /*%< allocated buffers */
101         unsigned int                    buffersize; /*%< size of each buffer */
102         unsigned int                    maxbuffers; /*%< max buffers */
103
104         /* Locked internally. */
105         isc_mutex_t                     depool_lock;
106         isc_mempool_t                  *depool; /*%< pool for dispatch events */
107         isc_mutex_t                     rpool_lock;
108         isc_mempool_t                  *rpool;  /*%< pool for replies */
109         isc_mutex_t                     dpool_lock;
110         isc_mempool_t                  *dpool;  /*%< dispatch allocations */
111         isc_mutex_t                     bpool_lock;
112         isc_mempool_t                  *bpool;  /*%< pool for buffers */
113         isc_mutex_t                     spool_lock;
114         isc_mempool_t                  *spool;  /*%< pool for dispsocks */
115
116         /*%
117          * Locked by qid->lock if qid exists; otherwise, can be used without
118          * being locked.
119          * Memory footprint considerations: this is a simple implementation of
120          * available ports, i.e., an ordered array of the actual port numbers.
121          * This will require about 256KB of memory in the worst case (128KB for
122          * each of IPv4 and IPv6).  We could reduce it by representing it as a
123          * more sophisticated way such as a list (or array) of ranges that are
124          * searched to identify a specific port.  Our decision here is the saved
125          * memory isn't worth the implementation complexity, considering the
126          * fact that the whole BIND9 process (which is mainly named) already
127          * requires a pretty large memory footprint.  We may, however, have to
128          * revisit the decision when we want to use it as a separate module for
129          * an environment where memory requirement is severer.
130          */
131         in_port_t       *v4ports;       /*%< available ports for IPv4 */
132         unsigned int    nv4ports;       /*%< # of available ports for IPv4 */
133         in_port_t       *v6ports;       /*%< available ports for IPv4 */
134         unsigned int    nv6ports;       /*%< # of available ports for IPv4 */
135 };
136
/* Bit in the manager's 'state' field, and the test for it. */
#define MGR_SHUTTINGDOWN        0x00000001U
#define MGR_IS_SHUTTINGDOWN(l)  (((l)->state & MGR_SHUTTINGDOWN) != 0)

/* True when 'd' has the DNS_DISPATCHATTR_PRIVATE attribute bit set. */
#define IS_PRIVATE(d)   (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
141
142 struct dns_dispentry {
143         unsigned int                    magic;
144         dns_dispatch_t                 *disp;
145         dns_messageid_t                 id;
146         in_port_t                       port;
147         unsigned int                    bucket;
148         isc_sockaddr_t                  host;
149         isc_task_t                     *task;
150         isc_taskaction_t                action;
151         void                           *arg;
152         isc_boolean_t                   item_out;
153         dispsocket_t                    *dispsocket;
154         ISC_LIST(dns_dispatchevent_t)   items;
155         ISC_LINK(dns_dispentry_t)       link;
156 };
157
/*%
 * Upper limit on the number of dispatch sockets kept pooled for reuse.
 * The right value may vary, but experiments have shown that a busy caching
 * server may need more than 1000 sockets open concurrently.  The maximum
 * allowable number of dispatch sockets (per manager) is set to twice this
 * value.
 */
#ifndef DNS_DISPATCH_POOLSOCKS
#define DNS_DISPATCH_POOLSOCKS                  2048
#endif

/*%
 * Quota on the number of dispatch sockets.  Once a dispatch holds more
 * sockets than this, new queries purge the oldest ones, so that a massive
 * number of outstanding queries cannot block subsequent queries (especially
 * when the older ones take longer and end in timeout).
 */
#ifndef DNS_DISPATCH_SOCKSQUOTA
#define DNS_DISPATCH_SOCKSQUOTA                 3072
#endif
178
179 struct dispsocket {
180         unsigned int                    magic;
181         isc_socket_t                    *socket;
182         dns_dispatch_t                  *disp;
183         isc_sockaddr_t                  host;
184         in_port_t                       localport; /* XXX: should be removed later */
185         dispportentry_t                 *portentry;
186         dns_dispentry_t                 *resp;
187         isc_task_t                      *task;
188         ISC_LINK(dispsocket_t)          link;
189         unsigned int                    bucket;
190         ISC_LINK(dispsocket_t)          blink;
191 };
192
193 /*%
194  * A port table entry.  We remember every port we first open in a table with a
195  * reference counter so that we can 'reuse' the same port (with different
196  * destination addresses) using the SO_REUSEADDR socket option.
197  */
198 struct dispportentry {
199         in_port_t                       port;
200         unsigned int                    refs;
201         ISC_LINK(struct dispportentry)  link;
202 };
203
/* Number of buckets in a dispatch's port table. */
#ifndef DNS_DISPATCH_PORTTABLESIZE
#define DNS_DISPATCH_PORTTABLESIZE      1024
#endif

/* Bucket value that does not refer to any real bucket. */
#define INVALID_BUCKET          (0xffffdead)

/*%
 * Number of tasks for each dispatch that uses separate sockets for different
 * transactions.  This must be a power of 2, as it divides 32 bit numbers to
 * get a uniformly random task selection.  See get_dispsocket().
 */
#define MAX_INTERNAL_TASKS      64
216
217 struct dns_dispatch {
218         /* Unlocked. */
219         unsigned int            magic;          /*%< magic */
220         dns_dispatchmgr_t      *mgr;            /*%< dispatch manager */
221         int                     ntasks;
222         /*%
223          * internal task buckets.  We use multiple tasks to distribute various
224          * socket events well when using separate dispatch sockets.  We use the
225          * 1st task (task[0]) for internal control events.
226          */
227         isc_task_t             *task[MAX_INTERNAL_TASKS];
228         isc_socket_t           *socket;         /*%< isc socket attached to */
229         isc_sockaddr_t          local;          /*%< local address */
230         in_port_t               localport;      /*%< local UDP port */
231         unsigned int            maxrequests;    /*%< max requests */
232         isc_event_t            *ctlevent;
233
234         isc_mutex_t             sepool_lock;
235         isc_mempool_t          *sepool;         /*%< pool for socket events */
236
237         /*% Locked by mgr->lock. */
238         ISC_LINK(dns_dispatch_t) link;
239
240         /* Locked by "lock". */
241         isc_mutex_t             lock;           /*%< locks all below */
242         isc_sockettype_t        socktype;
243         unsigned int            attributes;
244         unsigned int            refcount;       /*%< number of users */
245         dns_dispatchevent_t    *failsafe_ev;    /*%< failsafe cancel event */
246         unsigned int            shutting_down : 1,
247                                 shutdown_out : 1,
248                                 connected : 1,
249                                 tcpmsg_valid : 1,
250                                 recv_pending : 1; /*%< is a recv() pending? */
251         isc_result_t            shutdown_why;
252         ISC_LIST(dispsocket_t)  activesockets;
253         ISC_LIST(dispsocket_t)  inactivesockets;
254         unsigned int            nsockets;
255         unsigned int            requests;       /*%< how many requests we have */
256         unsigned int            tcpbuffers;     /*%< allocated buffers */
257         dns_tcpmsg_t            tcpmsg;         /*%< for tcp streams */
258         dns_qid_t               *qid;
259         arc4ctx_t               arc4ctx;        /*%< for QID/UDP port num */
260         dispportlist_t          *port_table;    /*%< hold ports 'owned' by us */
261         isc_mempool_t           *portpool;      /*%< port table entries  */
262 };
263
/* Magic numbers and validity checks for the structures in this file. */
#define QID_MAGIC               ISC_MAGIC('Q', 'i', 'd', ' ')
#define VALID_QID(e)            ISC_MAGIC_VALID((e), QID_MAGIC)

#define RESPONSE_MAGIC          ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(e)       ISC_MAGIC_VALID((e), RESPONSE_MAGIC)

#define DISPSOCK_MAGIC          ISC_MAGIC('D', 's', 'o', 'c')
#define VALID_DISPSOCK(e)       ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)

#define DISPATCH_MAGIC          ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(e)       ISC_MAGIC_VALID((e), DISPATCH_MAGIC)

#define DNS_DISPATCHMGR_MAGIC   ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(e)    ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)

/*
 * Select the qid table for a dispatch: a TCP dispatch uses its own table,
 * any other dispatch uses the manager's.  The whole expansion is
 * parenthesized so the conditional cannot re-associate with operators
 * around the macro invocation.
 */
#define DNS_QID(disp) \
        (((disp)->socktype == isc_sockettype_tcp) ? \
         (disp)->qid : (disp)->mgr->qid)

/*
 * Select the ARC4 context for a dispatch: a UDP dispatch uses its own
 * context, any other dispatch uses the manager's.  Fully parenthesized for
 * the same reason as DNS_QID().
 */
#define DISP_ARC4CTX(disp) \
        (((disp)->socktype == isc_sockettype_udp) ? \
         (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))

/*%
 * Locking a query port buffer is a bit tricky.  We access the buffer without
 * locking until qid is created.  Technically, there is a possibility of race
 * between the creation of qid and access to the port buffer; in practice,
 * however, this should be safe because qid isn't created until the first
 * dispatch is created and there should be no contending situation until then.
 *
 * Both macros are wrapped in do { } while (0) so that they behave as a
 * single statement and cannot capture an 'else' that follows them.
 */
#define PORTBUFLOCK(mgr) \
        do { \
                if ((mgr)->qid != NULL) \
                        LOCK(&((mgr)->qid->lock)); \
        } while (0)
#define PORTBUFUNLOCK(mgr) \
        do { \
                if ((mgr)->qid != NULL) \
                        UNLOCK((&(mgr)->qid->lock)); \
        } while (0)
293
294 /*
295  * Statics.
296  */
297 static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
298                                      dns_messageid_t, in_port_t, unsigned int);
299 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
300 static void destroy_disp(isc_task_t *task, isc_event_t *event);
301 static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
302 static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
303 static void udp_exrecv(isc_task_t *, isc_event_t *);
304 static void udp_shrecv(isc_task_t *, isc_event_t *);
305 static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
306 static void tcp_recv(isc_task_t *, isc_event_t *);
307 static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
308 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
309                              in_port_t);
310 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
311 static void *allocate_udp_buffer(dns_dispatch_t *disp);
312 static inline void free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
313 static inline dns_dispatchevent_t *allocate_devent(dns_dispatch_t *disp);
314 static void do_cancel(dns_dispatch_t *disp);
315 static dns_dispentry_t *linear_first(dns_qid_t *disp);
316 static dns_dispentry_t *linear_next(dns_qid_t *disp,
317                                     dns_dispentry_t *resp);
318 static void dispatch_free(dns_dispatch_t **dispp);
319 static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
320                                   dns_dispatch_t *disp,
321                                   isc_socketmgr_t *sockmgr,
322                                   isc_sockaddr_t *localaddr,
323                                   isc_socket_t **sockp,
324                                   isc_socket_t *dup_socket);
325 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
326                                        isc_socketmgr_t *sockmgr,
327                                        isc_taskmgr_t *taskmgr,
328                                        isc_sockaddr_t *localaddr,
329                                        unsigned int maxrequests,
330                                        unsigned int attributes,
331                                        dns_dispatch_t **dispp,
332                                        isc_socket_t *dup_socket);
333 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
334 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
335 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
336                                  unsigned int increment, dns_qid_t **qidp,
337                                  isc_boolean_t needaddrtable);
338 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
339 static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
340                                 unsigned int options, isc_socket_t **sockp,
341                                 isc_socket_t *dup_socket);
342 static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
343                                    isc_sockaddr_t *sockaddrp);
344
345 #define LVL(x) ISC_LOG_DEBUG(x)
346
347 static void
348 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
349      ISC_FORMAT_PRINTF(3, 4);
350
351 static void
352 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
353         char msgbuf[2048];
354         va_list ap;
355
356         if (! isc_log_wouldlog(dns_lctx, level))
357                 return;
358
359         va_start(ap, fmt);
360         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
361         va_end(ap);
362
363         isc_log_write(dns_lctx,
364                       DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
365                       level, "dispatchmgr %p: %s", mgr, msgbuf);
366 }
367
368 static inline void
369 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
370         if (mgr->stats != NULL)
371                 isc_stats_increment(mgr->stats, counter);
372 }
373
374 static void
375 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
376      ISC_FORMAT_PRINTF(3, 4);
377
378 static void
379 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
380         char msgbuf[2048];
381         va_list ap;
382
383         if (! isc_log_wouldlog(dns_lctx, level))
384                 return;
385
386         va_start(ap, fmt);
387         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
388         va_end(ap);
389
390         isc_log_write(dns_lctx,
391                       DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
392                       level, "dispatch %p: %s", disp, msgbuf);
393 }
394
395 static void
396 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
397             int level, const char *fmt, ...)
398      ISC_FORMAT_PRINTF(4, 5);
399
400 static void
401 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
402             int level, const char *fmt, ...)
403 {
404         char msgbuf[2048];
405         char peerbuf[256];
406         va_list ap;
407
408         if (! isc_log_wouldlog(dns_lctx, level))
409                 return;
410
411         va_start(ap, fmt);
412         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
413         va_end(ap);
414
415         if (VALID_RESPONSE(resp)) {
416                 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
417                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
418                               DNS_LOGMODULE_DISPATCH, level,
419                               "dispatch %p response %p %s: %s", disp, resp,
420                               peerbuf, msgbuf);
421         } else {
422                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
423                               DNS_LOGMODULE_DISPATCH, level,
424                               "dispatch %p req/resp %p: %s", disp, resp,
425                               msgbuf);
426         }
427 }
428
429 /*%
430  * ARC4 random number generator derived from OpenBSD.
431  * Only dispatch_random() and dispatch_uniformrandom() are expected
432  * to be called from general dispatch routines; the rest of them are subroutines
433  * for these two.
434  *
435  * The original copyright follows:
436  * Copyright (c) 1996, David Mazieres <dm@uun.org>
437  * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
438  *
439  * Permission to use, copy, modify, and distribute this software for any
440  * purpose with or without fee is hereby granted, provided that the above
441  * copyright notice and this permission notice appear in all copies.
442  *
443  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
444  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
445  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
446  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
447  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
448  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
449  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
450  */
451 #ifdef BIND9
452 static void
453 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
454                     isc_mutex_t *lock)
455 {
456         int n;
457         for (n = 0; n < 256; n++)
458                 actx->s[n] = n;
459         actx->i = 0;
460         actx->j = 0;
461         actx->count = 0;
462         actx->entropy = entropy; /* don't have to attach */
463         actx->lock = lock;
464 }
465
466 static void
467 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
468         int n;
469         isc_uint8_t si;
470
471         actx->i--;
472         for (n = 0; n < 256; n++) {
473                 actx->i = (actx->i + 1);
474                 si = actx->s[actx->i];
475                 actx->j = (actx->j + si + dat[n % datlen]);
476                 actx->s[actx->i] = actx->s[actx->j];
477                 actx->s[actx->j] = si;
478         }
479         actx->j = actx->i;
480 }
481
482 static inline isc_uint8_t
483 dispatch_arc4get8(arc4ctx_t *actx) {
484         isc_uint8_t si, sj;
485
486         actx->i = (actx->i + 1);
487         si = actx->s[actx->i];
488         actx->j = (actx->j + si);
489         sj = actx->s[actx->j];
490         actx->s[actx->i] = sj;
491         actx->s[actx->j] = si;
492
493         return (actx->s[(si + sj) & 0xff]);
494 }
495
496 static inline isc_uint16_t
497 dispatch_arc4get16(arc4ctx_t *actx) {
498         isc_uint16_t val;
499
500         val = dispatch_arc4get8(actx) << 8;
501         val |= dispatch_arc4get8(actx);
502
503         return (val);
504 }
505
506 static void
507 dispatch_arc4stir(arc4ctx_t *actx) {
508         int i;
509         union {
510                 unsigned char rnd[128];
511                 isc_uint32_t rnd32[32];
512         } rnd;
513         isc_result_t result;
514
515         if (actx->entropy != NULL) {
516                 /*
517                  * We accept any quality of random data to avoid blocking.
518                  */
519                 result = isc_entropy_getdata(actx->entropy, rnd.rnd,
520                                              sizeof(rnd), NULL, 0);
521                 RUNTIME_CHECK(result == ISC_R_SUCCESS);
522         } else {
523                 for (i = 0; i < 32; i++)
524                         isc_random_get(&rnd.rnd32[i]);
525         }
526         dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
527
528         /*
529          * Discard early keystream, as per recommendations in:
530          * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
531          */
532         for (i = 0; i < 256; i++)
533                 (void)dispatch_arc4get8(actx);
534
535         /*
536          * Derived from OpenBSD's implementation.  The rationale is not clear,
537          * but should be conservative enough in safety, and reasonably large
538          * for efficiency.
539          */
540         actx->count = 1600000;
541 }
542
543 static isc_uint16_t
544 dispatch_random(arc4ctx_t *actx) {
545         isc_uint16_t result;
546
547         if (actx->lock != NULL)
548                 LOCK(actx->lock);
549
550         actx->count -= sizeof(isc_uint16_t);
551         if (actx->count <= 0)
552                 dispatch_arc4stir(actx);
553         result = dispatch_arc4get16(actx);
554
555         if (actx->lock != NULL)
556                 UNLOCK(actx->lock);
557
558         return (result);
559 }
560 #else
561 /*
562  * For general purpose library, we don't have to be too strict about the
563  * quality of random values.  Performance doesn't matter much, either.
564  * So we simply use the isc_random module to keep the library as small as
565  * possible.
566  */
567
568 static void
569 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
570                     isc_mutex_t *lock)
571 {
572         UNUSED(actx);
573         UNUSED(entropy);
574         UNUSED(lock);
575
576         return;
577 }
578
579 static isc_uint16_t
580 dispatch_random(arc4ctx_t *actx) {
581         isc_uint32_t r;
582
583         UNUSED(actx);
584
585         isc_random_get(&r);
586         return (r & 0xffff);
587 }
588 #endif  /* BIND9 */
589
590 static isc_uint16_t
591 dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
592         isc_uint16_t min, r;
593
594         if (upper_bound < 2)
595                 return (0);
596
597         /*
598          * Ensure the range of random numbers [min, 0xffff] be a multiple of
599          * upper_bound and contain at least a half of the 16 bit range.
600          */
601
602         if (upper_bound > 0x8000)
603                 min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
604         else
605                 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
606
607         /*
608          * This could theoretically loop forever but each retry has
609          * p > 0.5 (worst case, usually far better) of selecting a
610          * number inside the range we need, so it should rarely need
611          * to re-roll.
612          */
613         for (;;) {
614                 r = dispatch_random(actx);
615                 if (r >= min)
616                         break;
617         }
618
619         return (r % upper_bound);
620 }
621
622 /*
623  * Return a hash of the destination and message id.
624  */
625 static isc_uint32_t
626 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
627          in_port_t port)
628 {
629         unsigned int ret;
630
631         ret = isc_sockaddr_hash(dest, ISC_TRUE);
632         ret ^= (id << 16) | port;
633         ret %= qid->qid_nbuckets;
634
635         INSIST(ret < qid->qid_nbuckets);
636
637         return (ret);
638 }
639
640 /*
641  * Find the first entry in 'qid'.  Returns NULL if there are no entries.
642  */
643 static dns_dispentry_t *
644 linear_first(dns_qid_t *qid) {
645         dns_dispentry_t *ret;
646         unsigned int bucket;
647
648         bucket = 0;
649
650         while (bucket < qid->qid_nbuckets) {
651                 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
652                 if (ret != NULL)
653                         return (ret);
654                 bucket++;
655         }
656
657         return (NULL);
658 }
659
660 /*
661  * Find the next entry after 'resp' in 'qid'.  Return NULL if there are
662  * no more entries.
663  */
664 static dns_dispentry_t *
665 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
666         dns_dispentry_t *ret;
667         unsigned int bucket;
668
669         ret = ISC_LIST_NEXT(resp, link);
670         if (ret != NULL)
671                 return (ret);
672
673         bucket = resp->bucket;
674         bucket++;
675         while (bucket < qid->qid_nbuckets) {
676                 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
677                 if (ret != NULL)
678                         return (ret);
679                 bucket++;
680         }
681
682         return (NULL);
683 }
684
685 /*
686  * The dispatch must be locked.
687  */
688 static isc_boolean_t
689 destroy_disp_ok(dns_dispatch_t *disp)
690 {
691         if (disp->refcount != 0)
692                 return (ISC_FALSE);
693
694         if (disp->recv_pending != 0)
695                 return (ISC_FALSE);
696
697         if (!ISC_LIST_EMPTY(disp->activesockets))
698                 return (ISC_FALSE);
699
700         if (disp->shutting_down == 0)
701                 return (ISC_FALSE);
702
703         return (ISC_TRUE);
704 }
705
706 /*
707  * Called when refcount reaches 0 (and safe to destroy).
708  *
709  * The dispatcher must be locked.
710  * The manager must not be locked.
711  */
712 static void
713 destroy_disp(isc_task_t *task, isc_event_t *event) {
714         dns_dispatch_t *disp;
715         dns_dispatchmgr_t *mgr;
716         isc_boolean_t killmgr;
717         dispsocket_t *dispsocket;
718         int i;
719
720         INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
721
722         UNUSED(task);
723
724         disp = event->ev_arg;
725         mgr = disp->mgr;
726
727         LOCK(&mgr->lock);
728         ISC_LIST_UNLINK(mgr->list, disp, link);
729
730         dispatch_log(disp, LVL(90),
731                      "shutting down; detaching from sock %p, task %p",
732                      disp->socket, disp->task[0]); /* XXXX */
733
734         if (disp->sepool != NULL) {
735                 isc_mempool_destroy(&disp->sepool);
736                 (void)isc_mutex_destroy(&disp->sepool_lock);
737         }
738
739         if (disp->socket != NULL)
740                 isc_socket_detach(&disp->socket);
741         while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
742                 ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
743                 destroy_dispsocket(disp, &dispsocket);
744         }
745         for (i = 0; i < disp->ntasks; i++)
746                 isc_task_detach(&disp->task[i]);
747         isc_event_free(&event);
748
749         dispatch_free(&disp);
750
751         killmgr = destroy_mgr_ok(mgr);
752         UNLOCK(&mgr->lock);
753         if (killmgr)
754                 destroy_mgr(&mgr);
755 }
756
757 /*%
758  * Manipulate port table per dispatch: find an entry for a given port number,
759  * create a new entry, and decrement a given entry with possible clean-up.
760  */
761 static dispportentry_t *
762 port_search(dns_dispatch_t *disp, in_port_t port) {
763         dispportentry_t *portentry;
764
765         REQUIRE(disp->port_table != NULL);
766
767         portentry = ISC_LIST_HEAD(disp->port_table[port %
768                                                    DNS_DISPATCH_PORTTABLESIZE]);
769         while (portentry != NULL) {
770                 if (portentry->port == port)
771                         return (portentry);
772                 portentry = ISC_LIST_NEXT(portentry, link);
773         }
774
775         return (NULL);
776 }
777
778 static dispportentry_t *
779 new_portentry(dns_dispatch_t *disp, in_port_t port) {
780         dispportentry_t *portentry;
781         dns_qid_t *qid;
782
783         REQUIRE(disp->port_table != NULL);
784
785         portentry = isc_mempool_get(disp->portpool);
786         if (portentry == NULL)
787                 return (portentry);
788
789         portentry->port = port;
790         portentry->refs = 1;
791         ISC_LINK_INIT(portentry, link);
792         qid = DNS_QID(disp);
793         LOCK(&qid->lock);
794         ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
795                         portentry, link);
796         UNLOCK(&qid->lock);
797
798         return (portentry);
799 }
800
801 /*%
802  * The caller must not hold the qid->lock.
803  */
804 static void
805 deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
806         dispportentry_t *portentry = *portentryp;
807         dns_qid_t *qid;
808
809         REQUIRE(disp->port_table != NULL);
810         REQUIRE(portentry != NULL && portentry->refs > 0);
811
812         qid = DNS_QID(disp);
813         LOCK(&qid->lock);
814         portentry->refs--;
815
816         if (portentry->refs == 0) {
817                 ISC_LIST_UNLINK(disp->port_table[portentry->port %
818                                                  DNS_DISPATCH_PORTTABLESIZE],
819                                 portentry, link);
820                 isc_mempool_put(disp->portpool, portentry);
821         }
822
823         /*
824          * Set '*portentryp' to NULL inside the lock so that
825          * dispsock->portentry does not change in socket_search.
826          */
827         *portentryp = NULL;
828
829         UNLOCK(&qid->lock);
830 }
831
832 /*%
833  * Find a dispsocket for socket address 'dest', and port number 'port'.
834  * Return NULL if no such entry exists.  Requires qid->lock to be held.
835  */
836 static dispsocket_t *
837 socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
838               unsigned int bucket)
839 {
840         dispsocket_t *dispsock;
841
842         REQUIRE(VALID_QID(qid));
843         REQUIRE(bucket < qid->qid_nbuckets);
844
845         dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
846
847         while (dispsock != NULL) {
848                 if (dispsock->portentry != NULL &&
849                     dispsock->portentry->port == port &&
850                     isc_sockaddr_equal(dest, &dispsock->host))
851                         return (dispsock);
852                 dispsock = ISC_LIST_NEXT(dispsock, blink);
853         }
854
855         return (NULL);
856 }
857
858 /*%
859  * Make a new socket for a single dispatch with a random port number.
860  * The caller must hold the disp->lock
861  */
862 static isc_result_t
863 get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
864                isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp,
865                in_port_t *portp)
866 {
867         int i;
868         isc_uint32_t r;
869         dns_dispatchmgr_t *mgr = disp->mgr;
870         isc_socket_t *sock = NULL;
871         isc_result_t result = ISC_R_FAILURE;
872         in_port_t port;
873         isc_sockaddr_t localaddr;
874         unsigned int bucket = 0;
875         dispsocket_t *dispsock;
876         unsigned int nports;
877         in_port_t *ports;
878         unsigned int bindoptions;
879         dispportentry_t *portentry = NULL;
880         dns_qid_t *qid;
881
882         if (isc_sockaddr_pf(&disp->local) == AF_INET) {
883                 nports = disp->mgr->nv4ports;
884                 ports = disp->mgr->v4ports;
885         } else {
886                 nports = disp->mgr->nv6ports;
887                 ports = disp->mgr->v6ports;
888         }
889         if (nports == 0)
890                 return (ISC_R_ADDRNOTAVAIL);
891
892         dispsock = ISC_LIST_HEAD(disp->inactivesockets);
893         if (dispsock != NULL) {
894                 ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
895                 sock = dispsock->socket;
896                 dispsock->socket = NULL;
897         } else {
898                 dispsock = isc_mempool_get(mgr->spool);
899                 if (dispsock == NULL)
900                         return (ISC_R_NOMEMORY);
901
902                 disp->nsockets++;
903                 dispsock->socket = NULL;
904                 dispsock->disp = disp;
905                 dispsock->resp = NULL;
906                 dispsock->portentry = NULL;
907                 isc_random_get(&r);
908                 dispsock->task = NULL;
909                 isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
910                 ISC_LINK_INIT(dispsock, link);
911                 ISC_LINK_INIT(dispsock, blink);
912                 dispsock->magic = DISPSOCK_MAGIC;
913         }
914
915         /*
916          * Pick up a random UDP port and open a new socket with it.  Avoid
917          * choosing ports that share the same destination because it will be
918          * very likely to fail in bind(2) or connect(2).
919          */
920         localaddr = disp->local;
921         qid = DNS_QID(disp);
922
923         for (i = 0; i < 64; i++) {
924                 port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
925                                                         nports)];
926                 isc_sockaddr_setport(&localaddr, port);
927
928                 LOCK(&qid->lock);
929                 bucket = dns_hash(qid, dest, 0, port);
930                 if (socket_search(qid, dest, port, bucket) != NULL) {
931                         UNLOCK(&qid->lock);
932                         continue;
933                 }
934                 UNLOCK(&qid->lock);
935                 bindoptions = 0;
936                 portentry = port_search(disp, port);
937
938                 if (portentry != NULL)
939                         bindoptions |= ISC_SOCKET_REUSEADDRESS;
940                 result = open_socket(sockmgr, &localaddr, bindoptions, &sock,
941                                      NULL);
942                 if (result == ISC_R_SUCCESS) {
943                         if (portentry == NULL) {
944                                 portentry = new_portentry(disp, port);
945                                 if (portentry == NULL) {
946                                         result = ISC_R_NOMEMORY;
947                                         break;
948                                 }
949                         } else {
950                                 LOCK(&qid->lock);
951                                 portentry->refs++;
952                                 UNLOCK(&qid->lock);
953                         }
954                         break;
955                 } else if (result == ISC_R_NOPERM) {
956                         char buf[ISC_SOCKADDR_FORMATSIZE];
957                         isc_sockaddr_format(&localaddr, buf, sizeof(buf));
958                         dispatch_log(disp, ISC_LOG_WARNING,
959                                      "open_socket(%s) -> %s: continuing",
960                                      buf, isc_result_totext(result));
961                 } else if (result != ISC_R_ADDRINUSE)
962                         break;
963         }
964
965         if (result == ISC_R_SUCCESS) {
966                 dispsock->socket = sock;
967                 dispsock->host = *dest;
968                 dispsock->portentry = portentry;
969                 dispsock->bucket = bucket;
970                 LOCK(&qid->lock);
971                 ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
972                 UNLOCK(&qid->lock);
973                 *dispsockp = dispsock;
974                 *portp = port;
975         } else {
976                 /*
977                  * We could keep it in the inactive list, but since this should
978                  * be an exceptional case and might be resource shortage, we'd
979                  * rather destroy it.
980                  */
981                 if (sock != NULL)
982                         isc_socket_detach(&sock);
983                 destroy_dispsocket(disp, &dispsock);
984         }
985
986         return (result);
987 }
988
989 /*%
990  * Destroy a dedicated dispatch socket.
991  */
992 static void
993 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
994         dispsocket_t *dispsock;
995         dns_qid_t *qid;
996
997         /*
998          * The dispatch must be locked.
999          */
1000
1001         REQUIRE(dispsockp != NULL && *dispsockp != NULL);
1002         dispsock = *dispsockp;
1003         REQUIRE(!ISC_LINK_LINKED(dispsock, link));
1004
1005         disp->nsockets--;
1006         dispsock->magic = 0;
1007         if (dispsock->portentry != NULL)
1008                 deref_portentry(disp, &dispsock->portentry);
1009         if (dispsock->socket != NULL)
1010                 isc_socket_detach(&dispsock->socket);
1011         if (ISC_LINK_LINKED(dispsock, blink)) {
1012                 qid = DNS_QID(disp);
1013                 LOCK(&qid->lock);
1014                 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1015                                 blink);
1016                 UNLOCK(&qid->lock);
1017         }
1018         if (dispsock->task != NULL)
1019                 isc_task_detach(&dispsock->task);
1020         isc_mempool_put(disp->mgr->spool, dispsock);
1021
1022         *dispsockp = NULL;
1023 }
1024
1025 /*%
1026  * Deactivate a dedicated dispatch socket.  Move it to the inactive list for
1027  * future reuse unless the total number of sockets are exceeding the maximum.
1028  */
1029 static void
1030 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1031         isc_result_t result;
1032         dns_qid_t *qid;
1033
1034         /*
1035          * The dispatch must be locked.
1036          */
1037         ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
1038         if (dispsock->resp != NULL) {
1039                 INSIST(dispsock->resp->dispsocket == dispsock);
1040                 dispsock->resp->dispsocket = NULL;
1041         }
1042
1043         INSIST(dispsock->portentry != NULL);
1044         deref_portentry(disp, &dispsock->portentry);
1045
1046 #ifdef BIND9
1047         if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
1048                 destroy_dispsocket(disp, &dispsock);
1049         else {
1050                 result = isc_socket_close(dispsock->socket);
1051
1052                 qid = DNS_QID(disp);
1053                 LOCK(&qid->lock);
1054                 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1055                                 blink);
1056                 UNLOCK(&qid->lock);
1057
1058                 if (result == ISC_R_SUCCESS)
1059                         ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
1060                 else {
1061                         /*
1062                          * If the underlying system does not allow this
1063                          * optimization, destroy this temporary structure (and
1064                          * create a new one for a new transaction).
1065                          */
1066                         INSIST(result == ISC_R_NOTIMPLEMENTED);
1067                         destroy_dispsocket(disp, &dispsock);
1068                 }
1069         }
1070 #else
1071         /* This kind of optimization isn't necessary for normal use */
1072         UNUSED(qid);
1073         UNUSED(result);
1074
1075         destroy_dispsocket(disp, &dispsock);
1076 #endif
1077 }
1078
1079 /*
1080  * Find an entry for query ID 'id', socket address 'dest', and port number
1081  * 'port'.
1082  * Return NULL if no such entry exists.
1083  */
1084 static dns_dispentry_t *
1085 entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
1086              in_port_t port, unsigned int bucket)
1087 {
1088         dns_dispentry_t *res;
1089
1090         REQUIRE(VALID_QID(qid));
1091         REQUIRE(bucket < qid->qid_nbuckets);
1092
1093         res = ISC_LIST_HEAD(qid->qid_table[bucket]);
1094
1095         while (res != NULL) {
1096                 if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
1097                     res->port == port) {
1098                         return (res);
1099                 }
1100                 res = ISC_LIST_NEXT(res, link);
1101         }
1102
1103         return (NULL);
1104 }
1105
1106 static void
1107 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
1108         isc_mempool_t *bpool;
1109         INSIST(buf != NULL && len != 0);
1110
1111
1112         switch (disp->socktype) {
1113         case isc_sockettype_tcp:
1114                 INSIST(disp->tcpbuffers > 0);
1115                 disp->tcpbuffers--;
1116                 isc_mem_put(disp->mgr->mctx, buf, len);
1117                 break;
1118         case isc_sockettype_udp:
1119                 LOCK(&disp->mgr->buffer_lock);
1120                 INSIST(disp->mgr->buffers > 0);
1121                 INSIST(len == disp->mgr->buffersize);
1122                 disp->mgr->buffers--;
1123                 bpool = disp->mgr->bpool;
1124                 UNLOCK(&disp->mgr->buffer_lock);
1125                 isc_mempool_put(bpool, buf);
1126                 break;
1127         default:
1128                 INSIST(0);
1129                 break;
1130         }
1131 }
1132
1133 static void *
1134 allocate_udp_buffer(dns_dispatch_t *disp) {
1135         isc_mempool_t *bpool;
1136         void *temp;
1137
1138         LOCK(&disp->mgr->buffer_lock);
1139         bpool = disp->mgr->bpool;
1140         disp->mgr->buffers++;
1141         UNLOCK(&disp->mgr->buffer_lock);
1142
1143         temp = isc_mempool_get(bpool);
1144
1145         if (temp == NULL) {
1146                 LOCK(&disp->mgr->buffer_lock);
1147                 disp->mgr->buffers--;
1148                 UNLOCK(&disp->mgr->buffer_lock);
1149         }
1150
1151         return (temp);
1152 }
1153
1154 static inline void
1155 free_sevent(isc_event_t *ev) {
1156         isc_mempool_t *pool = ev->ev_destroy_arg;
1157         isc_socketevent_t *sev = (isc_socketevent_t *) ev;
1158         isc_mempool_put(pool, sev);
1159 }
1160
1161 static inline isc_socketevent_t *
1162 allocate_sevent(dns_dispatch_t *disp, isc_socket_t *socket,
1163                 isc_eventtype_t type, isc_taskaction_t action, const void *arg)
1164 {
1165         isc_socketevent_t *ev;
1166         void *deconst_arg;
1167
1168         ev = isc_mempool_get(disp->sepool);
1169         if (ev == NULL)
1170                 return (NULL);
1171         DE_CONST(arg, deconst_arg);
1172         ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type,
1173                        action, deconst_arg, socket,
1174                        free_sevent, disp->sepool);
1175         ev->result = ISC_R_UNSET;
1176         ISC_LINK_INIT(ev, ev_link);
1177         ISC_LIST_INIT(ev->bufferlist);
1178         ev->region.base = NULL;
1179         ev->n = 0;
1180         ev->offset = 0;
1181         ev->attributes = 0;
1182
1183         return (ev);
1184 }
1185
1186
1187 static inline void
1188 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
1189         if (disp->failsafe_ev == ev) {
1190                 INSIST(disp->shutdown_out == 1);
1191                 disp->shutdown_out = 0;
1192
1193                 return;
1194         }
1195
1196         isc_mempool_put(disp->mgr->depool, ev);
1197 }
1198
1199 static inline dns_dispatchevent_t *
1200 allocate_devent(dns_dispatch_t *disp) {
1201         dns_dispatchevent_t *ev;
1202
1203         ev = isc_mempool_get(disp->mgr->depool);
1204         if (ev == NULL)
1205                 return (NULL);
1206         ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
1207                        NULL, NULL, NULL, NULL, NULL);
1208
1209         return (ev);
1210 }
1211
1212 static void
1213 udp_exrecv(isc_task_t *task, isc_event_t *ev) {
1214         dispsocket_t *dispsock = ev->ev_arg;
1215
1216         UNUSED(task);
1217
1218         REQUIRE(VALID_DISPSOCK(dispsock));
1219         udp_recv(ev, dispsock->disp, dispsock);
1220 }
1221
1222 static void
1223 udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1224         dns_dispatch_t *disp = ev->ev_arg;
1225
1226         UNUSED(task);
1227
1228         REQUIRE(VALID_DISPATCH(disp));
1229         udp_recv(ev, disp, NULL);
1230 }
1231
1232 /*
      * Handle one completed UDP receive event 'ev_in' for dispatch 'disp'.
      * 'dispsock' is the dedicated dispatch socket the event belongs to (as
      * passed by udp_exrecv), or NULL when called from udp_shrecv.
      *
      * disp->lock is taken on entry and released on every return path, and
      * 'ev_in' is freed on every return path.  Every path that drops the
      * packet releases the receive buffer with free_buffer(); when the
      * packet is delivered, the buffer is instead wrapped in the dispatch
      * event that is sent to (or queued for) the requester.
      *
1233  * General flow:
1234  *
1235  * If I/O result == CANCELED or error, free the buffer.
1236  *
1237  * If query, free the buffer, restart.
1238  *
1239  * If response:
1240  *      Allocate event, fill in details.
1241  *              If cannot allocate, free buffer, restart.
1242  *      find target.  If not found, free buffer, restart.
1243  *      if event queue is not empty, queue.  else, send.
1244  *      restart.
1245  */
1246 static void
1247 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1248         isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1249         dns_messageid_t id;
1250         isc_result_t dres;
1251         isc_buffer_t source;
1252         unsigned int flags;
1253         dns_dispentry_t *resp = NULL;
1254         dns_dispatchevent_t *rev;
1255         unsigned int bucket;
1256         isc_boolean_t killit;
1257         isc_boolean_t queue_response;
1258         dns_dispatchmgr_t *mgr;
1259         dns_qid_t *qid;
1260         isc_netaddr_t netaddr;
1261         int match;
1262         int result;
             /* Set once qid->lock is taken, so 'unlock' knows to release it. */
1263         isc_boolean_t qidlocked = ISC_FALSE;
1264
1265         LOCK(&disp->lock);
1266
1267         mgr = disp->mgr;
1268         qid = mgr->qid;
1269
1270         dispatch_log(disp, LVL(90),
1271                      "got packet: requests %d, buffers %d, recvs %d",
1272                      disp->requests, disp->mgr->buffers, disp->recv_pending);
1273
1274         if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1275                 /*
1276                  * Unless the receive event was imported from a listening
1277                  * interface, in which case the event type is
1278                  * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1279                  */
1280                 INSIST(disp->recv_pending != 0);
1281                 disp->recv_pending = 0;
1282         }
1283
1284         if (dispsock != NULL &&
1285             (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1286                 /*
1287                  * dispsock->resp can be NULL if this transaction was canceled
1288                  * just after receiving a response.  Since this socket is
1289                  * exclusively used and there should be at most one receive
1290                  * event the canceled event should have been no effect.  So
1291                  * we can (and should) deactivate the socket right now.
1292                  */
1293                 deactivate_dispsocket(disp, dispsock);
                     /* From here on the event is treated as having no socket. */
1294                 dispsock = NULL;
1295         }
1296
1297         if (disp->shutting_down) {
1298                 /*
1299                  * This dispatcher is shutting down.
1300                  */
1301                 free_buffer(disp, ev->region.base, ev->region.length);
1302
1303                 isc_event_free(&ev_in);
1304                 ev = NULL;
1305
1306                 killit = destroy_disp_ok(disp);
1307                 UNLOCK(&disp->lock);
1308                 if (killit)
1309                         isc_task_send(disp->task[0], &disp->ctlevent);
1310
1311                 return;
1312         }
1313
1314         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1315                 if (dispsock != NULL) {
                             /*
                              * dispsock->resp is non-NULL here: the NULL case
                              * was handled above by clearing 'dispsock'.
                              */
1316                         resp = dispsock->resp;
1317                         id = resp->id;
1318                         if (ev->result != ISC_R_SUCCESS) {
1319                                 /*
1320                                  * This is most likely a network error on a
1321                                  * connected socket.  It makes no sense to
1322                                  * check the address or parse the packet, but it
1323                                  * will help to return the error to the caller.
1324                                  */
1325                                 goto sendresponse;
1326                         }
1327                 } else {
                             /*
                              * Exclusive dispatch but no usable socket: drop
                              * the packet without restarting the receive.
                              */
1328                         free_buffer(disp, ev->region.base, ev->region.length);
1329
1330                         isc_event_free(&ev_in);
1331                         UNLOCK(&disp->lock);
1332                         return;
1333                 }
1334         } else if (ev->result != ISC_R_SUCCESS) {
1335                 free_buffer(disp, ev->region.base, ev->region.length);
1336
1337                 if (ev->result != ISC_R_CANCELED)
1338                         dispatch_log(disp, ISC_LOG_ERROR,
1339                                      "odd socket result in udp_recv(): %s",
1340                                      isc_result_totext(ev->result));
1341
1342                 isc_event_free(&ev_in);
1343                 UNLOCK(&disp->lock);
1344                 return;
1345         }
1346
1347         /*
1348          * If this is from a blackholed address, drop it.
1349          */
1350         isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1351         if (disp->mgr->blackhole != NULL &&
1352             dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1353                           NULL, &match, NULL) == ISC_R_SUCCESS &&
1354             match > 0)
1355         {
1356                 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1357                         char netaddrstr[ISC_NETADDR_FORMATSIZE];
1358                         isc_netaddr_format(&netaddr, netaddrstr,
1359                                            sizeof(netaddrstr));
1360                         dispatch_log(disp, LVL(10),
1361                                      "blackholed packet from %s",
1362                                      netaddrstr);
1363                 }
1364                 free_buffer(disp, ev->region.base, ev->region.length);
1365                 goto restart;
1366         }
1367
1368         /*
1369          * Peek into the buffer to see what we can see.
1370          */
1371         isc_buffer_init(&source, ev->region.base, ev->region.length);
1372         isc_buffer_add(&source, ev->n);
1373         dres = dns_message_peekheader(&source, &id, &flags);
1374         if (dres != ISC_R_SUCCESS) {
1375                 free_buffer(disp, ev->region.base, ev->region.length);
1376                 dispatch_log(disp, LVL(10), "got garbage packet");
1377                 goto restart;
1378         }
1379
1380         dispatch_log(disp, LVL(92),
1381                      "got valid DNS message header, /QR %c, id %u",
1382                      ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1383
1384         /*
1385          * Look at flags.  If query, drop it. If response,
1386          * look to see where it goes.
1387          */
1388         if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1389                 /* query */
1390                 free_buffer(disp, ev->region.base, ev->region.length);
1391                 goto restart;
1392         }
1393
1394         /*
1395          * Search for the corresponding response.  If we are using an exclusive
1396          * socket, we've already identified it and we can skip the search; but
1397          * the ID and the address must match the expected ones.
1398          */
1399         if (resp == NULL) {
1400                 bucket = dns_hash(qid, &ev->address, id, disp->localport);
                     /* qid->lock stays held until the 'unlock' label. */
1401                 LOCK(&qid->lock);
1402                 qidlocked = ISC_TRUE;
1403                 resp = entry_search(qid, &ev->address, id, disp->localport,
1404                                     bucket);
1405                 dispatch_log(disp, LVL(90),
1406                              "search for response in bucket %d: %s",
1407                              bucket, (resp == NULL ? "not found" : "found"));
1408
1409                 if (resp == NULL) {
1410                         inc_stats(mgr, dns_resstatscounter_mismatch);
1411                         free_buffer(disp, ev->region.base, ev->region.length);
1412                         goto unlock;
1413                 }
1414         } else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1415                                                          &resp->host)) {
1416                 dispatch_log(disp, LVL(90),
1417                              "response to an exclusive socket doesn't match");
1418                 inc_stats(mgr, dns_resstatscounter_mismatch);
1419                 free_buffer(disp, ev->region.base, ev->region.length);
1420                 goto unlock;
1421         }
1422
1423         /*
1424          * Now that we have the original dispatch the query was sent
1425          * from check that the address and port the response was
1426          * sent to make sense.
1427          */
1428         if (disp != resp->disp) {
1429                 isc_sockaddr_t a1;
1430                 isc_sockaddr_t a2;
1431
1432                 /*
1433                  * Check that the socket types and ports match.
1434                  */
1435                 if (disp->socktype != resp->disp->socktype ||
1436                     isc_sockaddr_getport(&disp->local) !=
1437                     isc_sockaddr_getport(&resp->disp->local)) {
1438                         free_buffer(disp, ev->region.base, ev->region.length);
1439                         goto unlock;
1440                 }
1441
1442                 /*
1443                  * If each dispatch is bound to a different address
1444                  * then fail.
1445                  *
1446                  * Note under Linux a packet can be sent out via IPv4 socket
1447                  * and the response be received via a IPv6 socket.
1448                  *
1449                  * Requests sent out via IPv6 should always come back in
1450                  * via IPv6.
1451                  */
1452                 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1453                     isc_sockaddr_pf(&disp->local) != PF_INET6) {
1454                         free_buffer(disp, ev->region.base, ev->region.length);
1455                         goto unlock;
1456                 }
                     /*
                      * a1 and a2 are the wildcard addresses of each dispatch's
                      * family.  The packet is accepted if both dispatches have
                      * the same local address, or if either one is bound to
                      * its wildcard address.
                      */
1457                 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1458                 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1459                 if (!isc_sockaddr_eqaddr(&disp->local, &resp->disp->local) &&
1460                     !isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1461                     !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1462                         free_buffer(disp, ev->region.base, ev->region.length);
1463                         goto unlock;
1464                 }
1465         }
1466
1467   sendresponse:
             /*
              * If an event has already been sent for this entry (item_out is
              * set below when one is sent), this one is queued on resp->items
              * instead of being sent.
              */
1468         queue_response = resp->item_out;
1469         rev = allocate_devent(resp->disp);
1470         if (rev == NULL) {
1471                 free_buffer(disp, ev->region.base, ev->region.length);
1472                 goto unlock;
1473         }
1474
1475         /*
1476          * At this point, rev contains the event we want to fill in, and
1477          * resp contains the information on the place to send it to.
1478          * Send the event off.
1479          */
1480         isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1481         isc_buffer_add(&rev->buffer, ev->n);
1482         rev->result = ev->result;
1483         rev->id = id;
1484         rev->addr = ev->address;
1485         rev->pktinfo = ev->pktinfo;
1486         rev->attributes = ev->attributes;
1487         if (queue_response) {
1488                 ISC_LIST_APPEND(resp->items, rev, ev_link);
1489         } else {
1490                 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1491                                DNS_EVENT_DISPATCH,
1492                                resp->action, resp->arg, resp, NULL, NULL);
1493                 request_log(disp, resp, LVL(90),
1494                             "[a] Sent event %p buffer %p len %d to task %p",
1495                             rev, rev->buffer.base, rev->buffer.length,
1496                             resp->task);
1497                 resp->item_out = ISC_TRUE;
1498                 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1499         }
1500  unlock:
1501         if (qidlocked)
1502                 UNLOCK(&qid->lock);
1503
1504         /*
1505          * Restart recv() to get the next packet.
1506          */
1507  restart:
1508         result = startrecv(disp, dispsock);
1509         if (result != ISC_R_SUCCESS && dispsock != NULL) {
1510                 /*
1511                  * XXX: weird. There seems to be no recovery process other than
1512                  * deactivate this socket anyway (since we cannot start
1513                  * receiving, we won't be able to receive a cancel event
1514                  * from the user).
1515                  */
1516                 deactivate_dispsocket(disp, dispsock);
1517         }
1518         isc_event_free(&ev_in);
1519         UNLOCK(&disp->lock);
1520 }
1521
1522 /*
1523  * General flow:
1524  *
1525  * If I/O result == CANCELED, EOF, or error, notify everyone as the
1526  * various queues drain.
1527  *
1528  * If query, restart.
1529  *
1530  * If response:
1531  *      Allocate event, fill in details.
1532  *              If cannot allocate, restart.
1533  *      find target.  If not found, restart.
1534  *      if event queue is not empty, queue.  else, send.
1535  *      restart.
1536  */
1537 static void
1538 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1539         dns_dispatch_t *disp = ev_in->ev_arg;
1540         dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1541         dns_messageid_t id;
1542         isc_result_t dres;
1543         unsigned int flags;
1544         dns_dispentry_t *resp;
1545         dns_dispatchevent_t *rev;
1546         unsigned int bucket;
1547         isc_boolean_t killit;
1548         isc_boolean_t queue_response;
1549         dns_qid_t *qid;
1550         int level;
1551         char buf[ISC_SOCKADDR_FORMATSIZE];
1552
1553         UNUSED(task);
1554
1555         REQUIRE(VALID_DISPATCH(disp));
1556
1557         qid = disp->qid;
1558
1559         dispatch_log(disp, LVL(90),
1560                      "got TCP packet: requests %d, buffers %d, recvs %d",
1561                      disp->requests, disp->tcpbuffers, disp->recv_pending);
1562
1563         LOCK(&disp->lock);
1564
1565         INSIST(disp->recv_pending != 0);
1566         disp->recv_pending = 0;
1567
1568         if (disp->refcount == 0) {
1569                 /*
1570                  * This dispatcher is shutting down.  Force cancelation.
1571                  */
1572                 tcpmsg->result = ISC_R_CANCELED;
1573         }
1574
1575         if (tcpmsg->result != ISC_R_SUCCESS) {
1576                 switch (tcpmsg->result) {
1577                 case ISC_R_CANCELED:
1578                         break;
1579
1580                 case ISC_R_EOF:
1581                         dispatch_log(disp, LVL(90), "shutting down on EOF");
1582                         do_cancel(disp);
1583                         break;
1584
1585                 case ISC_R_CONNECTIONRESET:
1586                         level = ISC_LOG_INFO;
1587                         goto logit;
1588
1589                 default:
1590                         level = ISC_LOG_ERROR;
1591                 logit:
1592                         isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1593                         dispatch_log(disp, level, "shutting down due to TCP "
1594                                      "receive error: %s: %s", buf,
1595                                      isc_result_totext(tcpmsg->result));
1596                         do_cancel(disp);
1597                         break;
1598                 }
1599
1600                 /*
1601                  * The event is statically allocated in the tcpmsg
1602                  * structure, and destroy_disp() frees the tcpmsg, so we must
1603                  * free the event *before* calling destroy_disp().
1604                  */
1605                 isc_event_free(&ev_in);
1606
1607                 disp->shutting_down = 1;
1608                 disp->shutdown_why = tcpmsg->result;
1609
1610                 /*
1611                  * If the recv() was canceled pass the word on.
1612                  */
1613                 killit = destroy_disp_ok(disp);
1614                 UNLOCK(&disp->lock);
1615                 if (killit)
1616                         isc_task_send(disp->task[0], &disp->ctlevent);
1617                 return;
1618         }
1619
1620         dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1621                      tcpmsg->result,
1622                      tcpmsg->buffer.length, tcpmsg->buffer.base);
1623
1624         /*
1625          * Peek into the buffer to see what we can see.
1626          */
1627         dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1628         if (dres != ISC_R_SUCCESS) {
1629                 dispatch_log(disp, LVL(10), "got garbage packet");
1630                 goto restart;
1631         }
1632
1633         dispatch_log(disp, LVL(92),
1634                      "got valid DNS message header, /QR %c, id %u",
1635                      ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1636
1637         /*
1638          * Allocate an event to send to the query or response client, and
1639          * allocate a new buffer for our use.
1640          */
1641
1642         /*
1643          * Look at flags.  If query, drop it. If response,
1644          * look to see where it goes.
1645          */
1646         if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1647                 /*
1648                  * Query.
1649                  */
1650                 goto restart;
1651         }
1652
1653         /*
1654          * Response.
1655          */
1656         bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1657         LOCK(&qid->lock);
1658         resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1659         dispatch_log(disp, LVL(90),
1660                      "search for response in bucket %d: %s",
1661                      bucket, (resp == NULL ? "not found" : "found"));
1662
1663         if (resp == NULL)
1664                 goto unlock;
1665         queue_response = resp->item_out;
1666         rev = allocate_devent(disp);
1667         if (rev == NULL)
1668                 goto unlock;
1669
1670         /*
1671          * At this point, rev contains the event we want to fill in, and
1672          * resp contains the information on the place to send it to.
1673          * Send the event off.
1674          */
1675         dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1676         disp->tcpbuffers++;
1677         rev->result = ISC_R_SUCCESS;
1678         rev->id = id;
1679         rev->addr = tcpmsg->address;
1680         if (queue_response) {
1681                 ISC_LIST_APPEND(resp->items, rev, ev_link);
1682         } else {
1683                 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1684                                resp->action, resp->arg, resp, NULL, NULL);
1685                 request_log(disp, resp, LVL(90),
1686                             "[b] Sent event %p buffer %p len %d to task %p",
1687                             rev, rev->buffer.base, rev->buffer.length,
1688                             resp->task);
1689                 resp->item_out = ISC_TRUE;
1690                 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1691         }
1692  unlock:
1693         UNLOCK(&qid->lock);
1694
1695         /*
1696          * Restart recv() to get the next packet.
1697          */
1698  restart:
1699         (void)startrecv(disp, NULL);
1700
1701         isc_event_free(&ev_in);
1702         UNLOCK(&disp->lock);
1703 }
1704
1705 /*
1706  * disp must be locked.
1707  */
1708 static isc_result_t
1709 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1710         isc_result_t res;
1711         isc_region_t region;
1712         isc_socket_t *socket;
1713
1714         if (disp->shutting_down == 1)
1715                 return (ISC_R_SUCCESS);
1716
1717         if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1718                 return (ISC_R_SUCCESS);
1719
1720         if (disp->recv_pending != 0 && dispsock == NULL)
1721                 return (ISC_R_SUCCESS);
1722
1723         if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1724                 return (ISC_R_NOMEMORY);
1725
1726         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1727             dispsock == NULL)
1728                 return (ISC_R_SUCCESS);
1729
1730         if (dispsock != NULL)
1731                 socket = dispsock->socket;
1732         else
1733                 socket = disp->socket;
1734         INSIST(socket != NULL);
1735
1736         switch (disp->socktype) {
1737                 /*
1738                  * UDP reads are always maximal.
1739                  */
1740         case isc_sockettype_udp:
1741                 region.length = disp->mgr->buffersize;
1742                 region.base = allocate_udp_buffer(disp);
1743                 if (region.base == NULL)
1744                         return (ISC_R_NOMEMORY);
1745                 if (dispsock != NULL) {
1746                         isc_task_t *dt = dispsock->task;
1747                         isc_socketevent_t *sev =
1748                                 allocate_sevent(disp, socket,
1749                                                 ISC_SOCKEVENT_RECVDONE,
1750                                                 udp_exrecv, dispsock);
1751                         if (sev == NULL) {
1752                                 free_buffer(disp, region.base, region.length);
1753                                 return (ISC_R_NOMEMORY);
1754                         }
1755
1756                         res = isc_socket_recv2(socket, &region, 1, dt, sev, 0);
1757                         if (res != ISC_R_SUCCESS) {
1758                                 free_buffer(disp, region.base, region.length);
1759                                 return (res);
1760                         }
1761                 } else {
1762                         isc_task_t *dt = disp->task[0];
1763                         isc_socketevent_t *sev =
1764                                 allocate_sevent(disp, socket,
1765                                                 ISC_SOCKEVENT_RECVDONE,
1766                                                 udp_shrecv, disp);
1767                         if (sev == NULL) {
1768                                 free_buffer(disp, region.base, region.length);
1769                                 return (ISC_R_NOMEMORY);
1770                         }
1771
1772                         res = isc_socket_recv2(socket, &region, 1, dt, sev, 0);
1773                         if (res != ISC_R_SUCCESS) {
1774                                 free_buffer(disp, region.base, region.length);
1775                                 disp->shutdown_why = res;
1776                                 disp->shutting_down = 1;
1777                                 do_cancel(disp);
1778                                 return (ISC_R_SUCCESS); /* recover by cancel */
1779                         }
1780                         INSIST(disp->recv_pending == 0);
1781                         disp->recv_pending = 1;
1782                 }
1783                 break;
1784
1785         case isc_sockettype_tcp:
1786                 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1787                                              tcp_recv, disp);
1788                 if (res != ISC_R_SUCCESS) {
1789                         disp->shutdown_why = res;
1790                         disp->shutting_down = 1;
1791                         do_cancel(disp);
1792                         return (ISC_R_SUCCESS); /* recover by cancel */
1793                 }
1794                 INSIST(disp->recv_pending == 0);
1795                 disp->recv_pending = 1;
1796                 break;
1797         default:
1798                 INSIST(0);
1799                 break;
1800         }
1801
1802         return (ISC_R_SUCCESS);
1803 }
1804
1805 /*
1806  * Mgr must be locked when calling this function.
1807  */
1808 static isc_boolean_t
1809 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1810         mgr_log(mgr, LVL(90),
1811                 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1812                 "depool=%d, rpool=%d, dpool=%d",
1813                 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1814                 isc_mempool_getallocated(mgr->depool),
1815                 isc_mempool_getallocated(mgr->rpool),
1816                 isc_mempool_getallocated(mgr->dpool));
1817         if (!MGR_IS_SHUTTINGDOWN(mgr))
1818                 return (ISC_FALSE);
1819         if (!ISC_LIST_EMPTY(mgr->list))
1820                 return (ISC_FALSE);
1821         if (isc_mempool_getallocated(mgr->depool) != 0)
1822                 return (ISC_FALSE);
1823         if (isc_mempool_getallocated(mgr->rpool) != 0)
1824                 return (ISC_FALSE);
1825         if (isc_mempool_getallocated(mgr->dpool) != 0)
1826                 return (ISC_FALSE);
1827
1828         return (ISC_TRUE);
1829 }
1830
1831 /*
1832  * Mgr must be unlocked when calling this function.
1833  */
1834 static void
1835 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1836         isc_mem_t *mctx;
1837         dns_dispatchmgr_t *mgr;
1838
1839         mgr = *mgrp;
1840         *mgrp = NULL;
1841
1842         mctx = mgr->mctx;
1843
1844         mgr->magic = 0;
1845         mgr->mctx = NULL;
1846         DESTROYLOCK(&mgr->lock);
1847         mgr->state = 0;
1848
1849         DESTROYLOCK(&mgr->arc4_lock);
1850
1851         isc_mempool_destroy(&mgr->depool);
1852         isc_mempool_destroy(&mgr->rpool);
1853         isc_mempool_destroy(&mgr->dpool);
1854         if (mgr->bpool != NULL)
1855                 isc_mempool_destroy(&mgr->bpool);
1856         if (mgr->spool != NULL)
1857                 isc_mempool_destroy(&mgr->spool);
1858
1859         DESTROYLOCK(&mgr->spool_lock);
1860         DESTROYLOCK(&mgr->bpool_lock);
1861         DESTROYLOCK(&mgr->dpool_lock);
1862         DESTROYLOCK(&mgr->rpool_lock);
1863         DESTROYLOCK(&mgr->depool_lock);
1864
1865 #ifdef BIND9
1866         if (mgr->entropy != NULL)
1867                 isc_entropy_detach(&mgr->entropy);
1868 #endif /* BIND9 */
1869         if (mgr->qid != NULL)
1870                 qid_destroy(mctx, &mgr->qid);
1871
1872         DESTROYLOCK(&mgr->buffer_lock);
1873
1874         if (mgr->blackhole != NULL)
1875                 dns_acl_detach(&mgr->blackhole);
1876
1877         if (mgr->stats != NULL)
1878                 isc_stats_detach(&mgr->stats);
1879
1880         if (mgr->v4ports != NULL) {
1881                 isc_mem_put(mctx, mgr->v4ports,
1882                             mgr->nv4ports * sizeof(in_port_t));
1883         }
1884         if (mgr->v6ports != NULL) {
1885                 isc_mem_put(mctx, mgr->v6ports,
1886                             mgr->nv6ports * sizeof(in_port_t));
1887         }
1888         isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1889         isc_mem_detach(&mctx);
1890 }
1891
1892 static isc_result_t
1893 open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1894             unsigned int options, isc_socket_t **sockp,
1895             isc_socket_t *dup_socket)
1896 {
1897         isc_socket_t *sock;
1898         isc_result_t result;
1899
1900         sock = *sockp;
1901         if (sock != NULL) {
1902 #ifdef BIND9
1903                 result = isc_socket_open(sock);
1904                 if (result != ISC_R_SUCCESS)
1905                         return (result);
1906 #else
1907                 INSIST(0);
1908 #endif
1909         } else if (dup_socket != NULL) {
1910                 result = isc_socket_dup(dup_socket, &sock);
1911                 if (result != ISC_R_SUCCESS)
1912                         return (result);
1913
1914                 isc_socket_setname(sock, "dispatcher", NULL);
1915                 *sockp = sock;
1916                 return (ISC_R_SUCCESS);
1917         } else {
1918                 result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1919                                         isc_sockettype_udp, &sock);
1920                 if (result != ISC_R_SUCCESS)
1921                         return (result);
1922         }
1923
1924         isc_socket_setname(sock, "dispatcher", NULL);
1925
1926 #ifndef ISC_ALLOW_MAPPED
1927         isc_socket_ipv6only(sock, ISC_TRUE);
1928 #endif
1929         result = isc_socket_bind(sock, local, options);
1930         if (result != ISC_R_SUCCESS) {
1931                 if (*sockp == NULL)
1932                         isc_socket_detach(&sock);
1933                 else {
1934 #ifdef BIND9
1935                         isc_socket_close(sock);
1936 #else
1937                         INSIST(0);
1938 #endif
1939                 }
1940                 return (result);
1941         }
1942
1943         *sockp = sock;
1944         return (ISC_R_SUCCESS);
1945 }
1946
1947 /*%
1948  * Create a temporary port list to set the initial default set of dispatch
1949  * ports: [1024, 65535].  This is almost meaningless as the application will
1950  * normally set the ports explicitly, but is provided to fill some minor corner
1951  * cases.
1952  */
1953 static isc_result_t
1954 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1955         isc_result_t result;
1956
1957         result = isc_portset_create(mctx, portsetp);
1958         if (result != ISC_R_SUCCESS)
1959                 return (result);
1960         isc_portset_addrange(*portsetp, 1024, 65535);
1961
1962         return (ISC_R_SUCCESS);
1963 }
1964
1965 /*
1966  * Publics.
1967  */
1968
1969 isc_result_t
1970 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1971                        dns_dispatchmgr_t **mgrp)
1972 {
1973         dns_dispatchmgr_t *mgr;
1974         isc_result_t result;
1975         isc_portset_t *v4portset = NULL;
1976         isc_portset_t *v6portset = NULL;
1977
1978         REQUIRE(mctx != NULL);
1979         REQUIRE(mgrp != NULL && *mgrp == NULL);
1980
1981         mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1982         if (mgr == NULL)
1983                 return (ISC_R_NOMEMORY);
1984
1985         mgr->mctx = NULL;
1986         isc_mem_attach(mctx, &mgr->mctx);
1987
1988         mgr->blackhole = NULL;
1989         mgr->stats = NULL;
1990
1991         result = isc_mutex_init(&mgr->lock);
1992         if (result != ISC_R_SUCCESS)
1993                 goto deallocate;
1994
1995         result = isc_mutex_init(&mgr->arc4_lock);
1996         if (result != ISC_R_SUCCESS)
1997                 goto kill_lock;
1998
1999         result = isc_mutex_init(&mgr->buffer_lock);
2000         if (result != ISC_R_SUCCESS)
2001                 goto kill_arc4_lock;
2002
2003         result = isc_mutex_init(&mgr->depool_lock);
2004         if (result != ISC_R_SUCCESS)
2005                 goto kill_buffer_lock;
2006
2007         result = isc_mutex_init(&mgr->rpool_lock);
2008         if (result != ISC_R_SUCCESS)
2009                 goto kill_depool_lock;
2010
2011         result = isc_mutex_init(&mgr->dpool_lock);
2012         if (result != ISC_R_SUCCESS)
2013                 goto kill_rpool_lock;
2014
2015         result = isc_mutex_init(&mgr->bpool_lock);
2016         if (result != ISC_R_SUCCESS)
2017                 goto kill_dpool_lock;
2018
2019         result = isc_mutex_init(&mgr->spool_lock);
2020         if (result != ISC_R_SUCCESS)
2021                 goto kill_bpool_lock;
2022
2023         mgr->depool = NULL;
2024         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
2025                                &mgr->depool) != ISC_R_SUCCESS) {
2026                 result = ISC_R_NOMEMORY;
2027                 goto kill_spool_lock;
2028         }
2029
2030         mgr->rpool = NULL;
2031         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
2032                                &mgr->rpool) != ISC_R_SUCCESS) {
2033                 result = ISC_R_NOMEMORY;
2034                 goto kill_depool;
2035         }
2036
2037         mgr->dpool = NULL;
2038         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
2039                                &mgr->dpool) != ISC_R_SUCCESS) {
2040                 result = ISC_R_NOMEMORY;
2041                 goto kill_rpool;
2042         }
2043
2044         isc_mempool_setname(mgr->depool, "dispmgr_depool");
2045         isc_mempool_setmaxalloc(mgr->depool, 32768);
2046         isc_mempool_setfreemax(mgr->depool, 32768);
2047         isc_mempool_associatelock(mgr->depool, &mgr->depool_lock);
2048         isc_mempool_setfillcount(mgr->depool, 256);
2049
2050         isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
2051         isc_mempool_setmaxalloc(mgr->rpool, 32768);
2052         isc_mempool_setfreemax(mgr->rpool, 32768);
2053         isc_mempool_associatelock(mgr->rpool, &mgr->rpool_lock);
2054         isc_mempool_setfillcount(mgr->rpool, 256);
2055
2056         isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
2057         isc_mempool_setmaxalloc(mgr->dpool, 32768);
2058         isc_mempool_setfreemax(mgr->dpool, 32768);
2059         isc_mempool_associatelock(mgr->dpool, &mgr->dpool_lock);
2060         isc_mempool_setfillcount(mgr->dpool, 256);
2061
2062         mgr->buffers = 0;
2063         mgr->buffersize = 0;
2064         mgr->maxbuffers = 0;
2065         mgr->bpool = NULL;
2066         mgr->spool = NULL;
2067         mgr->entropy = NULL;
2068         mgr->qid = NULL;
2069         mgr->state = 0;
2070         ISC_LIST_INIT(mgr->list);
2071         mgr->v4ports = NULL;
2072         mgr->v6ports = NULL;
2073         mgr->nv4ports = 0;
2074         mgr->nv6ports = 0;
2075         mgr->magic = DNS_DISPATCHMGR_MAGIC;
2076
2077         result = create_default_portset(mctx, &v4portset);
2078         if (result == ISC_R_SUCCESS) {
2079                 result = create_default_portset(mctx, &v6portset);
2080                 if (result == ISC_R_SUCCESS) {
2081                         result = dns_dispatchmgr_setavailports(mgr,
2082                                                                v4portset,
2083                                                                v6portset);
2084                 }
2085         }
2086         if (v4portset != NULL)
2087                 isc_portset_destroy(mctx, &v4portset);
2088         if (v6portset != NULL)
2089                 isc_portset_destroy(mctx, &v6portset);
2090         if (result != ISC_R_SUCCESS)
2091                 goto kill_dpool;
2092
2093 #ifdef BIND9
2094         if (entropy != NULL)
2095                 isc_entropy_attach(entropy, &mgr->entropy);
2096 #else
2097         UNUSED(entropy);
2098 #endif
2099
2100         dispatch_initrandom(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
2101
2102         *mgrp = mgr;
2103         return (ISC_R_SUCCESS);
2104
2105  kill_dpool:
2106         isc_mempool_destroy(&mgr->dpool);
2107  kill_rpool:
2108         isc_mempool_destroy(&mgr->rpool);
2109  kill_depool:
2110         isc_mempool_destroy(&mgr->depool);
2111  kill_spool_lock:
2112         DESTROYLOCK(&mgr->spool_lock);
2113  kill_bpool_lock:
2114         DESTROYLOCK(&mgr->bpool_lock);
2115  kill_dpool_lock:
2116         DESTROYLOCK(&mgr->dpool_lock);
2117  kill_rpool_lock:
2118         DESTROYLOCK(&mgr->rpool_lock);
2119  kill_depool_lock:
2120         DESTROYLOCK(&mgr->depool_lock);
2121  kill_buffer_lock:
2122         DESTROYLOCK(&mgr->buffer_lock);
2123  kill_arc4_lock:
2124         DESTROYLOCK(&mgr->arc4_lock);
2125  kill_lock:
2126         DESTROYLOCK(&mgr->lock);
2127  deallocate:
2128         isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
2129         isc_mem_detach(&mctx);
2130
2131         return (result);
2132 }
2133
2134 void
2135 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
2136         REQUIRE(VALID_DISPATCHMGR(mgr));
2137         if (mgr->blackhole != NULL)
2138                 dns_acl_detach(&mgr->blackhole);
2139         dns_acl_attach(blackhole, &mgr->blackhole);
2140 }
2141
2142 dns_acl_t *
2143 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
2144         REQUIRE(VALID_DISPATCHMGR(mgr));
2145         return (mgr->blackhole);
2146 }
2147
2148 void
2149 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
2150                                  dns_portlist_t *portlist)
2151 {
2152         REQUIRE(VALID_DISPATCHMGR(mgr));
2153         UNUSED(portlist);
2154
2155         /* This function is deprecated: use dns_dispatchmgr_setavailports(). */
2156         return;
2157 }
2158
2159 dns_portlist_t *
2160 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
2161         REQUIRE(VALID_DISPATCHMGR(mgr));
2162         return (NULL);          /* this function is deprecated */
2163 }
2164
2165 isc_result_t
2166 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
2167                               isc_portset_t *v6portset)
2168 {
2169         in_port_t *v4ports, *v6ports, p;
2170         unsigned int nv4ports, nv6ports, i4, i6;
2171
2172         REQUIRE(VALID_DISPATCHMGR(mgr));
2173
2174         nv4ports = isc_portset_nports(v4portset);
2175         nv6ports = isc_portset_nports(v6portset);
2176
2177         v4ports = NULL;
2178         if (nv4ports != 0) {
2179                 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
2180                 if (v4ports == NULL)
2181                         return (ISC_R_NOMEMORY);
2182         }
2183         v6ports = NULL;
2184         if (nv6ports != 0) {
2185                 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
2186                 if (v6ports == NULL) {
2187                         if (v4ports != NULL) {
2188                                 isc_mem_put(mgr->mctx, v4ports,
2189                                             sizeof(in_port_t) *
2190                                             isc_portset_nports(v4portset));
2191                         }
2192                         return (ISC_R_NOMEMORY);
2193                 }
2194         }
2195
2196         p = 0;
2197         i4 = 0;
2198         i6 = 0;
2199         do {
2200                 if (isc_portset_isset(v4portset, p)) {
2201                         INSIST(i4 < nv4ports);
2202                         v4ports[i4++] = p;
2203                 }
2204                 if (isc_portset_isset(v6portset, p)) {
2205                         INSIST(i6 < nv6ports);
2206                         v6ports[i6++] = p;
2207                 }
2208         } while (p++ < 65535);
2209         INSIST(i4 == nv4ports && i6 == nv6ports);
2210
2211         PORTBUFLOCK(mgr);
2212         if (mgr->v4ports != NULL) {
2213                 isc_mem_put(mgr->mctx, mgr->v4ports,
2214                             mgr->nv4ports * sizeof(in_port_t));
2215         }
2216         mgr->v4ports = v4ports;
2217         mgr->nv4ports = nv4ports;
2218
2219         if (mgr->v6ports != NULL) {
2220                 isc_mem_put(mgr->mctx, mgr->v6ports,
2221                             mgr->nv6ports * sizeof(in_port_t));
2222         }
2223         mgr->v6ports = v6ports;
2224         mgr->nv6ports = nv6ports;
2225         PORTBUFUNLOCK(mgr);
2226
2227         return (ISC_R_SUCCESS);
2228 }
2229
2230 static isc_result_t
2231 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
2232                        unsigned int buffersize, unsigned int maxbuffers,
2233                        unsigned int maxrequests, unsigned int buckets,
2234                        unsigned int increment)
2235 {
2236         isc_result_t result;
2237
2238         REQUIRE(VALID_DISPATCHMGR(mgr));
2239         REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2240         REQUIRE(maxbuffers > 0);
2241         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2242         REQUIRE(increment > buckets);
2243
2244         /*
2245          * Keep some number of items around.  This should be a config
2246          * option.  For now, keep 8, but later keep at least two even
2247          * if the caller wants less.  This allows us to ensure certain
2248          * things, like an event can be "freed" and the next allocation
2249          * will always succeed.
2250          *
2251          * Note that if limits are placed on anything here, we use one
2252          * event internally, so the actual limit should be "wanted + 1."
2253          *
2254          * XXXMLG
2255          */
2256
2257         if (maxbuffers < 8)
2258                 maxbuffers = 8;
2259
2260         LOCK(&mgr->buffer_lock);
2261
2262         /* Create or adjust buffer pool */
2263         if (mgr->bpool != NULL) {
2264                 /*
2265                  * We only increase the maxbuffers to avoid accidental buffer
2266                  * shortage.  Ideally we'd separate the manager-wide maximum
2267                  * from per-dispatch limits and respect the latter within the
2268                  * global limit.  But at this moment that's deemed to be
2269                  * overkilling and isn't worth additional implementation
2270                  * complexity.
2271                  */
2272                 if (maxbuffers > mgr->maxbuffers) {
2273                         isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2274                         isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2275                         mgr->maxbuffers = maxbuffers;
2276                 }
2277         } else {
2278                 result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
2279                 if (result != ISC_R_SUCCESS) {
2280                         UNLOCK(&mgr->buffer_lock);
2281                         return (result);
2282                 }
2283                 isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
2284                 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2285                 isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2286                 isc_mempool_associatelock(mgr->bpool, &mgr->bpool_lock);
2287                 isc_mempool_setfillcount(mgr->bpool, 256);
2288         }
2289
2290         /* Create or adjust socket pool */
2291         if (mgr->spool != NULL) {
2292                 if (maxrequests < DNS_DISPATCH_POOLSOCKS * 2) {
2293                         isc_mempool_setmaxalloc(mgr->spool,
2294                                                 DNS_DISPATCH_POOLSOCKS * 2);
2295                         isc_mempool_setfreemax(mgr->spool,
2296                                                DNS_DISPATCH_POOLSOCKS * 2);
2297                 }
2298                 UNLOCK(&mgr->buffer_lock);
2299                 return (ISC_R_SUCCESS);
2300         }
2301         result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
2302                                     &mgr->spool);
2303         if (result != ISC_R_SUCCESS) {
2304                 UNLOCK(&mgr->buffer_lock);
2305                 goto cleanup;
2306         }
2307         isc_mempool_setname(mgr->spool, "dispmgr_spool");
2308         isc_mempool_setmaxalloc(mgr->spool, maxrequests);
2309         isc_mempool_setfreemax(mgr->spool, maxrequests);
2310         isc_mempool_associatelock(mgr->spool, &mgr->spool_lock);
2311         isc_mempool_setfillcount(mgr->spool, 256);
2312
2313         result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
2314         if (result != ISC_R_SUCCESS)
2315                 goto cleanup;
2316
2317         mgr->buffersize = buffersize;
2318         mgr->maxbuffers = maxbuffers;
2319         UNLOCK(&mgr->buffer_lock);
2320         return (ISC_R_SUCCESS);
2321
2322  cleanup:
2323         isc_mempool_destroy(&mgr->bpool);
2324         if (mgr->spool != NULL)
2325                 isc_mempool_destroy(&mgr->spool);
2326         UNLOCK(&mgr->buffer_lock);
2327         return (result);
2328 }
2329
2330 void
2331 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
2332         dns_dispatchmgr_t *mgr;
2333         isc_boolean_t killit;
2334
2335         REQUIRE(mgrp != NULL);
2336         REQUIRE(VALID_DISPATCHMGR(*mgrp));
2337
2338         mgr = *mgrp;
2339         *mgrp = NULL;
2340
2341         LOCK(&mgr->lock);
2342         mgr->state |= MGR_SHUTTINGDOWN;
2343
2344         killit = destroy_mgr_ok(mgr);
2345         UNLOCK(&mgr->lock);
2346
2347         mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
2348
2349         if (killit)
2350                 destroy_mgr(&mgr);
2351 }
2352
2353 void
2354 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2355         REQUIRE(VALID_DISPATCHMGR(mgr));
2356         REQUIRE(ISC_LIST_EMPTY(mgr->list));
2357         REQUIRE(mgr->stats == NULL);
2358
2359         isc_stats_attach(stats, &mgr->stats);
2360 }
2361
/*
 * bsearch()-style comparison of two in_port_t values: returns -1, 0 or 1
 * when the port at 'key' is less than, equal to or greater than the port
 * at 'ent'.
 */
static int
port_cmp(const void *key, const void *ent) {
	const in_port_t wanted = *(const in_port_t *)key;
	const in_port_t entry = *(const in_port_t *)ent;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
	return ((wanted > entry) - (wanted < entry));
}
2374
2375 static isc_boolean_t
2376 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2377               isc_sockaddr_t *sockaddrp)
2378 {
2379         isc_sockaddr_t sockaddr;
2380         isc_result_t result;
2381         in_port_t *ports, port;
2382         unsigned int nports;
2383         isc_boolean_t available = ISC_FALSE;
2384
2385         REQUIRE(sock != NULL || sockaddrp != NULL);
2386
2387         PORTBUFLOCK(mgr);
2388         if (sock != NULL) {
2389                 sockaddrp = &sockaddr;
2390                 result = isc_socket_getsockname(sock, sockaddrp);
2391                 if (result != ISC_R_SUCCESS)
2392                         goto unlock;
2393         }
2394
2395         if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2396                 ports = mgr->v4ports;
2397                 nports = mgr->nv4ports;
2398         } else {
2399                 ports = mgr->v6ports;
2400                 nports = mgr->nv6ports;
2401         }
2402         if (ports == NULL)
2403                 goto unlock;
2404
2405         port = isc_sockaddr_getport(sockaddrp);
2406         if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2407                 available = ISC_TRUE;
2408
2409 unlock:
2410         PORTBUFUNLOCK(mgr);
2411         return (available);
2412 }
2413
2414 #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
2415
2416 static isc_boolean_t
2417 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2418         isc_sockaddr_t sockaddr;
2419         isc_result_t result;
2420
2421         REQUIRE(disp->socket != NULL);
2422
2423         if (addr == NULL)
2424                 return (ISC_TRUE);
2425
2426         /*
2427          * Don't match wildcard ports unless the port is available in the
2428          * current configuration.
2429          */
2430         if (isc_sockaddr_getport(addr) == 0 &&
2431             isc_sockaddr_getport(&disp->local) == 0 &&
2432             !portavailable(disp->mgr, disp->socket, NULL)) {
2433                 return (ISC_FALSE);
2434         }
2435
2436         /*
2437          * Check if we match the binding <address,port>.
2438          * Wildcard ports match/fail here.
2439          */
2440         if (isc_sockaddr_equal(&disp->local, addr))
2441                 return (ISC_TRUE);
2442         if (isc_sockaddr_getport(addr) == 0)
2443                 return (ISC_FALSE);
2444
2445         /*
2446          * Check if we match a bound wildcard port <address,port>.
2447          */
2448         if (!isc_sockaddr_eqaddr(&disp->local, addr))
2449                 return (ISC_FALSE);
2450         result = isc_socket_getsockname(disp->socket, &sockaddr);
2451         if (result != ISC_R_SUCCESS)
2452                 return (ISC_FALSE);
2453
2454         return (isc_sockaddr_equal(&sockaddr, addr));
2455 }
2456
2457 /*
2458  * Requires mgr be locked.
2459  *
2460  * No dispatcher can be locked by this thread when calling this function.
2461  *
2462  *
2463  * NOTE:
2464  *      If a matching dispatcher is found, it is locked after this function
2465  *      returns, and must be unlocked by the caller.
2466  */
2467 static isc_result_t
2468 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2469               unsigned int attributes, unsigned int mask,
2470               dns_dispatch_t **dispp)
2471 {
2472         dns_dispatch_t *disp;
2473         isc_result_t result;
2474
2475         /*
2476          * Make certain that we will not match a private or exclusive dispatch.
2477          */
2478         attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2479         mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2480
2481         disp = ISC_LIST_HEAD(mgr->list);
2482         while (disp != NULL) {
2483                 LOCK(&disp->lock);
2484                 if ((disp->shutting_down == 0)
2485                     && ATTRMATCH(disp->attributes, attributes, mask)
2486                     && local_addr_match(disp, local))
2487                         break;
2488                 UNLOCK(&disp->lock);
2489                 disp = ISC_LIST_NEXT(disp, link);
2490         }
2491
2492         if (disp == NULL) {
2493                 result = ISC_R_NOTFOUND;
2494                 goto out;
2495         }
2496
2497         *dispp = disp;
2498         result = ISC_R_SUCCESS;
2499  out:
2500
2501         return (result);
2502 }
2503
2504 static isc_result_t
2505 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2506              unsigned int increment, dns_qid_t **qidp,
2507              isc_boolean_t needsocktable)
2508 {
2509         dns_qid_t *qid;
2510         unsigned int i;
2511         isc_result_t result;
2512
2513         REQUIRE(VALID_DISPATCHMGR(mgr));
2514         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2515         REQUIRE(increment > buckets);
2516         REQUIRE(qidp != NULL && *qidp == NULL);
2517
2518         qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2519         if (qid == NULL)
2520                 return (ISC_R_NOMEMORY);
2521
2522         qid->qid_table = isc_mem_get(mgr->mctx,
2523                                      buckets * sizeof(dns_displist_t));
2524         if (qid->qid_table == NULL) {
2525                 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2526                 return (ISC_R_NOMEMORY);
2527         }
2528
2529         qid->sock_table = NULL;
2530         if (needsocktable) {
2531                 qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2532                                               sizeof(dispsocketlist_t));
2533                 if (qid->sock_table == NULL) {
2534                         isc_mem_put(mgr->mctx, qid->qid_table,
2535                                     buckets * sizeof(dns_displist_t));
2536                         isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2537                         return (ISC_R_NOMEMORY);
2538                 }
2539         }
2540
2541         result = isc_mutex_init(&qid->lock);
2542         if (result != ISC_R_SUCCESS) {
2543                 if (qid->sock_table != NULL) {
2544                         isc_mem_put(mgr->mctx, qid->sock_table,
2545                                     buckets * sizeof(dispsocketlist_t));
2546                 }
2547                 isc_mem_put(mgr->mctx, qid->qid_table,
2548                             buckets * sizeof(dns_displist_t));
2549                 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2550                 return (result);
2551         }
2552
2553         for (i = 0; i < buckets; i++) {
2554                 ISC_LIST_INIT(qid->qid_table[i]);
2555                 if (qid->sock_table != NULL)
2556                         ISC_LIST_INIT(qid->sock_table[i]);
2557         }
2558
2559         qid->qid_nbuckets = buckets;
2560         qid->qid_increment = increment;
2561         qid->magic = QID_MAGIC;
2562         *qidp = qid;
2563         return (ISC_R_SUCCESS);
2564 }
2565
2566 static void
2567 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2568         dns_qid_t *qid;
2569
2570         REQUIRE(qidp != NULL);
2571         qid = *qidp;
2572
2573         REQUIRE(VALID_QID(qid));
2574
2575         *qidp = NULL;
2576         qid->magic = 0;
2577         isc_mem_put(mctx, qid->qid_table,
2578                     qid->qid_nbuckets * sizeof(dns_displist_t));
2579         if (qid->sock_table != NULL) {
2580                 isc_mem_put(mctx, qid->sock_table,
2581                             qid->qid_nbuckets * sizeof(dispsocketlist_t));
2582         }
2583         DESTROYLOCK(&qid->lock);
2584         isc_mem_put(mctx, qid, sizeof(*qid));
2585 }
2586
2587 /*
2588  * Allocate and set important limits.
2589  */
2590 static isc_result_t
2591 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2592                   dns_dispatch_t **dispp)
2593 {
2594         dns_dispatch_t *disp;
2595         isc_result_t result;
2596
2597         REQUIRE(VALID_DISPATCHMGR(mgr));
2598         REQUIRE(dispp != NULL && *dispp == NULL);
2599
2600         /*
2601          * Set up the dispatcher, mostly.  Don't bother setting some of
2602          * the options that are controlled by tcp vs. udp, etc.
2603          */
2604
2605         disp = isc_mempool_get(mgr->dpool);
2606         if (disp == NULL)
2607                 return (ISC_R_NOMEMORY);
2608
2609         disp->magic = 0;
2610         disp->mgr = mgr;
2611         disp->maxrequests = maxrequests;
2612         disp->attributes = 0;
2613         ISC_LINK_INIT(disp, link);
2614         disp->refcount = 1;
2615         disp->recv_pending = 0;
2616         memset(&disp->local, 0, sizeof(disp->local));
2617         disp->localport = 0;
2618         disp->shutting_down = 0;
2619         disp->shutdown_out = 0;
2620         disp->connected = 0;
2621         disp->tcpmsg_valid = 0;
2622         disp->shutdown_why = ISC_R_UNEXPECTED;
2623         disp->requests = 0;
2624         disp->tcpbuffers = 0;
2625         disp->qid = NULL;
2626         ISC_LIST_INIT(disp->activesockets);
2627         ISC_LIST_INIT(disp->inactivesockets);
2628         disp->nsockets = 0;
2629         dispatch_initrandom(&disp->arc4ctx, mgr->entropy, NULL);
2630         disp->port_table = NULL;
2631         disp->portpool = NULL;
2632
2633         result = isc_mutex_init(&disp->lock);
2634         if (result != ISC_R_SUCCESS)
2635                 goto deallocate;
2636
2637         disp->failsafe_ev = allocate_devent(disp);
2638         if (disp->failsafe_ev == NULL) {
2639                 result = ISC_R_NOMEMORY;
2640                 goto kill_lock;
2641         }
2642
2643         disp->magic = DISPATCH_MAGIC;
2644
2645         *dispp = disp;
2646         return (ISC_R_SUCCESS);
2647
2648         /*
2649          * error returns
2650          */
2651  kill_lock:
2652         DESTROYLOCK(&disp->lock);
2653  deallocate:
2654         isc_mempool_put(mgr->dpool, disp);
2655
2656         return (result);
2657 }
2658
2659
2660 /*
2661  * MUST be unlocked, and not used by anything.
2662  */
2663 static void
2664 dispatch_free(dns_dispatch_t **dispp) {
2665         dns_dispatch_t *disp;
2666         dns_dispatchmgr_t *mgr;
2667         int i;
2668
2669         REQUIRE(VALID_DISPATCH(*dispp));
2670         disp = *dispp;
2671         *dispp = NULL;
2672
2673         mgr = disp->mgr;
2674         REQUIRE(VALID_DISPATCHMGR(mgr));
2675
2676         if (disp->tcpmsg_valid) {
2677                 dns_tcpmsg_invalidate(&disp->tcpmsg);
2678                 disp->tcpmsg_valid = 0;
2679         }
2680
2681         INSIST(disp->tcpbuffers == 0);
2682         INSIST(disp->requests == 0);
2683         INSIST(disp->recv_pending == 0);
2684         INSIST(ISC_LIST_EMPTY(disp->activesockets));
2685         INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2686
2687         isc_mempool_put(mgr->depool, disp->failsafe_ev);
2688         disp->failsafe_ev = NULL;
2689
2690         if (disp->qid != NULL)
2691                 qid_destroy(mgr->mctx, &disp->qid);
2692
2693         if (disp->port_table != NULL) {
2694                 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2695                         INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2696                 isc_mem_put(mgr->mctx, disp->port_table,
2697                             sizeof(disp->port_table[0]) *
2698                             DNS_DISPATCH_PORTTABLESIZE);
2699         }
2700
2701         if (disp->portpool != NULL)
2702                 isc_mempool_destroy(&disp->portpool);
2703
2704         disp->mgr = NULL;
2705         DESTROYLOCK(&disp->lock);
2706         disp->magic = 0;
2707         isc_mempool_put(mgr->dpool, disp);
2708 }
2709
2710 isc_result_t
2711 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2712                        isc_taskmgr_t *taskmgr, unsigned int buffersize,
2713                        unsigned int maxbuffers, unsigned int maxrequests,
2714                        unsigned int buckets, unsigned int increment,
2715                        unsigned int attributes, dns_dispatch_t **dispp)
2716 {
2717         isc_result_t result;
2718         dns_dispatch_t *disp;
2719
2720         UNUSED(maxbuffers);
2721         UNUSED(buffersize);
2722
2723         REQUIRE(VALID_DISPATCHMGR(mgr));
2724         REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2725         REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2726         REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2727
2728         attributes |= DNS_DISPATCHATTR_PRIVATE;  /* XXXMLG */
2729
2730         LOCK(&mgr->lock);
2731
2732         /*
2733          * dispatch_allocate() checks mgr for us.
2734          * qid_allocate() checks buckets and increment for us.
2735          */
2736         disp = NULL;
2737         result = dispatch_allocate(mgr, maxrequests, &disp);
2738         if (result != ISC_R_SUCCESS) {
2739                 UNLOCK(&mgr->lock);
2740                 return (result);
2741         }
2742
2743         result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2744         if (result != ISC_R_SUCCESS)
2745                 goto deallocate_dispatch;
2746
2747         disp->socktype = isc_sockettype_tcp;
2748         disp->socket = NULL;
2749         isc_socket_attach(sock, &disp->socket);
2750
2751         disp->sepool = NULL;
2752
2753         disp->ntasks = 1;
2754         disp->task[0] = NULL;
2755         result = isc_task_create(taskmgr, 0, &disp->task[0]);
2756         if (result != ISC_R_SUCCESS)
2757                 goto kill_socket;
2758
2759         disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2760                                             DNS_EVENT_DISPATCHCONTROL,
2761                                             destroy_disp, disp,
2762                                             sizeof(isc_event_t));
2763         if (disp->ctlevent == NULL) {
2764                 result = ISC_R_NOMEMORY;
2765                 goto kill_task;
2766         }
2767
2768         isc_task_setname(disp->task[0], "tcpdispatch", disp);
2769
2770         dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2771         disp->tcpmsg_valid = 1;
2772
2773         disp->attributes = attributes;
2774
2775         /*
2776          * Append it to the dispatcher list.
2777          */
2778         ISC_LIST_APPEND(mgr->list, disp, link);
2779         UNLOCK(&mgr->lock);
2780
2781         mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2782         dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2783
2784         *dispp = disp;
2785
2786         return (ISC_R_SUCCESS);
2787
2788         /*
2789          * Error returns.
2790          */
2791  kill_task:
2792         isc_task_detach(&disp->task[0]);
2793  kill_socket:
2794         isc_socket_detach(&disp->socket);
2795  deallocate_dispatch:
2796         dispatch_free(&disp);
2797
2798         UNLOCK(&mgr->lock);
2799
2800         return (result);
2801 }
2802
2803 isc_result_t
2804 dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2805                     isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2806                     unsigned int buffersize,
2807                     unsigned int maxbuffers, unsigned int maxrequests,
2808                     unsigned int buckets, unsigned int increment,
2809                     unsigned int attributes, unsigned int mask,
2810                     dns_dispatch_t **dispp, dns_dispatch_t *dup_dispatch)
2811 {
2812         isc_result_t result;
2813         dns_dispatch_t *disp = NULL;
2814
2815         REQUIRE(VALID_DISPATCHMGR(mgr));
2816         REQUIRE(sockmgr != NULL);
2817         REQUIRE(localaddr != NULL);
2818         REQUIRE(taskmgr != NULL);
2819         REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2820         REQUIRE(maxbuffers > 0);
2821         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2822         REQUIRE(increment > buckets);
2823         REQUIRE(dispp != NULL && *dispp == NULL);
2824         REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2825
2826         result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2827                                         maxrequests, buckets, increment);
2828         if (result != ISC_R_SUCCESS)
2829                 return (result);
2830
2831         LOCK(&mgr->lock);
2832
2833         if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2834                 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2835                 goto createudp;
2836         }
2837
2838         /*
2839          * See if we have a dispatcher that matches.
2840          */
2841         if (dup_dispatch == NULL) {
2842                 result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2843                 if (result == ISC_R_SUCCESS) {
2844                         disp->refcount++;
2845
2846                         if (disp->maxrequests < maxrequests)
2847                                 disp->maxrequests = maxrequests;
2848
2849                         if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0
2850                             && (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2851                         {
2852                                 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2853                                 if (disp->recv_pending != 0)
2854                                         isc_socket_cancel(disp->socket,
2855                                                           disp->task[0],
2856                                                           ISC_SOCKCANCEL_RECV);
2857                         }
2858
2859                         UNLOCK(&disp->lock);
2860                         UNLOCK(&mgr->lock);
2861
2862                         *dispp = disp;
2863
2864                         return (ISC_R_SUCCESS);
2865                 }
2866         }
2867
2868  createudp:
2869         /*
2870          * Nope, create one.
2871          */
2872         result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2873                                     maxrequests, attributes, &disp,
2874                                     dup_dispatch == NULL
2875                                             ? NULL
2876                                             : dup_dispatch->socket);
2877
2878         if (result != ISC_R_SUCCESS) {
2879                 UNLOCK(&mgr->lock);
2880                 return (result);
2881         }
2882
2883         UNLOCK(&mgr->lock);
2884         *dispp = disp;
2885
2886         return (ISC_R_SUCCESS);
2887 }
2888
2889 isc_result_t
2890 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2891                     isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2892                     unsigned int buffersize,
2893                     unsigned int maxbuffers, unsigned int maxrequests,
2894                     unsigned int buckets, unsigned int increment,
2895                     unsigned int attributes, unsigned int mask,
2896                     dns_dispatch_t **dispp)
2897 {
2898         return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr,
2899                                         buffersize, maxbuffers, maxrequests,
2900                                         buckets, increment, attributes,
2901                                         mask, dispp, NULL));
2902 }
2903
2904 /*
2905  * mgr should be locked.
2906  */
2907
2908 #ifndef DNS_DISPATCH_HELD
2909 #define DNS_DISPATCH_HELD 20U
2910 #endif
2911
2912 static isc_result_t
2913 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2914               isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2915               isc_socket_t **sockp, isc_socket_t *dup_socket)
2916 {
2917         unsigned int i, j;
2918         isc_socket_t *held[DNS_DISPATCH_HELD];
2919         isc_sockaddr_t localaddr_bound;
2920         isc_socket_t *sock = NULL;
2921         isc_result_t result = ISC_R_SUCCESS;
2922         isc_boolean_t anyport;
2923
2924         INSIST(sockp != NULL && *sockp == NULL);
2925
2926         localaddr_bound = *localaddr;
2927         anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
2928
2929         if (anyport) {
2930                 unsigned int nports;
2931                 in_port_t *ports;
2932
2933                 /*
2934                  * If no port is specified, we first try to pick up a random
2935                  * port by ourselves.
2936                  */
2937                 if (isc_sockaddr_pf(localaddr) == AF_INET) {
2938                         nports = disp->mgr->nv4ports;
2939                         ports = disp->mgr->v4ports;
2940                 } else {
2941                         nports = disp->mgr->nv6ports;
2942                         ports = disp->mgr->v6ports;
2943                 }
2944                 if (nports == 0)
2945                         return (ISC_R_ADDRNOTAVAIL);
2946
2947                 for (i = 0; i < 1024; i++) {
2948                         in_port_t prt;
2949
2950                         prt = ports[dispatch_uniformrandom(
2951                                         DISP_ARC4CTX(disp),
2952                                         nports)];
2953                         isc_sockaddr_setport(&localaddr_bound, prt);
2954                         result = open_socket(sockmgr, &localaddr_bound,
2955                                              0, &sock, NULL);
2956                         /*
2957                          * Continue if the port choosen is already in use
2958                          * or the OS has reserved it.
2959                          */
2960                         if (result == ISC_R_NOPERM ||
2961                             result == ISC_R_ADDRINUSE)
2962                                 continue;
2963                         disp->localport = prt;
2964                         *sockp = sock;
2965                         return (result);
2966                 }
2967
2968                 /*
2969                  * If this fails 1024 times, we then ask the kernel for
2970                  * choosing one.
2971                  */
2972         } else {
2973                 /* Allow to reuse address for non-random ports. */
2974                 result = open_socket(sockmgr, localaddr,
2975                                      ISC_SOCKET_REUSEADDRESS, &sock,
2976                                      dup_socket);
2977
2978                 if (result == ISC_R_SUCCESS)
2979                         *sockp = sock;
2980
2981                 return (result);
2982         }
2983
2984         memset(held, 0, sizeof(held));
2985         i = 0;
2986
2987         for (j = 0; j < 0xffffU; j++) {
2988                 result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
2989                 if (result != ISC_R_SUCCESS)
2990                         goto end;
2991                 else if (portavailable(mgr, sock, NULL))
2992                         break;
2993                 if (held[i] != NULL)
2994                         isc_socket_detach(&held[i]);
2995                 held[i++] = sock;
2996                 sock = NULL;
2997                 if (i == DNS_DISPATCH_HELD)
2998                         i = 0;
2999         }
3000         if (j == 0xffffU) {
3001                 mgr_log(mgr, ISC_LOG_ERROR,
3002                         "avoid-v%s-udp-ports: unable to allocate "
3003                         "an available port",
3004                         isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
3005                 result = ISC_R_FAILURE;
3006                 goto end;
3007         }
3008         *sockp = sock;
3009
3010 end:
3011         for (i = 0; i < DNS_DISPATCH_HELD; i++) {
3012                 if (held[i] != NULL)
3013                         isc_socket_detach(&held[i]);
3014         }
3015
3016         return (result);
3017 }
3018
3019 static isc_result_t
3020 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
3021                    isc_taskmgr_t *taskmgr,
3022                    isc_sockaddr_t *localaddr,
3023                    unsigned int maxrequests,
3024                    unsigned int attributes,
3025                    dns_dispatch_t **dispp,
3026                    isc_socket_t *dup_socket)
3027 {
3028         isc_result_t result;
3029         dns_dispatch_t *disp;
3030         isc_socket_t *sock = NULL;
3031         int i = 0;
3032
3033         /*
3034          * dispatch_allocate() checks mgr for us.
3035          */
3036         disp = NULL;
3037         result = dispatch_allocate(mgr, maxrequests, &disp);
3038         if (result != ISC_R_SUCCESS)
3039                 return (result);
3040
3041         disp->socktype = isc_sockettype_udp;
3042
3043         if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
3044                 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock,
3045                                        dup_socket);
3046                 if (result != ISC_R_SUCCESS)
3047                         goto deallocate_dispatch;
3048
3049                 if (isc_log_wouldlog(dns_lctx, 90)) {
3050                         char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3051
3052                         isc_sockaddr_format(localaddr, addrbuf,
3053                                             ISC_SOCKADDR_FORMATSIZE);
3054                         mgr_log(mgr, LVL(90), "dns_dispatch_createudp: Created"
3055                                 " UDP dispatch for %s with socket fd %d\n",
3056                                 addrbuf, isc_socket_getfd(sock));
3057                 }
3058
3059         } else {
3060                 isc_sockaddr_t sa_any;
3061
3062                 /*
3063                  * For dispatches using exclusive sockets with a specific
3064                  * source address, we only check if the specified address is
3065                  * available on the system.  Query sockets will be created later
3066                  * on demand.
3067                  */
3068                 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
3069                 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
3070                         result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
3071                         if (sock != NULL)
3072                                 isc_socket_detach(&sock);
3073                         if (result != ISC_R_SUCCESS)
3074                                 goto deallocate_dispatch;
3075                 }
3076
3077                 disp->port_table = isc_mem_get(mgr->mctx,
3078                                                sizeof(disp->port_table[0]) *
3079                                                DNS_DISPATCH_PORTTABLESIZE);
3080                 if (disp->port_table == NULL)
3081                         goto deallocate_dispatch;
3082                 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
3083                         ISC_LIST_INIT(disp->port_table[i]);
3084
3085                 result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
3086                                             &disp->portpool);
3087                 if (result != ISC_R_SUCCESS)
3088                         goto deallocate_dispatch;
3089                 isc_mempool_setname(disp->portpool, "disp_portpool");
3090                 isc_mempool_setfreemax(disp->portpool, 128);
3091         }
3092         disp->socket = sock;
3093         disp->local = *localaddr;
3094
3095         if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3096                 disp->ntasks = MAX_INTERNAL_TASKS;
3097         else
3098                 disp->ntasks = 1;
3099         for (i = 0; i < disp->ntasks; i++) {
3100                 disp->task[i] = NULL;
3101                 result = isc_task_create(taskmgr, 0, &disp->task[i]);
3102                 if (result != ISC_R_SUCCESS) {
3103                         while (--i >= 0) {
3104                                 isc_task_shutdown(disp->task[i]);
3105                                 isc_task_detach(&disp->task[i]);
3106                         }
3107                         goto kill_socket;
3108                 }
3109                 isc_task_setname(disp->task[i], "udpdispatch", disp);
3110         }
3111
3112         disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
3113                                             DNS_EVENT_DISPATCHCONTROL,
3114                                             destroy_disp, disp,
3115                                             sizeof(isc_event_t));
3116         if (disp->ctlevent == NULL) {
3117                 result = ISC_R_NOMEMORY;
3118                 goto kill_task;
3119         }
3120
3121         disp->sepool = NULL;
3122         if (isc_mempool_create(mgr->mctx, sizeof(isc_socketevent_t),
3123                                &disp->sepool) != ISC_R_SUCCESS)
3124         {
3125                 result = ISC_R_NOMEMORY;
3126                 goto kill_ctlevent;
3127         }
3128
3129         result = isc_mutex_init(&disp->sepool_lock);
3130         if (result != ISC_R_SUCCESS)
3131                 goto kill_sepool;
3132
3133         isc_mempool_setname(disp->sepool, "disp_sepool");
3134         isc_mempool_setmaxalloc(disp->sepool, 32768);
3135         isc_mempool_setfreemax(disp->sepool, 32768);
3136         isc_mempool_associatelock(disp->sepool, &disp->sepool_lock);
3137         isc_mempool_setfillcount(disp->sepool, 16);
3138
3139         attributes &= ~DNS_DISPATCHATTR_TCP;
3140         attributes |= DNS_DISPATCHATTR_UDP;
3141         disp->attributes = attributes;
3142
3143         /*
3144          * Append it to the dispatcher list.
3145          */
3146         ISC_LIST_APPEND(mgr->list, disp, link);
3147
3148         mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
3149         dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
3150         if (disp->socket != NULL)
3151                 dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
3152
3153         *dispp = disp;
3154
3155         return (result);
3156
3157         /*
3158          * Error returns.
3159          */
3160  kill_sepool:
3161         isc_mempool_destroy(&disp->sepool);
3162  kill_ctlevent:
3163         isc_event_free(&disp->ctlevent);
3164  kill_task:
3165         for (i = 0; i < disp->ntasks; i++)
3166                 isc_task_detach(&disp->task[i]);
3167  kill_socket:
3168         if (disp->socket != NULL)
3169                 isc_socket_detach(&disp->socket);
3170  deallocate_dispatch:
3171         dispatch_free(&disp);
3172
3173         return (result);
3174 }
3175
3176 void
3177 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
3178         REQUIRE(VALID_DISPATCH(disp));
3179         REQUIRE(dispp != NULL && *dispp == NULL);
3180
3181         LOCK(&disp->lock);
3182         disp->refcount++;
3183         UNLOCK(&disp->lock);
3184
3185         *dispp = disp;
3186 }
3187
3188 /*
3189  * It is important to lock the manager while we are deleting the dispatch,
3190  * since dns_dispatch_getudp will call dispatch_find, which returns to
3191  * the caller a dispatch but does not attach to it until later.  _getudp
3192  * locks the manager, however, so locking it here will keep us from attaching
3193  * to a dispatcher that is in the process of going away.
3194  */
3195 void
3196 dns_dispatch_detach(dns_dispatch_t **dispp) {
3197         dns_dispatch_t *disp;
3198         dispsocket_t *dispsock;
3199         isc_boolean_t killit;
3200
3201         REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
3202
3203         disp = *dispp;
3204         *dispp = NULL;
3205
3206         LOCK(&disp->lock);
3207
3208         INSIST(disp->refcount > 0);
3209         disp->refcount--;
3210         if (disp->refcount == 0) {
3211                 if (disp->recv_pending > 0)
3212                         isc_socket_cancel(disp->socket, disp->task[0],
3213                                           ISC_SOCKCANCEL_RECV);
3214                 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3215                      dispsock != NULL;
3216                      dispsock = ISC_LIST_NEXT(dispsock, link)) {
3217                         isc_socket_cancel(dispsock->socket, dispsock->task,
3218                                           ISC_SOCKCANCEL_RECV);
3219                 }
3220                 disp->shutting_down = 1;
3221         }
3222
3223         dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
3224
3225         killit = destroy_disp_ok(disp);
3226         UNLOCK(&disp->lock);
3227         if (killit)
3228                 isc_task_send(disp->task[0], &disp->ctlevent);
3229 }
3230
3231 isc_result_t
3232 dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3233                           isc_task_t *task, isc_taskaction_t action, void *arg,
3234                           dns_messageid_t *idp, dns_dispentry_t **resp,
3235                           isc_socketmgr_t *sockmgr)
3236 {
3237         return (dns_dispatch_addresponse3(disp, 0, dest, task, action, arg,
3238                                           idp, resp, sockmgr));
3239 }
3240
3241 isc_result_t
3242 dns_dispatch_addresponse3(dns_dispatch_t *disp, unsigned int options,
3243                           isc_sockaddr_t *dest, isc_task_t *task,
3244                           isc_taskaction_t action, void *arg,
3245                           dns_messageid_t *idp, dns_dispentry_t **resp,
3246                           isc_socketmgr_t *sockmgr)
3247 {
3248         dns_dispentry_t *res;
3249         unsigned int bucket;
3250         in_port_t localport = 0;
3251         dns_messageid_t id;
3252         int i;
3253         isc_boolean_t ok;
3254         dns_qid_t *qid;
3255         dispsocket_t *dispsocket = NULL;
3256         isc_result_t result;
3257
3258         REQUIRE(VALID_DISPATCH(disp));
3259         REQUIRE(task != NULL);
3260         REQUIRE(dest != NULL);
3261         REQUIRE(resp != NULL && *resp == NULL);
3262         REQUIRE(idp != NULL);
3263         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3264                 REQUIRE(sockmgr != NULL);
3265
3266         LOCK(&disp->lock);
3267
3268         if (disp->shutting_down == 1) {
3269                 UNLOCK(&disp->lock);
3270                 return (ISC_R_SHUTTINGDOWN);
3271         }
3272
3273         if (disp->requests >= disp->maxrequests) {
3274                 UNLOCK(&disp->lock);
3275                 return (ISC_R_QUOTA);
3276         }
3277
3278         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
3279             disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
3280                 dispsocket_t *oldestsocket;
3281                 dns_dispentry_t *oldestresp;
3282                 dns_dispatchevent_t *rev;
3283
3284                 /*
3285                  * Kill oldest outstanding query if the number of sockets
3286                  * exceeds the quota to keep the room for new queries.
3287                  */
3288                 oldestsocket = ISC_LIST_HEAD(disp->activesockets);
3289                 oldestresp = oldestsocket->resp;
3290                 if (oldestresp != NULL && !oldestresp->item_out) {
3291                         rev = allocate_devent(oldestresp->disp);
3292                         if (rev != NULL) {
3293                                 rev->buffer.base = NULL;
3294                                 rev->result = ISC_R_CANCELED;
3295                                 rev->id = oldestresp->id;
3296                                 ISC_EVENT_INIT(rev, sizeof(*rev), 0,
3297                                                NULL, DNS_EVENT_DISPATCH,
3298                                                oldestresp->action,
3299                                                oldestresp->arg, oldestresp,
3300                                                NULL, NULL);
3301                                 oldestresp->item_out = ISC_TRUE;
3302                                 isc_task_send(oldestresp->task,
3303                                               ISC_EVENT_PTR(&rev));
3304                                 inc_stats(disp->mgr,
3305                                           dns_resstatscounter_dispabort);
3306                         }
3307                 }
3308
3309                 /*
3310                  * Move this entry to the tail so that it won't (easily) be
3311                  * examined before actually being canceled.
3312                  */
3313                 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
3314                 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
3315         }
3316
3317         qid = DNS_QID(disp);
3318
3319         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3320                 /*
3321                  * Get a separate UDP socket with a random port number.
3322                  */
3323                 result = get_dispsocket(disp, dest, sockmgr, &dispsocket,
3324                                         &localport);
3325                 if (result != ISC_R_SUCCESS) {
3326                         UNLOCK(&disp->lock);
3327                         inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
3328                         return (result);
3329                 }
3330         } else {
3331                 localport = disp->localport;
3332         }
3333
3334         /*
3335          * Try somewhat hard to find an unique ID unless FIXEDID is set
3336          * in which case we use the id passed in via *idp.
3337          */
3338         LOCK(&qid->lock);
3339         if ((options & DNS_DISPATCHOPT_FIXEDID) != 0)
3340                 id = *idp;
3341         else
3342                 id = (dns_messageid_t)dispatch_random(DISP_ARC4CTX(disp));
3343         ok = ISC_FALSE;
3344         i = 0;
3345         do {
3346                 bucket = dns_hash(qid, dest, id, localport);
3347                 if (entry_search(qid, dest, id, localport, bucket) == NULL) {
3348                         ok = ISC_TRUE;
3349                         break;
3350                 }
3351                 if ((disp->attributes & DNS_DISPATCHATTR_FIXEDID) != 0)
3352                         break;
3353                 id += qid->qid_increment;
3354                 id &= 0x0000ffff;
3355         } while (i++ < 64);
3356         UNLOCK(&qid->lock);
3357
3358         if (!ok) {
3359                 UNLOCK(&disp->lock);
3360                 return (ISC_R_NOMORE);
3361         }
3362
3363         res = isc_mempool_get(disp->mgr->rpool);
3364         if (res == NULL) {
3365                 if (dispsocket != NULL)
3366                         destroy_dispsocket(disp, &dispsocket);
3367                 UNLOCK(&disp->lock);
3368                 return (ISC_R_NOMEMORY);
3369         }
3370
3371         disp->refcount++;
3372         disp->requests++;
3373         res->task = NULL;
3374         isc_task_attach(task, &res->task);
3375         res->disp = disp;
3376         res->id = id;
3377         res->port = localport;
3378         res->bucket = bucket;
3379         res->host = *dest;
3380         res->action = action;
3381         res->arg = arg;
3382         res->dispsocket = dispsocket;
3383         if (dispsocket != NULL)
3384                 dispsocket->resp = res;
3385         res->item_out = ISC_FALSE;
3386         ISC_LIST_INIT(res->items);
3387         ISC_LINK_INIT(res, link);
3388         res->magic = RESPONSE_MAGIC;
3389
3390         LOCK(&qid->lock);
3391         ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3392         UNLOCK(&qid->lock);
3393
3394         request_log(disp, res, LVL(90),
3395                     "attached to task %p", res->task);
3396
3397         if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3398             ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
3399                 result = startrecv(disp, dispsocket);
3400                 if (result != ISC_R_SUCCESS) {
3401                         LOCK(&qid->lock);
3402                         ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3403                         UNLOCK(&qid->lock);
3404
3405                         if (dispsocket != NULL)
3406                                 destroy_dispsocket(disp, &dispsocket);
3407
3408                         disp->refcount--;
3409                         disp->requests--;
3410
3411                         UNLOCK(&disp->lock);
3412                         isc_task_detach(&res->task);
3413                         isc_mempool_put(disp->mgr->rpool, res);
3414                         return (result);
3415                 }
3416         }
3417
3418         if (dispsocket != NULL)
3419                 ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3420
3421         UNLOCK(&disp->lock);
3422
3423         *idp = id;
3424         *resp = res;
3425
3426         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3427                 INSIST(res->dispsocket != NULL);
3428
3429         return (ISC_R_SUCCESS);
3430 }
3431
3432 isc_result_t
3433 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3434                          isc_task_t *task, isc_taskaction_t action, void *arg,
3435                          dns_messageid_t *idp, dns_dispentry_t **resp)
3436 {
3437         REQUIRE(VALID_DISPATCH(disp));
3438         REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3439
3440         return (dns_dispatch_addresponse3(disp, 0, dest, task, action, arg,
3441                                           idp, resp, NULL));
3442 }
3443
3444 void
3445 dns_dispatch_starttcp(dns_dispatch_t *disp) {
3446
3447         REQUIRE(VALID_DISPATCH(disp));
3448
3449         dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3450
3451         LOCK(&disp->lock);
3452         disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
3453         (void)startrecv(disp, NULL);
3454         UNLOCK(&disp->lock);
3455 }
3456
3457 void
3458 dns_dispatch_removeresponse(dns_dispentry_t **resp,
3459                             dns_dispatchevent_t **sockevent)
3460 {
3461         dns_dispatchmgr_t *mgr;
3462         dns_dispatch_t *disp;
3463         dns_dispentry_t *res;
3464         dispsocket_t *dispsock;
3465         dns_dispatchevent_t *ev;
3466         unsigned int bucket;
3467         isc_boolean_t killit;
3468         unsigned int n;
3469         isc_eventlist_t events;
3470         dns_qid_t *qid;
3471
3472         REQUIRE(resp != NULL);
3473         REQUIRE(VALID_RESPONSE(*resp));
3474
3475         res = *resp;
3476         *resp = NULL;
3477
3478         disp = res->disp;
3479         REQUIRE(VALID_DISPATCH(disp));
3480         mgr = disp->mgr;
3481         REQUIRE(VALID_DISPATCHMGR(mgr));
3482
3483         qid = DNS_QID(disp);
3484
3485         if (sockevent != NULL) {
3486                 REQUIRE(*sockevent != NULL);
3487                 ev = *sockevent;
3488                 *sockevent = NULL;
3489         } else {
3490                 ev = NULL;
3491         }
3492
3493         LOCK(&disp->lock);
3494
3495         INSIST(disp->requests > 0);
3496         disp->requests--;
3497         INSIST(disp->refcount > 0);
3498         disp->refcount--;
3499         if (disp->refcount == 0) {
3500                 if (disp->recv_pending > 0)
3501                         isc_socket_cancel(disp->socket, disp->task[0],
3502                                           ISC_SOCKCANCEL_RECV);
3503                 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3504                      dispsock != NULL;
3505                      dispsock = ISC_LIST_NEXT(dispsock, link)) {
3506                         isc_socket_cancel(dispsock->socket, dispsock->task,
3507                                           ISC_SOCKCANCEL_RECV);
3508                 }
3509                 disp->shutting_down = 1;
3510         }
3511
3512         bucket = res->bucket;
3513
3514         LOCK(&qid->lock);
3515         ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3516         UNLOCK(&qid->lock);
3517
3518         if (ev == NULL && res->item_out) {
3519                 /*
3520                  * We've posted our event, but the caller hasn't gotten it
3521                  * yet.  Take it back.
3522                  */
3523                 ISC_LIST_INIT(events);
3524                 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3525                                     NULL, &events);
3526                 /*
3527                  * We had better have gotten it back.
3528                  */
3529                 INSIST(n == 1);
3530                 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
3531         }
3532
3533         if (ev != NULL) {
3534                 REQUIRE(res->item_out == ISC_TRUE);
3535                 res->item_out = ISC_FALSE;
3536                 if (ev->buffer.base != NULL)
3537                         free_buffer(disp, ev->buffer.base, ev->buffer.length);
3538                 free_devent(disp, ev);
3539         }
3540
3541         request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3542         isc_task_detach(&res->task);
3543
3544         if (res->dispsocket != NULL) {
3545                 isc_socket_cancel(res->dispsocket->socket,
3546                                   res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3547                 res->dispsocket->resp = NULL;
3548         }
3549
3550         /*
3551          * Free any buffered requests as well
3552          */
3553         ev = ISC_LIST_HEAD(res->items);
3554         while (ev != NULL) {
3555                 ISC_LIST_UNLINK(res->items, ev, ev_link);
3556                 if (ev->buffer.base != NULL)
3557                         free_buffer(disp, ev->buffer.base, ev->buffer.length);
3558                 free_devent(disp, ev);
3559                 ev = ISC_LIST_HEAD(res->items);
3560         }
3561         res->magic = 0;
3562         isc_mempool_put(disp->mgr->rpool, res);
3563         if (disp->shutting_down == 1)
3564                 do_cancel(disp);
3565         else
3566                 (void)startrecv(disp, NULL);
3567
3568         killit = destroy_disp_ok(disp);
3569         UNLOCK(&disp->lock);
3570         if (killit)
3571                 isc_task_send(disp->task[0], &disp->ctlevent);
3572 }
3573
3574 static void
3575 do_cancel(dns_dispatch_t *disp) {
3576         dns_dispatchevent_t *ev;
3577         dns_dispentry_t *resp;
3578         dns_qid_t *qid;
3579
3580         if (disp->shutdown_out == 1)
3581                 return;
3582
3583         qid = DNS_QID(disp);
3584
3585         /*
3586          * Search for the first response handler without packets outstanding
3587          * unless a specific hander is given.
3588          */
3589         LOCK(&qid->lock);
3590         for (resp = linear_first(qid);
3591              resp != NULL && resp->item_out;
3592              /* Empty. */)
3593                 resp = linear_next(qid, resp);
3594
3595         /*
3596          * No one to send the cancel event to, so nothing to do.
3597          */
3598         if (resp == NULL)
3599                 goto unlock;
3600
3601         /*
3602          * Send the shutdown failsafe event to this resp.
3603          */
3604         ev = disp->failsafe_ev;
3605         ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3606                        resp->action, resp->arg, resp, NULL, NULL);
3607         ev->result = disp->shutdown_why;
3608         ev->buffer.base = NULL;
3609         ev->buffer.length = 0;
3610         disp->shutdown_out = 1;
3611         request_log(disp, resp, LVL(10),
3612                     "cancel: failsafe event %p -> task %p",
3613                     ev, resp->task);
3614         resp->item_out = ISC_TRUE;
3615         isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
3616  unlock:
3617         UNLOCK(&qid->lock);
3618 }
3619
3620 isc_socket_t *
3621 dns_dispatch_getsocket(dns_dispatch_t *disp) {
3622         REQUIRE(VALID_DISPATCH(disp));
3623
3624         return (disp->socket);
3625 }
3626
3627 isc_socket_t *
3628 dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3629         REQUIRE(VALID_RESPONSE(resp));
3630
3631         if (resp->dispsocket != NULL)
3632                 return (resp->dispsocket->socket);
3633         else
3634                 return (NULL);
3635 }
3636
3637 isc_result_t
3638 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3639
3640         REQUIRE(VALID_DISPATCH(disp));
3641         REQUIRE(addrp != NULL);
3642
3643         if (disp->socktype == isc_sockettype_udp) {
3644                 *addrp = disp->local;
3645                 return (ISC_R_SUCCESS);
3646         }
3647         return (ISC_R_NOTIMPLEMENTED);
3648 }
3649
3650 void
3651 dns_dispatch_cancel(dns_dispatch_t *disp) {
3652         REQUIRE(VALID_DISPATCH(disp));
3653
3654         LOCK(&disp->lock);
3655
3656         if (disp->shutting_down == 1) {
3657                 UNLOCK(&disp->lock);
3658                 return;
3659         }
3660
3661         disp->shutdown_why = ISC_R_CANCELED;
3662         disp->shutting_down = 1;
3663         do_cancel(disp);
3664
3665         UNLOCK(&disp->lock);
3666
3667         return;
3668 }
3669
3670 unsigned int
3671 dns_dispatch_getattributes(dns_dispatch_t *disp) {
3672         REQUIRE(VALID_DISPATCH(disp));
3673
3674         /*
3675          * We don't bother locking disp here; it's the caller's responsibility
3676          * to use only non volatile flags.
3677          */
3678         return (disp->attributes);
3679 }
3680
3681 void
3682 dns_dispatch_changeattributes(dns_dispatch_t *disp,
3683                               unsigned int attributes, unsigned int mask)
3684 {
3685         REQUIRE(VALID_DISPATCH(disp));
3686         /* Exclusive attribute can only be set on creation */
3687         REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3688         /* Also, a dispatch with randomport specified cannot start listening */
3689         REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3690                 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3691
3692         /* XXXMLG
3693          * Should check for valid attributes here!
3694          */
3695
3696         LOCK(&disp->lock);
3697
3698         if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3699                 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3700                     (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3701                         disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3702                         (void)startrecv(disp, NULL);
3703                 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3704                            == 0 &&
3705                            (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3706                         disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3707                         if (disp->recv_pending != 0)
3708                                 isc_socket_cancel(disp->socket, disp->task[0],
3709                                                   ISC_SOCKCANCEL_RECV);
3710                 }
3711         }
3712
3713         disp->attributes &= ~mask;
3714         disp->attributes |= (attributes & mask);
3715         UNLOCK(&disp->lock);
3716 }
3717
3718 void
3719 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3720         void *buf;
3721         isc_socketevent_t *sevent, *newsevent;
3722
3723         REQUIRE(VALID_DISPATCH(disp));
3724         REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3725         REQUIRE(event != NULL);
3726
3727         sevent = (isc_socketevent_t *)event;
3728
3729         INSIST(sevent->n <= disp->mgr->buffersize);
3730         newsevent = (isc_socketevent_t *)
3731                     isc_event_allocate(disp->mgr->mctx, NULL,
3732                                       DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3733                                       disp, sizeof(isc_socketevent_t));
3734         if (newsevent == NULL)
3735                 return;
3736
3737         buf = allocate_udp_buffer(disp);
3738         if (buf == NULL) {
3739                 isc_event_free(ISC_EVENT_PTR(&newsevent));
3740                 return;
3741         }
3742         memmove(buf, sevent->region.base, sevent->n);
3743         newsevent->region.base = buf;
3744         newsevent->region.length = disp->mgr->buffersize;
3745         newsevent->n = sevent->n;
3746         newsevent->result = sevent->result;
3747         newsevent->address = sevent->address;
3748         newsevent->timestamp = sevent->timestamp;
3749         newsevent->pktinfo = sevent->pktinfo;
3750         newsevent->attributes = sevent->attributes;
3751
3752         isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
3753 }
3754
3755 dns_dispatch_t *
3756 dns_dispatchset_get(dns_dispatchset_t *dset) {
3757         dns_dispatch_t *disp;
3758
3759         /* check that dispatch set is configured */
3760         if (dset == NULL || dset->ndisp == 0)
3761                 return (NULL);
3762
3763         LOCK(&dset->lock);
3764         disp = dset->dispatches[dset->cur];
3765         dset->cur++;
3766         if (dset->cur == dset->ndisp)
3767                 dset->cur = 0;
3768         UNLOCK(&dset->lock);
3769
3770         return (disp);
3771 }
3772
3773 isc_result_t
3774 dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr,
3775                        isc_taskmgr_t *taskmgr, dns_dispatch_t *source,
3776                        dns_dispatchset_t **dsetp, int n)
3777 {
3778         isc_result_t result;
3779         dns_dispatchset_t *dset;
3780         dns_dispatchmgr_t *mgr;
3781         int i, j;
3782
3783         REQUIRE(VALID_DISPATCH(source));
3784         REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0);
3785         REQUIRE(dsetp != NULL && *dsetp == NULL);
3786
3787         mgr = source->mgr;
3788
3789         dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t));
3790         if (dset == NULL)
3791                 return (ISC_R_NOMEMORY);
3792         memset(dset, 0, sizeof(*dset));
3793
3794         result = isc_mutex_init(&dset->lock);
3795         if (result != ISC_R_SUCCESS)
3796                 goto fail_alloc;
3797
3798         dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n);
3799         if (dset == NULL) {
3800                 result = ISC_R_NOMEMORY;
3801                 goto fail_lock;
3802         }
3803
3804         isc_mem_attach(mctx, &dset->mctx);
3805         dset->ndisp = n;
3806         dset->cur = 0;
3807
3808         dset->dispatches[0] = NULL;
3809         dns_dispatch_attach(source, &dset->dispatches[0]);
3810
3811         LOCK(&mgr->lock);
3812         for (i = 1; i < n; i++) {
3813                 dset->dispatches[i] = NULL;
3814                 result = dispatch_createudp(mgr, sockmgr, taskmgr,
3815                                             &source->local,
3816                                             source->maxrequests,
3817                                             source->attributes,
3818                                             &dset->dispatches[i],
3819                                             source->socket);
3820                 if (result != ISC_R_SUCCESS)
3821                         goto fail;
3822         }
3823
3824         UNLOCK(&mgr->lock);
3825         *dsetp = dset;
3826
3827         return (ISC_R_SUCCESS);
3828
3829  fail:
3830         UNLOCK(&mgr->lock);
3831
3832         for (j = 0; j < i; j++)
3833                 dns_dispatch_detach(&(dset->dispatches[j]));
3834         isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n);
3835         if (dset->mctx == mctx)
3836                 isc_mem_detach(&dset->mctx);
3837
3838  fail_lock:
3839         DESTROYLOCK(&dset->lock);
3840
3841  fail_alloc:
3842         isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t));
3843         return (result);
3844 }
3845
3846 void
3847 dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) {
3848         int i;
3849
3850         REQUIRE(dset != NULL);
3851
3852         for (i = 0; i < dset->ndisp; i++) {
3853                 isc_socket_t *sock;
3854                 sock = dns_dispatch_getsocket(dset->dispatches[i]);
3855                 isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL);
3856         }
3857 }
3858
3859 void
3860 dns_dispatchset_destroy(dns_dispatchset_t **dsetp) {
3861         dns_dispatchset_t *dset;
3862         int i;
3863
3864         REQUIRE(dsetp != NULL && *dsetp != NULL);
3865
3866         dset = *dsetp;
3867         for (i = 0; i < dset->ndisp; i++)
3868                 dns_dispatch_detach(&(dset->dispatches[i]));
3869         isc_mem_put(dset->mctx, dset->dispatches,
3870                     sizeof(dns_dispatch_t *) * dset->ndisp);
3871         DESTROYLOCK(&dset->lock);
3872         isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t));
3873
3874         *dsetp = NULL;
3875 }
3876
#if 0
/*
 * Debugging aid (compiled out): print the pointer and formatted local
 * address of every dispatch on the manager's list.
 */
void
dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
        dns_dispatch_t *disp;
        char foo[1024];

        for (disp = ISC_LIST_HEAD(mgr->list);
             disp != NULL;
             disp = ISC_LIST_NEXT(disp, link)) {
                isc_sockaddr_format(&disp->local, foo, sizeof(foo));
                printf("\tdispatch %p, addr %s\n", disp, foo);
        }
}
#endif