]> CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - contrib/bind9/lib/dns/dispatch.c
MFC r254651:
[FreeBSD/stable/9.git] / contrib / bind9 / lib / dns / dispatch.c
1 /*
2  * Copyright (C) 2004-2009, 2011-2013  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: dispatch.c,v 1.175 2011/11/29 01:03:47 marka Exp $ */
19
20 /*! \file */
21
22 #include <config.h>
23
24 #include <stdlib.h>
25 #include <sys/types.h>
26 #include <unistd.h>
27 #include <stdlib.h>
28
29 #include <isc/entropy.h>
30 #include <isc/mem.h>
31 #include <isc/mutex.h>
32 #include <isc/portset.h>
33 #include <isc/print.h>
34 #include <isc/random.h>
35 #include <isc/socket.h>
36 #include <isc/stats.h>
37 #include <isc/string.h>
38 #include <isc/task.h>
39 #include <isc/time.h>
40 #include <isc/util.h>
41
42 #include <dns/acl.h>
43 #include <dns/dispatch.h>
44 #include <dns/events.h>
45 #include <dns/log.h>
46 #include <dns/message.h>
47 #include <dns/portlist.h>
48 #include <dns/stats.h>
49 #include <dns/tcpmsg.h>
50 #include <dns/types.h>
51
52 typedef ISC_LIST(dns_dispentry_t)       dns_displist_t;
53
54 typedef struct dispsocket               dispsocket_t;
55 typedef ISC_LIST(dispsocket_t)          dispsocketlist_t;
56
57 typedef struct dispportentry            dispportentry_t;
58 typedef ISC_LIST(dispportentry_t)       dispportlist_t;
59
60 /* ARC4 Random generator state */
61 typedef struct arc4ctx {
62         isc_uint8_t     i;
63         isc_uint8_t     j;
64         isc_uint8_t     s[256];
65         int             count;
66         isc_entropy_t   *entropy;       /*%< entropy source for ARC4 */
67         isc_mutex_t     *lock;
68 } arc4ctx_t;
69
70 typedef struct dns_qid {
71         unsigned int    magic;
72         unsigned int    qid_nbuckets;   /*%< hash table size */
73         unsigned int    qid_increment;  /*%< id increment on collision */
74         isc_mutex_t     lock;
75         dns_displist_t  *qid_table;     /*%< the table itself */
76         dispsocketlist_t *sock_table;   /*%< socket table */
77 } dns_qid_t;
78
79 struct dns_dispatchmgr {
80         /* Unlocked. */
81         unsigned int                    magic;
82         isc_mem_t                      *mctx;
83         dns_acl_t                      *blackhole;
84         dns_portlist_t                 *portlist;
85         isc_stats_t                    *stats;
86         isc_entropy_t                  *entropy; /*%< entropy source */
87
88         /* Locked by "lock". */
89         isc_mutex_t                     lock;
90         unsigned int                    state;
91         ISC_LIST(dns_dispatch_t)        list;
92
93         /* Locked by arc4_lock. */
94         isc_mutex_t                     arc4_lock;
95         arc4ctx_t                       arc4ctx;    /*%< ARC4 context for QID */
96
97         /* locked by buffer lock */
98         dns_qid_t                       *qid;
99         isc_mutex_t                     buffer_lock;
100         unsigned int                    buffers;    /*%< allocated buffers */
101         unsigned int                    buffersize; /*%< size of each buffer */
102         unsigned int                    maxbuffers; /*%< max buffers */
103
104         /* Locked internally. */
105         isc_mutex_t                     depool_lock;
106         isc_mempool_t                  *depool; /*%< pool for dispatch events */
107         isc_mutex_t                     rpool_lock;
108         isc_mempool_t                  *rpool;  /*%< pool for replies */
109         isc_mutex_t                     dpool_lock;
110         isc_mempool_t                  *dpool;  /*%< dispatch allocations */
111         isc_mutex_t                     bpool_lock;
112         isc_mempool_t                  *bpool;  /*%< pool for buffers */
113         isc_mutex_t                     spool_lock;
114         isc_mempool_t                  *spool;  /*%< pool for dispsocks */
115
116         /*%
117          * Locked by qid->lock if qid exists; otherwise, can be used without
118          * being locked.
119          * Memory footprint considerations: this is a simple implementation of
120          * available ports, i.e., an ordered array of the actual port numbers.
121          * This will require about 256KB of memory in the worst case (128KB for
122          * each of IPv4 and IPv6).  We could reduce it by representing it as a
123          * more sophisticated way such as a list (or array) of ranges that are
124          * searched to identify a specific port.  Our decision here is the saved
125          * memory isn't worth the implementation complexity, considering the
126          * fact that the whole BIND9 process (which is mainly named) already
127          * requires a pretty large memory footprint.  We may, however, have to
128          * revisit the decision when we want to use it as a separate module for
129          * an environment where memory requirement is severer.
130          */
131         in_port_t       *v4ports;       /*%< available ports for IPv4 */
132         unsigned int    nv4ports;       /*%< # of available ports for IPv4 */
133         in_port_t       *v6ports;       /*%< available ports for IPv4 */
134         unsigned int    nv6ports;       /*%< # of available ports for IPv4 */
135 };
136
/* Bit in dns_dispatchmgr.state, and the predicate that tests it. */
#define MGR_SHUTTINGDOWN        0x00000001U
#define MGR_IS_SHUTTINGDOWN(l)  (((l)->state & MGR_SHUTTINGDOWN) != 0)

/* True when the DNS_DISPATCHATTR_PRIVATE bit is set in d->attributes. */
#define IS_PRIVATE(d)   (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
141
142 struct dns_dispentry {
143         unsigned int                    magic;
144         dns_dispatch_t                 *disp;
145         dns_messageid_t                 id;
146         in_port_t                       port;
147         unsigned int                    bucket;
148         isc_sockaddr_t                  host;
149         isc_task_t                     *task;
150         isc_taskaction_t                action;
151         void                           *arg;
152         isc_boolean_t                   item_out;
153         dispsocket_t                    *dispsocket;
154         ISC_LIST(dns_dispatchevent_t)   items;
155         ISC_LINK(dns_dispentry_t)       link;
156 };
157
/*%
 * Upper limit on the number of dispatch sockets kept pooled for reuse.  The
 * right value may vary, but experiments have shown that a busy caching
 * server may need more than 1000 sockets open concurrently.  The maximum
 * allowable number of dispatch sockets (per manager) will be set to twice
 * this value.
 */
#if !defined(DNS_DISPATCH_POOLSOCKS)
#define DNS_DISPATCH_POOLSOCKS                  2048
#endif

/*%
 * Quota on the number of dispatch sockets.  Once a dispatch holds more
 * sockets than the quota, new queries purge the oldest ones, so that a
 * massive number of outstanding queries cannot block subsequent queries
 * (especially when the older ones take longer and end in a timeout).
 */
#if !defined(DNS_DISPATCH_SOCKSQUOTA)
#define DNS_DISPATCH_SOCKSQUOTA                 3072
#endif
178
179 struct dispsocket {
180         unsigned int                    magic;
181         isc_socket_t                    *socket;
182         dns_dispatch_t                  *disp;
183         isc_sockaddr_t                  host;
184         in_port_t                       localport; /* XXX: should be removed later */
185         dispportentry_t                 *portentry;
186         dns_dispentry_t                 *resp;
187         isc_task_t                      *task;
188         ISC_LINK(dispsocket_t)          link;
189         unsigned int                    bucket;
190         ISC_LINK(dispsocket_t)          blink;
191 };
192
193 /*%
194  * A port table entry.  We remember every port we first open in a table with a
195  * reference counter so that we can 'reuse' the same port (with different
196  * destination addresses) using the SO_REUSEADDR socket option.
197  */
198 struct dispportentry {
199         in_port_t                       port;
200         unsigned int                    refs;
201         ISC_LINK(struct dispportentry)  link;
202 };
203
/* Modulus applied to a port number to pick its port_table bucket. */
#if !defined(DNS_DISPATCH_PORTTABLESIZE)
#define DNS_DISPATCH_PORTTABLESIZE      1024
#endif

#define INVALID_BUCKET          (0xffffdead)

/*%
 * Number of tasks for each dispatch that uses separate sockets for different
 * transactions.  This must be a power of 2, as it will divide 32-bit numbers
 * to get a uniformly random task selection.  See get_dispsocket().
 */
#define MAX_INTERNAL_TASKS      64
216
217 struct dns_dispatch {
218         /* Unlocked. */
219         unsigned int            magic;          /*%< magic */
220         dns_dispatchmgr_t      *mgr;            /*%< dispatch manager */
221         int                     ntasks;
222         /*%
223          * internal task buckets.  We use multiple tasks to distribute various
224          * socket events well when using separate dispatch sockets.  We use the
225          * 1st task (task[0]) for internal control events.
226          */
227         isc_task_t             *task[MAX_INTERNAL_TASKS];
228         isc_socket_t           *socket;         /*%< isc socket attached to */
229         isc_sockaddr_t          local;          /*%< local address */
230         in_port_t               localport;      /*%< local UDP port */
231         unsigned int            maxrequests;    /*%< max requests */
232         isc_event_t            *ctlevent;
233
234         isc_mutex_t             sepool_lock;
235         isc_mempool_t          *sepool;         /*%< pool for socket events */
236
237         /*% Locked by mgr->lock. */
238         ISC_LINK(dns_dispatch_t) link;
239
240         /* Locked by "lock". */
241         isc_mutex_t             lock;           /*%< locks all below */
242         isc_sockettype_t        socktype;
243         unsigned int            attributes;
244         unsigned int            refcount;       /*%< number of users */
245         dns_dispatchevent_t    *failsafe_ev;    /*%< failsafe cancel event */
246         unsigned int            shutting_down : 1,
247                                 shutdown_out : 1,
248                                 connected : 1,
249                                 tcpmsg_valid : 1,
250                                 recv_pending : 1; /*%< is a recv() pending? */
251         isc_result_t            shutdown_why;
252         ISC_LIST(dispsocket_t)  activesockets;
253         ISC_LIST(dispsocket_t)  inactivesockets;
254         unsigned int            nsockets;
255         unsigned int            requests;       /*%< how many requests we have */
256         unsigned int            tcpbuffers;     /*%< allocated buffers */
257         dns_tcpmsg_t            tcpmsg;         /*%< for tcp streams */
258         dns_qid_t               *qid;
259         arc4ctx_t               arc4ctx;        /*%< for QID/UDP port num */
260         dispportlist_t          *port_table;    /*%< hold ports 'owned' by us */
261         isc_mempool_t           *portpool;      /*%< port table entries  */
262 };
263
/*
 * Magic values for each object type defined in this file, each paired with
 * the macro that validates a pointer against it.
 */

/* dns_qid_t */
#define QID_MAGIC               ISC_MAGIC('Q', 'i', 'd', ' ')
#define VALID_QID(e)            ISC_MAGIC_VALID((e), QID_MAGIC)

/* dns_dispentry_t */
#define RESPONSE_MAGIC          ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(e)       ISC_MAGIC_VALID((e), RESPONSE_MAGIC)

/* dispsocket_t */
#define DISPSOCK_MAGIC          ISC_MAGIC('D', 's', 'o', 'c')
#define VALID_DISPSOCK(e)       ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)

/* dns_dispatch_t */
#define DISPATCH_MAGIC          ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(e)       ISC_MAGIC_VALID((e), DISPATCH_MAGIC)

/* dns_dispatchmgr_t */
#define DNS_DISPATCHMGR_MAGIC   ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(e)    ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
278
/*
 * DNS_QID(disp) yields the qid of the dispatch itself when its socket type
 * is TCP and the manager's qid otherwise.  DISP_ARC4CTX(disp) yields a
 * pointer to the dispatch's own ARC4 context when its socket type is UDP and
 * to the manager's context otherwise.
 *
 * The whole conditional is parenthesized so that each macro behaves as a
 * single operand wherever it is expanded (for instance next to '+', '->' or
 * a comparison) instead of letting the neighbouring operator bind to the
 * condition of the '?:'.
 */
#define DNS_QID(disp) \
        (((disp)->socktype == isc_sockettype_tcp) ? \
         (disp)->qid : (disp)->mgr->qid)
#define DISP_ARC4CTX(disp) \
        (((disp)->socktype == isc_sockettype_udp) ? \
         (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
283
/*%
 * Locking a query port buffer is a bit tricky.  We access the buffer without
 * locking until qid is created.  Technically, there is a possibility of race
 * between the creation of qid and access to the port buffer; in practice,
 * however, this should be safe because qid isn't created until the first
 * dispatch is created and there should be no contending situation until then.
 *
 * Both macros are wrapped in do { } while (0) so that each one is exactly
 * one statement: a bare 'if' here would silently capture an 'else' that
 * follows the macro at the call site.
 */
#define PORTBUFLOCK(mgr) \
        do { \
                if ((mgr)->qid != NULL) \
                        LOCK(&((mgr)->qid->lock)); \
        } while (0)
#define PORTBUFUNLOCK(mgr) \
        do { \
                if ((mgr)->qid != NULL) \
                        UNLOCK((&(mgr)->qid->lock)); \
        } while (0)
293
294 /*
295  * Statics.
296  */
297 static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
298                                      dns_messageid_t, in_port_t, unsigned int);
299 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
300 static void destroy_disp(isc_task_t *task, isc_event_t *event);
301 static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
302 static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
303 static void udp_exrecv(isc_task_t *, isc_event_t *);
304 static void udp_shrecv(isc_task_t *, isc_event_t *);
305 static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
306 static void tcp_recv(isc_task_t *, isc_event_t *);
307 static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
308 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
309                              in_port_t);
310 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
311 static void *allocate_udp_buffer(dns_dispatch_t *disp);
312 static inline void free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
313 static inline dns_dispatchevent_t *allocate_devent(dns_dispatch_t *disp);
314 static void do_cancel(dns_dispatch_t *disp);
315 static dns_dispentry_t *linear_first(dns_qid_t *disp);
316 static dns_dispentry_t *linear_next(dns_qid_t *disp,
317                                     dns_dispentry_t *resp);
318 static void dispatch_free(dns_dispatch_t **dispp);
319 static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
320                                   dns_dispatch_t *disp,
321                                   isc_socketmgr_t *sockmgr,
322                                   isc_sockaddr_t *localaddr,
323                                   isc_socket_t **sockp,
324                                   isc_socket_t *dup_socket);
325 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
326                                        isc_socketmgr_t *sockmgr,
327                                        isc_taskmgr_t *taskmgr,
328                                        isc_sockaddr_t *localaddr,
329                                        unsigned int maxrequests,
330                                        unsigned int attributes,
331                                        dns_dispatch_t **dispp,
332                                        isc_socket_t *dup_socket);
333 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
334 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
335 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
336                                  unsigned int increment, dns_qid_t **qidp,
337                                  isc_boolean_t needaddrtable);
338 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
339 static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
340                                 unsigned int options, isc_socket_t **sockp,
341                                 isc_socket_t *dup_socket);
342 static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
343                                    isc_sockaddr_t *sockaddrp);
344
345 #define LVL(x) ISC_LOG_DEBUG(x)
346
347 static void
348 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
349      ISC_FORMAT_PRINTF(3, 4);
350
351 static void
352 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
353         char msgbuf[2048];
354         va_list ap;
355
356         if (! isc_log_wouldlog(dns_lctx, level))
357                 return;
358
359         va_start(ap, fmt);
360         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
361         va_end(ap);
362
363         isc_log_write(dns_lctx,
364                       DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
365                       level, "dispatchmgr %p: %s", mgr, msgbuf);
366 }
367
368 static inline void
369 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
370         if (mgr->stats != NULL)
371                 isc_stats_increment(mgr->stats, counter);
372 }
373
374 static void
375 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
376      ISC_FORMAT_PRINTF(3, 4);
377
378 static void
379 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
380         char msgbuf[2048];
381         va_list ap;
382
383         if (! isc_log_wouldlog(dns_lctx, level))
384                 return;
385
386         va_start(ap, fmt);
387         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
388         va_end(ap);
389
390         isc_log_write(dns_lctx,
391                       DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
392                       level, "dispatch %p: %s", disp, msgbuf);
393 }
394
395 static void
396 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
397             int level, const char *fmt, ...)
398      ISC_FORMAT_PRINTF(4, 5);
399
400 static void
401 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
402             int level, const char *fmt, ...)
403 {
404         char msgbuf[2048];
405         char peerbuf[256];
406         va_list ap;
407
408         if (! isc_log_wouldlog(dns_lctx, level))
409                 return;
410
411         va_start(ap, fmt);
412         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
413         va_end(ap);
414
415         if (VALID_RESPONSE(resp)) {
416                 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
417                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
418                               DNS_LOGMODULE_DISPATCH, level,
419                               "dispatch %p response %p %s: %s", disp, resp,
420                               peerbuf, msgbuf);
421         } else {
422                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
423                               DNS_LOGMODULE_DISPATCH, level,
424                               "dispatch %p req/resp %p: %s", disp, resp,
425                               msgbuf);
426         }
427 }
428
429 /*%
430  * ARC4 random number generator derived from OpenBSD.
431  * Only dispatch_random() and dispatch_uniformrandom() are expected
432  * to be called from general dispatch routines; the rest of them are subroutines
433  * for these two.
434  *
435  * The original copyright follows:
436  * Copyright (c) 1996, David Mazieres <dm@uun.org>
437  * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
438  *
439  * Permission to use, copy, modify, and distribute this software for any
440  * purpose with or without fee is hereby granted, provided that the above
441  * copyright notice and this permission notice appear in all copies.
442  *
443  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
444  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
445  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
446  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
447  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
448  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
449  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
450  */
451 #ifdef BIND9
452 static void
453 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
454                     isc_mutex_t *lock)
455 {
456         int n;
457         for (n = 0; n < 256; n++)
458                 actx->s[n] = n;
459         actx->i = 0;
460         actx->j = 0;
461         actx->count = 0;
462         actx->entropy = entropy; /* don't have to attach */
463         actx->lock = lock;
464 }
465
466 static void
467 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
468         int n;
469         isc_uint8_t si;
470
471         actx->i--;
472         for (n = 0; n < 256; n++) {
473                 actx->i = (actx->i + 1);
474                 si = actx->s[actx->i];
475                 actx->j = (actx->j + si + dat[n % datlen]);
476                 actx->s[actx->i] = actx->s[actx->j];
477                 actx->s[actx->j] = si;
478         }
479         actx->j = actx->i;
480 }
481
482 static inline isc_uint8_t
483 dispatch_arc4get8(arc4ctx_t *actx) {
484         isc_uint8_t si, sj;
485
486         actx->i = (actx->i + 1);
487         si = actx->s[actx->i];
488         actx->j = (actx->j + si);
489         sj = actx->s[actx->j];
490         actx->s[actx->i] = sj;
491         actx->s[actx->j] = si;
492
493         return (actx->s[(si + sj) & 0xff]);
494 }
495
496 static inline isc_uint16_t
497 dispatch_arc4get16(arc4ctx_t *actx) {
498         isc_uint16_t val;
499
500         val = dispatch_arc4get8(actx) << 8;
501         val |= dispatch_arc4get8(actx);
502
503         return (val);
504 }
505
506 static void
507 dispatch_arc4stir(arc4ctx_t *actx) {
508         int i;
509         union {
510                 unsigned char rnd[128];
511                 isc_uint32_t rnd32[32];
512         } rnd;
513         isc_result_t result;
514
515         if (actx->entropy != NULL) {
516                 /*
517                  * We accept any quality of random data to avoid blocking.
518                  */
519                 result = isc_entropy_getdata(actx->entropy, rnd.rnd,
520                                              sizeof(rnd), NULL, 0);
521                 RUNTIME_CHECK(result == ISC_R_SUCCESS);
522         } else {
523                 for (i = 0; i < 32; i++)
524                         isc_random_get(&rnd.rnd32[i]);
525         }
526         dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
527
528         /*
529          * Discard early keystream, as per recommendations in:
530          * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
531          */
532         for (i = 0; i < 256; i++)
533                 (void)dispatch_arc4get8(actx);
534
535         /*
536          * Derived from OpenBSD's implementation.  The rationale is not clear,
537          * but should be conservative enough in safety, and reasonably large
538          * for efficiency.
539          */
540         actx->count = 1600000;
541 }
542
543 static isc_uint16_t
544 dispatch_random(arc4ctx_t *actx) {
545         isc_uint16_t result;
546
547         if (actx->lock != NULL)
548                 LOCK(actx->lock);
549
550         actx->count -= sizeof(isc_uint16_t);
551         if (actx->count <= 0)
552                 dispatch_arc4stir(actx);
553         result = dispatch_arc4get16(actx);
554
555         if (actx->lock != NULL)
556                 UNLOCK(actx->lock);
557
558         return (result);
559 }
560 #else
561 /*
562  * For general purpose library, we don't have to be too strict about the
563  * quality of random values.  Performance doesn't matter much, either.
564  * So we simply use the isc_random module to keep the library as small as
565  * possible.
566  */
567
568 static void
569 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
570                     isc_mutex_t *lock)
571 {
572         UNUSED(actx);
573         UNUSED(entropy);
574         UNUSED(lock);
575
576         return;
577 }
578
579 static isc_uint16_t
580 dispatch_random(arc4ctx_t *actx) {
581         isc_uint32_t r;
582
583         UNUSED(actx);
584
585         isc_random_get(&r);
586         return (r & 0xffff);
587 }
588 #endif  /* BIND9 */
589
590 static isc_uint16_t
591 dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
592         isc_uint16_t min, r;
593
594         if (upper_bound < 2)
595                 return (0);
596
597         /*
598          * Ensure the range of random numbers [min, 0xffff] be a multiple of
599          * upper_bound and contain at least a half of the 16 bit range.
600          */
601
602         if (upper_bound > 0x8000)
603                 min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
604         else
605                 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
606
607         /*
608          * This could theoretically loop forever but each retry has
609          * p > 0.5 (worst case, usually far better) of selecting a
610          * number inside the range we need, so it should rarely need
611          * to re-roll.
612          */
613         for (;;) {
614                 r = dispatch_random(actx);
615                 if (r >= min)
616                         break;
617         }
618
619         return (r % upper_bound);
620 }
621
622 /*
623  * Return a hash of the destination and message id.
624  */
625 static isc_uint32_t
626 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
627          in_port_t port)
628 {
629         unsigned int ret;
630
631         ret = isc_sockaddr_hash(dest, ISC_TRUE);
632         ret ^= (id << 16) | port;
633         ret %= qid->qid_nbuckets;
634
635         INSIST(ret < qid->qid_nbuckets);
636
637         return (ret);
638 }
639
640 /*
641  * Find the first entry in 'qid'.  Returns NULL if there are no entries.
642  */
643 static dns_dispentry_t *
644 linear_first(dns_qid_t *qid) {
645         dns_dispentry_t *ret;
646         unsigned int bucket;
647
648         bucket = 0;
649
650         while (bucket < qid->qid_nbuckets) {
651                 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
652                 if (ret != NULL)
653                         return (ret);
654                 bucket++;
655         }
656
657         return (NULL);
658 }
659
660 /*
661  * Find the next entry after 'resp' in 'qid'.  Return NULL if there are
662  * no more entries.
663  */
664 static dns_dispentry_t *
665 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
666         dns_dispentry_t *ret;
667         unsigned int bucket;
668
669         ret = ISC_LIST_NEXT(resp, link);
670         if (ret != NULL)
671                 return (ret);
672
673         bucket = resp->bucket;
674         bucket++;
675         while (bucket < qid->qid_nbuckets) {
676                 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
677                 if (ret != NULL)
678                         return (ret);
679                 bucket++;
680         }
681
682         return (NULL);
683 }
684
685 /*
686  * The dispatch must be locked.
687  */
688 static isc_boolean_t
689 destroy_disp_ok(dns_dispatch_t *disp)
690 {
691         if (disp->refcount != 0)
692                 return (ISC_FALSE);
693
694         if (disp->recv_pending != 0)
695                 return (ISC_FALSE);
696
697         if (!ISC_LIST_EMPTY(disp->activesockets))
698                 return (ISC_FALSE);
699
700         if (disp->shutting_down == 0)
701                 return (ISC_FALSE);
702
703         return (ISC_TRUE);
704 }
705
706 /*
707  * Called when refcount reaches 0 (and safe to destroy).
708  *
709  * The dispatcher must not be locked.
710  * The manager must be locked.
711  */
712 static void
713 destroy_disp(isc_task_t *task, isc_event_t *event) {
714         dns_dispatch_t *disp;
715         dns_dispatchmgr_t *mgr;
716         isc_boolean_t killmgr;
717         dispsocket_t *dispsocket;
718         int i;
719
720         INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
721
722         UNUSED(task);
723
724         disp = event->ev_arg;
725         mgr = disp->mgr;
726
727         LOCK(&mgr->lock);
728         ISC_LIST_UNLINK(mgr->list, disp, link);
729
730         dispatch_log(disp, LVL(90),
731                      "shutting down; detaching from sock %p, task %p",
732                      disp->socket, disp->task[0]); /* XXXX */
733
734         if (disp->sepool != NULL) {
735                 isc_mempool_destroy(&disp->sepool);
736                 (void)isc_mutex_destroy(&disp->sepool_lock);
737         }
738
739         if (disp->socket != NULL)
740                 isc_socket_detach(&disp->socket);
741         while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
742                 ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
743                 destroy_dispsocket(disp, &dispsocket);
744         }
745         for (i = 0; i < disp->ntasks; i++)
746                 isc_task_detach(&disp->task[i]);
747         isc_event_free(&event);
748
749         dispatch_free(&disp);
750
751         killmgr = destroy_mgr_ok(mgr);
752         UNLOCK(&mgr->lock);
753         if (killmgr)
754                 destroy_mgr(&mgr);
755 }
756
757 /*%
758  * Manipulate port table per dispatch: find an entry for a given port number,
759  * create a new entry, and decrement a given entry with possible clean-up.
760  */
761 static dispportentry_t *
762 port_search(dns_dispatch_t *disp, in_port_t port) {
763         dispportentry_t *portentry;
764
765         REQUIRE(disp->port_table != NULL);
766
767         portentry = ISC_LIST_HEAD(disp->port_table[port %
768                                                    DNS_DISPATCH_PORTTABLESIZE]);
769         while (portentry != NULL) {
770                 if (portentry->port == port)
771                         return (portentry);
772                 portentry = ISC_LIST_NEXT(portentry, link);
773         }
774
775         return (NULL);
776 }
777
778 static dispportentry_t *
779 new_portentry(dns_dispatch_t *disp, in_port_t port) {
780         dispportentry_t *portentry;
781
782         REQUIRE(disp->port_table != NULL);
783
784         portentry = isc_mempool_get(disp->portpool);
785         if (portentry == NULL)
786                 return (portentry);
787
788         portentry->port = port;
789         portentry->refs = 0;
790         ISC_LINK_INIT(portentry, link);
791         ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
792                         portentry, link);
793
794         return (portentry);
795 }
796
797 /*%
798  * The caller must not hold the qid->lock.
799  */
800 static void
801 deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
802         dispportentry_t *portentry = *portentryp;
803         isc_boolean_t unlink = ISC_FALSE;
804         dns_qid_t *qid;
805
806         REQUIRE(disp->port_table != NULL);
807         REQUIRE(portentry != NULL && portentry->refs > 0);
808
809         qid = DNS_QID(disp);
810         LOCK(&qid->lock);
811         portentry->refs--;
812         unlink = ISC_TF(portentry->refs == 0);
813         UNLOCK(&qid->lock);
814
815         if (unlink) {
816                 ISC_LIST_UNLINK(disp->port_table[portentry->port %
817                                                  DNS_DISPATCH_PORTTABLESIZE],
818                                 portentry, link);
819                 isc_mempool_put(disp->portpool, portentry);
820         }
821
822         *portentryp = NULL;
823 }
824
825 /*%
826  * Find a dispsocket for socket address 'dest', and port number 'port'.
827  * Return NULL if no such entry exists.
828  */
829 static dispsocket_t *
830 socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
831               unsigned int bucket)
832 {
833         dispsocket_t *dispsock;
834
835         REQUIRE(bucket < qid->qid_nbuckets);
836
837         dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
838
839         while (dispsock != NULL) {
840                 if (dispsock->portentry != NULL &&
841                     dispsock->portentry->port == port &&
842                     isc_sockaddr_equal(dest, &dispsock->host))
843                         return (dispsock);
844                 dispsock = ISC_LIST_NEXT(dispsock, blink);
845         }
846
847         return (NULL);
848 }
849
850 /*%
851  * Make a new socket for a single dispatch with a random port number.
852  * The caller must hold the disp->lock
853  */
854 static isc_result_t
855 get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
856                isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp,
857                in_port_t *portp)
858 {
859         int i;
860         isc_uint32_t r;
861         dns_dispatchmgr_t *mgr = disp->mgr;
862         isc_socket_t *sock = NULL;
863         isc_result_t result = ISC_R_FAILURE;
864         in_port_t port;
865         isc_sockaddr_t localaddr;
866         unsigned int bucket = 0;
867         dispsocket_t *dispsock;
868         unsigned int nports;
869         in_port_t *ports;
870         unsigned int bindoptions;
871         dispportentry_t *portentry = NULL;
872         dns_qid_t *qid;
873
874         if (isc_sockaddr_pf(&disp->local) == AF_INET) {
875                 nports = disp->mgr->nv4ports;
876                 ports = disp->mgr->v4ports;
877         } else {
878                 nports = disp->mgr->nv6ports;
879                 ports = disp->mgr->v6ports;
880         }
881         if (nports == 0)
882                 return (ISC_R_ADDRNOTAVAIL);
883
884         dispsock = ISC_LIST_HEAD(disp->inactivesockets);
885         if (dispsock != NULL) {
886                 ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
887                 sock = dispsock->socket;
888                 dispsock->socket = NULL;
889         } else {
890                 dispsock = isc_mempool_get(mgr->spool);
891                 if (dispsock == NULL)
892                         return (ISC_R_NOMEMORY);
893
894                 disp->nsockets++;
895                 dispsock->socket = NULL;
896                 dispsock->disp = disp;
897                 dispsock->resp = NULL;
898                 dispsock->portentry = NULL;
899                 isc_random_get(&r);
900                 dispsock->task = NULL;
901                 isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
902                 ISC_LINK_INIT(dispsock, link);
903                 ISC_LINK_INIT(dispsock, blink);
904                 dispsock->magic = DISPSOCK_MAGIC;
905         }
906
907         /*
908          * Pick up a random UDP port and open a new socket with it.  Avoid
909          * choosing ports that share the same destination because it will be
910          * very likely to fail in bind(2) or connect(2).
911          */
912         localaddr = disp->local;
913         qid = DNS_QID(disp);
914
915         for (i = 0; i < 64; i++) {
916                 port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
917                                                         nports)];
918                 isc_sockaddr_setport(&localaddr, port);
919
920                 LOCK(&qid->lock);
921                 bucket = dns_hash(qid, dest, 0, port);
922                 if (socket_search(qid, dest, port, bucket) != NULL) {
923                         UNLOCK(&qid->lock);
924                         continue;
925                 }
926                 UNLOCK(&qid->lock);
927                 bindoptions = 0;
928                 portentry = port_search(disp, port);
929
930                 if (portentry != NULL)
931                         bindoptions |= ISC_SOCKET_REUSEADDRESS;
932                 result = open_socket(sockmgr, &localaddr, bindoptions, &sock,
933                                      NULL);
934                 if (result == ISC_R_SUCCESS) {
935                         if (portentry == NULL) {
936                                 portentry = new_portentry(disp, port);
937                                 if (portentry == NULL) {
938                                         result = ISC_R_NOMEMORY;
939                                         break;
940                                 }
941                         }
942                         portentry->refs++;
943                         break;
944                 } else if (result == ISC_R_NOPERM) {
945                         char buf[ISC_SOCKADDR_FORMATSIZE];
946                         isc_sockaddr_format(&localaddr, buf, sizeof(buf));
947                         dispatch_log(disp, ISC_LOG_WARNING,
948                                      "open_socket(%s) -> %s: continuing",
949                                      buf, isc_result_totext(result));
950                 } else if (result != ISC_R_ADDRINUSE)
951                         break;
952         }
953
954         if (result == ISC_R_SUCCESS) {
955                 dispsock->socket = sock;
956                 dispsock->host = *dest;
957                 dispsock->portentry = portentry;
958                 dispsock->bucket = bucket;
959                 LOCK(&qid->lock);
960                 ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
961                 UNLOCK(&qid->lock);
962                 *dispsockp = dispsock;
963                 *portp = port;
964         } else {
965                 /*
966                  * We could keep it in the inactive list, but since this should
967                  * be an exceptional case and might be resource shortage, we'd
968                  * rather destroy it.
969                  */
970                 if (sock != NULL)
971                         isc_socket_detach(&sock);
972                 destroy_dispsocket(disp, &dispsock);
973         }
974
975         return (result);
976 }
977
978 /*%
979  * Destroy a dedicated dispatch socket.
980  */
981 static void
982 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
983         dispsocket_t *dispsock;
984         dns_qid_t *qid;
985
986         /*
987          * The dispatch must be locked.
988          */
989
990         REQUIRE(dispsockp != NULL && *dispsockp != NULL);
991         dispsock = *dispsockp;
992         REQUIRE(!ISC_LINK_LINKED(dispsock, link));
993
994         disp->nsockets--;
995         dispsock->magic = 0;
996         if (dispsock->portentry != NULL)
997                 deref_portentry(disp, &dispsock->portentry);
998         if (dispsock->socket != NULL)
999                 isc_socket_detach(&dispsock->socket);
1000         if (ISC_LINK_LINKED(dispsock, blink)) {
1001                 qid = DNS_QID(disp);
1002                 LOCK(&qid->lock);
1003                 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1004                                 blink);
1005                 UNLOCK(&qid->lock);
1006         }
1007         if (dispsock->task != NULL)
1008                 isc_task_detach(&dispsock->task);
1009         isc_mempool_put(disp->mgr->spool, dispsock);
1010
1011         *dispsockp = NULL;
1012 }
1013
1014 /*%
1015  * Deactivate a dedicated dispatch socket.  Move it to the inactive list for
1016  * future reuse unless the total number of sockets are exceeding the maximum.
1017  */
1018 static void
1019 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1020         isc_result_t result;
1021         dns_qid_t *qid;
1022
1023         /*
1024          * The dispatch must be locked.
1025          */
1026         ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
1027         if (dispsock->resp != NULL) {
1028                 INSIST(dispsock->resp->dispsocket == dispsock);
1029                 dispsock->resp->dispsocket = NULL;
1030         }
1031
1032         INSIST(dispsock->portentry != NULL);
1033         deref_portentry(disp, &dispsock->portentry);
1034
1035 #ifdef BIND9
1036         if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
1037                 destroy_dispsocket(disp, &dispsock);
1038         else {
1039                 result = isc_socket_close(dispsock->socket);
1040
1041                 qid = DNS_QID(disp);
1042                 LOCK(&qid->lock);
1043                 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1044                                 blink);
1045                 UNLOCK(&qid->lock);
1046
1047                 if (result == ISC_R_SUCCESS)
1048                         ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
1049                 else {
1050                         /*
1051                          * If the underlying system does not allow this
1052                          * optimization, destroy this temporary structure (and
1053                          * create a new one for a new transaction).
1054                          */
1055                         INSIST(result == ISC_R_NOTIMPLEMENTED);
1056                         destroy_dispsocket(disp, &dispsock);
1057                 }
1058         }
1059 #else
1060         /* This kind of optimization isn't necessary for normal use */
1061         UNUSED(qid);
1062         UNUSED(result);
1063
1064         destroy_dispsocket(disp, &dispsock);
1065 #endif
1066 }
1067
1068 /*
1069  * Find an entry for query ID 'id', socket address 'dest', and port number
1070  * 'port'.
1071  * Return NULL if no such entry exists.
1072  */
1073 static dns_dispentry_t *
1074 entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
1075              in_port_t port, unsigned int bucket)
1076 {
1077         dns_dispentry_t *res;
1078
1079         REQUIRE(bucket < qid->qid_nbuckets);
1080
1081         res = ISC_LIST_HEAD(qid->qid_table[bucket]);
1082
1083         while (res != NULL) {
1084                 if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
1085                     res->port == port) {
1086                         return (res);
1087                 }
1088                 res = ISC_LIST_NEXT(res, link);
1089         }
1090
1091         return (NULL);
1092 }
1093
1094 static void
1095 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
1096         isc_mempool_t *bpool;
1097         INSIST(buf != NULL && len != 0);
1098
1099
1100         switch (disp->socktype) {
1101         case isc_sockettype_tcp:
1102                 INSIST(disp->tcpbuffers > 0);
1103                 disp->tcpbuffers--;
1104                 isc_mem_put(disp->mgr->mctx, buf, len);
1105                 break;
1106         case isc_sockettype_udp:
1107                 LOCK(&disp->mgr->buffer_lock);
1108                 INSIST(disp->mgr->buffers > 0);
1109                 INSIST(len == disp->mgr->buffersize);
1110                 disp->mgr->buffers--;
1111                 bpool = disp->mgr->bpool;
1112                 UNLOCK(&disp->mgr->buffer_lock);
1113                 isc_mempool_put(bpool, buf);
1114                 break;
1115         default:
1116                 INSIST(0);
1117                 break;
1118         }
1119 }
1120
1121 static void *
1122 allocate_udp_buffer(dns_dispatch_t *disp) {
1123         isc_mempool_t *bpool;
1124         void *temp;
1125
1126         LOCK(&disp->mgr->buffer_lock);
1127         bpool = disp->mgr->bpool;
1128         disp->mgr->buffers++;
1129         UNLOCK(&disp->mgr->buffer_lock);
1130
1131         temp = isc_mempool_get(bpool);
1132
1133         if (temp == NULL) {
1134                 LOCK(&disp->mgr->buffer_lock);
1135                 disp->mgr->buffers--;
1136                 UNLOCK(&disp->mgr->buffer_lock);
1137         }
1138
1139         return (temp);
1140 }
1141
1142 static inline void
1143 free_sevent(isc_event_t *ev) {
1144         isc_mempool_t *pool = ev->ev_destroy_arg;
1145         isc_socketevent_t *sev = (isc_socketevent_t *) ev;
1146         isc_mempool_put(pool, sev);
1147 }
1148
1149 static inline isc_socketevent_t *
1150 allocate_sevent(dns_dispatch_t *disp, isc_socket_t *socket,
1151                 isc_eventtype_t type, isc_taskaction_t action, const void *arg)
1152 {
1153         isc_socketevent_t *ev;
1154         void *deconst_arg;
1155
1156         ev = isc_mempool_get(disp->sepool);
1157         if (ev == NULL)
1158                 return (NULL);
1159         DE_CONST(arg, deconst_arg);
1160         ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type,
1161                        action, deconst_arg, socket,
1162                        free_sevent, disp->sepool);
1163         ev->result = ISC_R_UNSET;
1164         ISC_LINK_INIT(ev, ev_link);
1165         ISC_LIST_INIT(ev->bufferlist);
1166         ev->region.base = NULL;
1167         ev->n = 0;
1168         ev->offset = 0;
1169         ev->attributes = 0;
1170
1171         return (ev);
1172 }
1173
1174
1175 static inline void
1176 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
1177         if (disp->failsafe_ev == ev) {
1178                 INSIST(disp->shutdown_out == 1);
1179                 disp->shutdown_out = 0;
1180
1181                 return;
1182         }
1183
1184         isc_mempool_put(disp->mgr->depool, ev);
1185 }
1186
1187 static inline dns_dispatchevent_t *
1188 allocate_devent(dns_dispatch_t *disp) {
1189         dns_dispatchevent_t *ev;
1190
1191         ev = isc_mempool_get(disp->mgr->depool);
1192         if (ev == NULL)
1193                 return (NULL);
1194         ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
1195                        NULL, NULL, NULL, NULL, NULL);
1196
1197         return (ev);
1198 }
1199
1200 static void
1201 udp_exrecv(isc_task_t *task, isc_event_t *ev) {
1202         dispsocket_t *dispsock = ev->ev_arg;
1203
1204         UNUSED(task);
1205
1206         REQUIRE(VALID_DISPSOCK(dispsock));
1207         udp_recv(ev, dispsock->disp, dispsock);
1208 }
1209
1210 static void
1211 udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1212         dns_dispatch_t *disp = ev->ev_arg;
1213
1214         UNUSED(task);
1215
1216         REQUIRE(VALID_DISPATCH(disp));
1217         udp_recv(ev, disp, NULL);
1218 }
1219
1220 /*
1221  * General flow:
1222  *
1223  * If I/O result == CANCELED or error, free the buffer.
1224  *
1225  * If query, free the buffer, restart.
1226  *
1227  * If response:
1228  *      Allocate event, fill in details.
1229  *              If cannot allocate, free buffer, restart.
1230  *      find target.  If not found, free buffer, restart.
1231  *      if event queue is not empty, queue.  else, send.
1232  *      restart.
      *
      * Arguments:
      *      'ev_in'    the socket receive event; it is freed on every path out
      *                 of this function.
      *      'disp'     the dispatch the event belongs to.
      *      'dispsock' the dedicated socket when called from udp_exrecv(), or
      *                 NULL when called from udp_shrecv().
      *
      * Locking: disp->lock is taken on entry and released on every return
      * path.  qid->lock is taken only when the response entry has to be looked
      * up in the qid table; 'qidlocked' records that so it is released at the
      * 'unlock' label.
1233  */
1234 static void
1235 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1236         isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1237         dns_messageid_t id;
1238         isc_result_t dres;
1239         isc_buffer_t source;
1240         unsigned int flags;
1241         dns_dispentry_t *resp = NULL;
1242         dns_dispatchevent_t *rev;
1243         unsigned int bucket;
1244         isc_boolean_t killit;
1245         isc_boolean_t queue_response;
1246         dns_dispatchmgr_t *mgr;
1247         dns_qid_t *qid;
1248         isc_netaddr_t netaddr;
1249         int match;
1250         int result;
1251         isc_boolean_t qidlocked = ISC_FALSE;
1252
1253         LOCK(&disp->lock);
1254
1255         mgr = disp->mgr;
1256         qid = mgr->qid;
1257
1258         dispatch_log(disp, LVL(90),
1259                      "got packet: requests %d, buffers %d, recvs %d",
1260                      disp->requests, disp->mgr->buffers, disp->recv_pending);
1261
1262         if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1263                 /*
1264                  * Unless the receive event was imported from a listening
1265                  * interface, in which case the event type is
1266                  * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1267                  */
1268                 INSIST(disp->recv_pending != 0);
1269                 disp->recv_pending = 0;
1270         }
1271
1272         if (dispsock != NULL &&
1273             (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1274                 /*
1275                  * dispsock->resp can be NULL if this transaction was canceled
1276                  * just after receiving a response.  Since this socket is
1277                  * exclusively used and there should be at most one receive
1278                  * event the canceled event should have been no effect.  So
1279                  * we can (and should) deactivate the socket right now.
1280                  */
1281                 deactivate_dispsocket(disp, dispsock);
                     /* From here on the event is handled without a dedicated socket. */
1282                 dispsock = NULL;
1283         }
1284
1285         if (disp->shutting_down) {
1286                 /*
1287                  * This dispatcher is shutting down.
1288                  */
1289                 free_buffer(disp, ev->region.base, ev->region.length);
1290
1291                 isc_event_free(&ev_in);
1292                 ev = NULL;
1293
1294                 killit = destroy_disp_ok(disp);
1295                 UNLOCK(&disp->lock);
1296                 if (killit)
1297                         isc_task_send(disp->task[0], &disp->ctlevent);
1298
1299                 return;
1300         }
1301
1302         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1303                 if (dispsock != NULL) {
                             /* The dedicated socket already identifies the response entry. */
1304                         resp = dispsock->resp;
1305                         id = resp->id;
1306                         if (ev->result != ISC_R_SUCCESS) {
1307                                 /*
1308                                  * This is most likely a network error on a
1309                                  * connected socket.  It makes no sense to
1310                                  * check the address or parse the packet, but it
1311                                  * will help to return the error to the caller.
1312                                  */
1313                                 goto sendresponse;
1314                         }
1315                 } else {
                             /*
                              * Exclusive dispatch but no dedicated socket left
                              * for this event: drop it without restarting the
                              * receive.
                              */
1316                         free_buffer(disp, ev->region.base, ev->region.length);
1317
1318                         UNLOCK(&disp->lock);
1319                         isc_event_free(&ev_in);
1320                         return;
1321                 }
1322         } else if (ev->result != ISC_R_SUCCESS) {
1323                 free_buffer(disp, ev->region.base, ev->region.length);
1324
1325                 if (ev->result != ISC_R_CANCELED)
1326                         dispatch_log(disp, ISC_LOG_ERROR,
1327                                      "odd socket result in udp_recv(): %s",
1328                                      isc_result_totext(ev->result));
1329
1330                 UNLOCK(&disp->lock);
1331                 isc_event_free(&ev_in);
1332                 return;
1333         }
1334
1335         /*
1336          * If this is from a blackholed address, drop it.
1337          */
1338         isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1339         if (disp->mgr->blackhole != NULL &&
1340             dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1341                           NULL, &match, NULL) == ISC_R_SUCCESS &&
1342             match > 0)
1343         {
1344                 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1345                         char netaddrstr[ISC_NETADDR_FORMATSIZE];
1346                         isc_netaddr_format(&netaddr, netaddrstr,
1347                                            sizeof(netaddrstr));
1348                         dispatch_log(disp, LVL(10),
1349                                      "blackholed packet from %s",
1350                                      netaddrstr);
1351                 }
1352                 free_buffer(disp, ev->region.base, ev->region.length);
1353                 goto restart;
1354         }
1355
1356         /*
1357          * Peek into the buffer to see what we can see.
1358          */
1359         isc_buffer_init(&source, ev->region.base, ev->region.length);
1360         isc_buffer_add(&source, ev->n);
1361         dres = dns_message_peekheader(&source, &id, &flags);
1362         if (dres != ISC_R_SUCCESS) {
1363                 free_buffer(disp, ev->region.base, ev->region.length);
1364                 dispatch_log(disp, LVL(10), "got garbage packet");
1365                 goto restart;
1366         }
1367
1368         dispatch_log(disp, LVL(92),
1369                      "got valid DNS message header, /QR %c, id %u",
1370                      ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1371
1372         /*
1373          * Look at flags.  If query, drop it. If response,
1374          * look to see where it goes.
1375          */
1376         if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1377                 /* query */
1378                 free_buffer(disp, ev->region.base, ev->region.length);
1379                 goto restart;
1380         }
1381
1382         /*
1383          * Search for the corresponding response.  If we are using an exclusive
1384          * socket, we've already identified it and we can skip the search; but
1385          * the ID and the address must match the expected ones.
1386          */
1387         if (resp == NULL) {
1388                 bucket = dns_hash(qid, &ev->address, id, disp->localport);
                     /* Held until the 'unlock' label below. */
1389                 LOCK(&qid->lock);
1390                 qidlocked = ISC_TRUE;
1391                 resp = entry_search(qid, &ev->address, id, disp->localport,
1392                                     bucket);
1393                 dispatch_log(disp, LVL(90),
1394                              "search for response in bucket %d: %s",
1395                              bucket, (resp == NULL ? "not found" : "found"));
1396
1397                 if (resp == NULL) {
1398                         inc_stats(mgr, dns_resstatscounter_mismatch);
1399                         free_buffer(disp, ev->region.base, ev->region.length);
1400                         goto unlock;
1401                 }
1402         } else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1403                                                          &resp->host)) {
1404                 dispatch_log(disp, LVL(90),
1405                              "response to an exclusive socket doesn't match");
1406                 inc_stats(mgr, dns_resstatscounter_mismatch);
1407                 free_buffer(disp, ev->region.base, ev->region.length);
1408                 goto unlock;
1409         }
1410
1411         /*
1412          * Now that we have the original dispatch the query was sent
1413          * from check that the address and port the response was
1414          * sent to make sense.
1415          */
1416         if (disp != resp->disp) {
1417                 isc_sockaddr_t a1;
1418                 isc_sockaddr_t a2;
1419
1420                 /*
1421                  * Check that the socket types and ports match.
1422                  */
1423                 if (disp->socktype != resp->disp->socktype ||
1424                     isc_sockaddr_getport(&disp->local) !=
1425                     isc_sockaddr_getport(&resp->disp->local)) {
1426                         free_buffer(disp, ev->region.base, ev->region.length);
1427                         goto unlock;
1428                 }
1429
1430                 /*
1431                  * If both dispatches are bound to an address then fail as
1432                  * the addresses can't be equal (enforced by the IP stack).
1433                  *
1434                  * Note under Linux a packet can be sent out via IPv4 socket
1435                  * and the response be received via a IPv6 socket.
1436                  *
1437                  * Requests sent out via IPv6 should always come back in
1438                  * via IPv6.
1439                  */
1440                 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1441                     isc_sockaddr_pf(&disp->local) != PF_INET6) {
1442                         free_buffer(disp, ev->region.base, ev->region.length);
1443                         goto unlock;
1444                 }
1445                 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1446                 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1447                 if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1448                     !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1449                         free_buffer(disp, ev->region.base, ev->region.length);
1450                         goto unlock;
1451                 }
1452         }
1453
1454   sendresponse:
             /*
              * If an event has already been sent for this entry and not yet
              * returned (item_out), the new one is appended to the entry's
              * queue instead of being sent.
              */
1455         queue_response = resp->item_out;
1456         rev = allocate_devent(resp->disp);
1457         if (rev == NULL) {
1458                 free_buffer(disp, ev->region.base, ev->region.length);
1459                 goto unlock;
1460         }
1461
1462         /*
1463          * At this point, rev contains the event we want to fill in, and
1464          * resp contains the information on the place to send it to.
1465          * Send the event off.
              *
              * The receive buffer is attached to rev->buffer here and is not
              * freed by this function on this path.
1466          */
1467         isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1468         isc_buffer_add(&rev->buffer, ev->n);
1469         rev->result = ev->result;
1470         rev->id = id;
1471         rev->addr = ev->address;
1472         rev->pktinfo = ev->pktinfo;
1473         rev->attributes = ev->attributes;
1474         if (queue_response) {
1475                 ISC_LIST_APPEND(resp->items, rev, ev_link);
1476         } else {
1477                 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1478                                DNS_EVENT_DISPATCH,
1479                                resp->action, resp->arg, resp, NULL, NULL);
1480                 request_log(disp, resp, LVL(90),
1481                             "[a] Sent event %p buffer %p len %d to task %p",
1482                             rev, rev->buffer.base, rev->buffer.length,
1483                             resp->task);
1484                 resp->item_out = ISC_TRUE;
1485                 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1486         }
1487  unlock:
1488         if (qidlocked)
1489                 UNLOCK(&qid->lock);
1490
1491         /*
1492          * Restart recv() to get the next packet.
1493          */
1494  restart:
1495         result = startrecv(disp, dispsock);
1496         if (result != ISC_R_SUCCESS && dispsock != NULL) {
1497                 /*
1498                  * XXX: weird. There seems to be no recovery process other than
1499                  * deactivate this socket anyway (since we cannot start
1500                  * receiving, we won't be able to receive a cancel event
1501                  * from the user).
1502                  */
1503                 deactivate_dispsocket(disp, dispsock);
1504         }
1505         UNLOCK(&disp->lock);
1506
1507         isc_event_free(&ev_in);
1508 }
1509
1510 /*
1511  * General flow:
1512  *
1513  * If I/O result == CANCELED, EOF, or error, notify everyone as the
1514  * various queues drain.
1515  *
1516  * If query, restart.
1517  *
1518  * If response:
1519  *      Allocate event, fill in details.
1520  *              If cannot allocate, restart.
1521  *      find target.  If not found, restart.
1522  *      if event queue is not empty, queue.  else, send.
1523  *      restart.
1524  */
1525 static void
1526 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1527         dns_dispatch_t *disp = ev_in->ev_arg;
1528         dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1529         dns_messageid_t id;
1530         isc_result_t dres;
1531         unsigned int flags;
1532         dns_dispentry_t *resp;
1533         dns_dispatchevent_t *rev;
1534         unsigned int bucket;
1535         isc_boolean_t killit;
1536         isc_boolean_t queue_response;
1537         dns_qid_t *qid;
1538         int level;
1539         char buf[ISC_SOCKADDR_FORMATSIZE];
1540
1541         UNUSED(task);
1542
1543         REQUIRE(VALID_DISPATCH(disp));
1544
1545         qid = disp->qid;
1546
1547         dispatch_log(disp, LVL(90),
1548                      "got TCP packet: requests %d, buffers %d, recvs %d",
1549                      disp->requests, disp->tcpbuffers, disp->recv_pending);
1550
1551         LOCK(&disp->lock);
1552
1553         INSIST(disp->recv_pending != 0);
1554         disp->recv_pending = 0;
1555
1556         if (disp->refcount == 0) {
1557                 /*
1558                  * This dispatcher is shutting down.  Force cancelation.
1559                  */
1560                 tcpmsg->result = ISC_R_CANCELED;
1561         }
1562
1563         if (tcpmsg->result != ISC_R_SUCCESS) {
1564                 switch (tcpmsg->result) {
1565                 case ISC_R_CANCELED:
1566                         break;
1567
1568                 case ISC_R_EOF:
1569                         dispatch_log(disp, LVL(90), "shutting down on EOF");
1570                         do_cancel(disp);
1571                         break;
1572
1573                 case ISC_R_CONNECTIONRESET:
1574                         level = ISC_LOG_INFO;
1575                         goto logit;
1576
1577                 default:
1578                         level = ISC_LOG_ERROR;
1579                 logit:
1580                         isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1581                         dispatch_log(disp, level, "shutting down due to TCP "
1582                                      "receive error: %s: %s", buf,
1583                                      isc_result_totext(tcpmsg->result));
1584                         do_cancel(disp);
1585                         break;
1586                 }
1587
1588                 /*
1589                  * The event is statically allocated in the tcpmsg
1590                  * structure, and destroy_disp() frees the tcpmsg, so we must
1591                  * free the event *before* calling destroy_disp().
1592                  */
1593                 isc_event_free(&ev_in);
1594
1595                 disp->shutting_down = 1;
1596                 disp->shutdown_why = tcpmsg->result;
1597
1598                 /*
1599                  * If the recv() was canceled pass the word on.
1600                  */
1601                 killit = destroy_disp_ok(disp);
1602                 UNLOCK(&disp->lock);
1603                 if (killit)
1604                         isc_task_send(disp->task[0], &disp->ctlevent);
1605                 return;
1606         }
1607
1608         dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1609                      tcpmsg->result,
1610                      tcpmsg->buffer.length, tcpmsg->buffer.base);
1611
1612         /*
1613          * Peek into the buffer to see what we can see.
1614          */
1615         dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1616         if (dres != ISC_R_SUCCESS) {
1617                 dispatch_log(disp, LVL(10), "got garbage packet");
1618                 goto restart;
1619         }
1620
1621         dispatch_log(disp, LVL(92),
1622                      "got valid DNS message header, /QR %c, id %u",
1623                      ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1624
1625         /*
1626          * Allocate an event to send to the query or response client, and
1627          * allocate a new buffer for our use.
1628          */
1629
1630         /*
1631          * Look at flags.  If query, drop it. If response,
1632          * look to see where it goes.
1633          */
1634         if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1635                 /*
1636                  * Query.
1637                  */
1638                 goto restart;
1639         }
1640
1641         /*
1642          * Response.
1643          */
1644         bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1645         LOCK(&qid->lock);
1646         resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1647         dispatch_log(disp, LVL(90),
1648                      "search for response in bucket %d: %s",
1649                      bucket, (resp == NULL ? "not found" : "found"));
1650
1651         if (resp == NULL)
1652                 goto unlock;
1653         queue_response = resp->item_out;
1654         rev = allocate_devent(disp);
1655         if (rev == NULL)
1656                 goto unlock;
1657
1658         /*
1659          * At this point, rev contains the event we want to fill in, and
1660          * resp contains the information on the place to send it to.
1661          * Send the event off.
1662          */
1663         dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1664         disp->tcpbuffers++;
1665         rev->result = ISC_R_SUCCESS;
1666         rev->id = id;
1667         rev->addr = tcpmsg->address;
1668         if (queue_response) {
1669                 ISC_LIST_APPEND(resp->items, rev, ev_link);
1670         } else {
1671                 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1672                                resp->action, resp->arg, resp, NULL, NULL);
1673                 request_log(disp, resp, LVL(90),
1674                             "[b] Sent event %p buffer %p len %d to task %p",
1675                             rev, rev->buffer.base, rev->buffer.length,
1676                             resp->task);
1677                 resp->item_out = ISC_TRUE;
1678                 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1679         }
1680  unlock:
1681         UNLOCK(&qid->lock);
1682
1683         /*
1684          * Restart recv() to get the next packet.
1685          */
1686  restart:
1687         (void)startrecv(disp, NULL);
1688
1689         UNLOCK(&disp->lock);
1690
1691         isc_event_free(&ev_in);
1692 }
1693
1694 /*
1695  * disp must be locked.
1696  */
1697 static isc_result_t
1698 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1699         isc_result_t res;
1700         isc_region_t region;
1701         isc_socket_t *socket;
1702
1703         if (disp->shutting_down == 1)
1704                 return (ISC_R_SUCCESS);
1705
1706         if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1707                 return (ISC_R_SUCCESS);
1708
1709         if (disp->recv_pending != 0 && dispsock == NULL)
1710                 return (ISC_R_SUCCESS);
1711
1712         if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1713                 return (ISC_R_NOMEMORY);
1714
1715         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1716             dispsock == NULL)
1717                 return (ISC_R_SUCCESS);
1718
1719         if (dispsock != NULL)
1720                 socket = dispsock->socket;
1721         else
1722                 socket = disp->socket;
1723         INSIST(socket != NULL);
1724
1725         switch (disp->socktype) {
1726                 /*
1727                  * UDP reads are always maximal.
1728                  */
1729         case isc_sockettype_udp:
1730                 region.length = disp->mgr->buffersize;
1731                 region.base = allocate_udp_buffer(disp);
1732                 if (region.base == NULL)
1733                         return (ISC_R_NOMEMORY);
1734                 if (dispsock != NULL) {
1735                         isc_task_t *dt = dispsock->task;
1736                         isc_socketevent_t *sev =
1737                                 allocate_sevent(disp, socket,
1738                                                 ISC_SOCKEVENT_RECVDONE,
1739                                                 udp_exrecv, dispsock);
1740                         if (sev == NULL) {
1741                                 free_buffer(disp, region.base, region.length);
1742                                 return (ISC_R_NOMEMORY);
1743                         }
1744
1745                         res = isc_socket_recv2(socket, &region, 1, dt, sev, 0);
1746                         if (res != ISC_R_SUCCESS) {
1747                                 free_buffer(disp, region.base, region.length);
1748                                 return (res);
1749                         }
1750                 } else {
1751                         isc_task_t *dt = disp->task[0];
1752                         isc_socketevent_t *sev =
1753                                 allocate_sevent(disp, socket,
1754                                                 ISC_SOCKEVENT_RECVDONE,
1755                                                 udp_shrecv, disp);
1756                         if (sev == NULL) {
1757                                 free_buffer(disp, region.base, region.length);
1758                                 return (ISC_R_NOMEMORY);
1759                         }
1760
1761                         res = isc_socket_recv2(socket, &region, 1, dt, sev, 0);
1762                         if (res != ISC_R_SUCCESS) {
1763                                 free_buffer(disp, region.base, region.length);
1764                                 disp->shutdown_why = res;
1765                                 disp->shutting_down = 1;
1766                                 do_cancel(disp);
1767                                 return (ISC_R_SUCCESS); /* recover by cancel */
1768                         }
1769                         INSIST(disp->recv_pending == 0);
1770                         disp->recv_pending = 1;
1771                 }
1772                 break;
1773
1774         case isc_sockettype_tcp:
1775                 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1776                                              tcp_recv, disp);
1777                 if (res != ISC_R_SUCCESS) {
1778                         disp->shutdown_why = res;
1779                         disp->shutting_down = 1;
1780                         do_cancel(disp);
1781                         return (ISC_R_SUCCESS); /* recover by cancel */
1782                 }
1783                 INSIST(disp->recv_pending == 0);
1784                 disp->recv_pending = 1;
1785                 break;
1786         default:
1787                 INSIST(0);
1788                 break;
1789         }
1790
1791         return (ISC_R_SUCCESS);
1792 }
1793
1794 /*
1795  * Mgr must be locked when calling this function.
1796  */
1797 static isc_boolean_t
1798 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1799         mgr_log(mgr, LVL(90),
1800                 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1801                 "depool=%d, rpool=%d, dpool=%d",
1802                 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1803                 isc_mempool_getallocated(mgr->depool),
1804                 isc_mempool_getallocated(mgr->rpool),
1805                 isc_mempool_getallocated(mgr->dpool));
1806         if (!MGR_IS_SHUTTINGDOWN(mgr))
1807                 return (ISC_FALSE);
1808         if (!ISC_LIST_EMPTY(mgr->list))
1809                 return (ISC_FALSE);
1810         if (isc_mempool_getallocated(mgr->depool) != 0)
1811                 return (ISC_FALSE);
1812         if (isc_mempool_getallocated(mgr->rpool) != 0)
1813                 return (ISC_FALSE);
1814         if (isc_mempool_getallocated(mgr->dpool) != 0)
1815                 return (ISC_FALSE);
1816
1817         return (ISC_TRUE);
1818 }
1819
1820 /*
1821  * Mgr must be unlocked when calling this function.
1822  */
1823 static void
1824 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1825         isc_mem_t *mctx;
1826         dns_dispatchmgr_t *mgr;
1827
1828         mgr = *mgrp;
1829         *mgrp = NULL;
1830
1831         mctx = mgr->mctx;
1832
1833         mgr->magic = 0;
1834         mgr->mctx = NULL;
1835         DESTROYLOCK(&mgr->lock);
1836         mgr->state = 0;
1837
1838         DESTROYLOCK(&mgr->arc4_lock);
1839
1840         isc_mempool_destroy(&mgr->depool);
1841         isc_mempool_destroy(&mgr->rpool);
1842         isc_mempool_destroy(&mgr->dpool);
1843         if (mgr->bpool != NULL)
1844                 isc_mempool_destroy(&mgr->bpool);
1845         if (mgr->spool != NULL)
1846                 isc_mempool_destroy(&mgr->spool);
1847
1848         DESTROYLOCK(&mgr->spool_lock);
1849         DESTROYLOCK(&mgr->bpool_lock);
1850         DESTROYLOCK(&mgr->dpool_lock);
1851         DESTROYLOCK(&mgr->rpool_lock);
1852         DESTROYLOCK(&mgr->depool_lock);
1853
1854 #ifdef BIND9
1855         if (mgr->entropy != NULL)
1856                 isc_entropy_detach(&mgr->entropy);
1857 #endif /* BIND9 */
1858         if (mgr->qid != NULL)
1859                 qid_destroy(mctx, &mgr->qid);
1860
1861         DESTROYLOCK(&mgr->buffer_lock);
1862
1863         if (mgr->blackhole != NULL)
1864                 dns_acl_detach(&mgr->blackhole);
1865
1866         if (mgr->stats != NULL)
1867                 isc_stats_detach(&mgr->stats);
1868
1869         if (mgr->v4ports != NULL) {
1870                 isc_mem_put(mctx, mgr->v4ports,
1871                             mgr->nv4ports * sizeof(in_port_t));
1872         }
1873         if (mgr->v6ports != NULL) {
1874                 isc_mem_put(mctx, mgr->v6ports,
1875                             mgr->nv6ports * sizeof(in_port_t));
1876         }
1877         isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1878         isc_mem_detach(&mctx);
1879 }
1880
1881 static isc_result_t
1882 open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1883             unsigned int options, isc_socket_t **sockp,
1884             isc_socket_t *dup_socket)
1885 {
1886         isc_socket_t *sock;
1887         isc_result_t result;
1888
1889         sock = *sockp;
1890         if (sock != NULL) {
1891 #ifdef BIND9
1892                 result = isc_socket_open(sock);
1893                 if (result != ISC_R_SUCCESS)
1894                         return (result);
1895 #else
1896                 INSIST(0);
1897 #endif
1898         } else if (dup_socket != NULL) {
1899                 result = isc_socket_dup(dup_socket, &sock);
1900                 if (result != ISC_R_SUCCESS)
1901                         return (result);
1902
1903                 isc_socket_setname(sock, "dispatcher", NULL);
1904                 *sockp = sock;
1905                 return (ISC_R_SUCCESS);
1906         } else {
1907                 result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1908                                         isc_sockettype_udp, &sock);
1909                 if (result != ISC_R_SUCCESS)
1910                         return (result);
1911         }
1912
1913         isc_socket_setname(sock, "dispatcher", NULL);
1914
1915 #ifndef ISC_ALLOW_MAPPED
1916         isc_socket_ipv6only(sock, ISC_TRUE);
1917 #endif
1918         result = isc_socket_bind(sock, local, options);
1919         if (result != ISC_R_SUCCESS) {
1920                 if (*sockp == NULL)
1921                         isc_socket_detach(&sock);
1922                 else {
1923 #ifdef BIND9
1924                         isc_socket_close(sock);
1925 #else
1926                         INSIST(0);
1927 #endif
1928                 }
1929                 return (result);
1930         }
1931
1932         *sockp = sock;
1933         return (ISC_R_SUCCESS);
1934 }
1935
1936 /*%
1937  * Create a temporary port list to set the initial default set of dispatch
1938  * ports: [1024, 65535].  This is almost meaningless as the application will
1939  * normally set the ports explicitly, but is provided to fill some minor corner
1940  * cases.
1941  */
1942 static isc_result_t
1943 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1944         isc_result_t result;
1945
1946         result = isc_portset_create(mctx, portsetp);
1947         if (result != ISC_R_SUCCESS)
1948                 return (result);
1949         isc_portset_addrange(*portsetp, 1024, 65535);
1950
1951         return (ISC_R_SUCCESS);
1952 }
1953
1954 /*
1955  * Publics.
1956  */
1957
1958 isc_result_t
1959 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1960                        dns_dispatchmgr_t **mgrp)
1961 {
1962         dns_dispatchmgr_t *mgr;
1963         isc_result_t result;
1964         isc_portset_t *v4portset = NULL;
1965         isc_portset_t *v6portset = NULL;
1966
1967         REQUIRE(mctx != NULL);
1968         REQUIRE(mgrp != NULL && *mgrp == NULL);
1969
1970         mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1971         if (mgr == NULL)
1972                 return (ISC_R_NOMEMORY);
1973
1974         mgr->mctx = NULL;
1975         isc_mem_attach(mctx, &mgr->mctx);
1976
1977         mgr->blackhole = NULL;
1978         mgr->stats = NULL;
1979
1980         result = isc_mutex_init(&mgr->lock);
1981         if (result != ISC_R_SUCCESS)
1982                 goto deallocate;
1983
1984         result = isc_mutex_init(&mgr->arc4_lock);
1985         if (result != ISC_R_SUCCESS)
1986                 goto kill_lock;
1987
1988         result = isc_mutex_init(&mgr->buffer_lock);
1989         if (result != ISC_R_SUCCESS)
1990                 goto kill_arc4_lock;
1991
1992         result = isc_mutex_init(&mgr->depool_lock);
1993         if (result != ISC_R_SUCCESS)
1994                 goto kill_buffer_lock;
1995
1996         result = isc_mutex_init(&mgr->rpool_lock);
1997         if (result != ISC_R_SUCCESS)
1998                 goto kill_depool_lock;
1999
2000         result = isc_mutex_init(&mgr->dpool_lock);
2001         if (result != ISC_R_SUCCESS)
2002                 goto kill_rpool_lock;
2003
2004         result = isc_mutex_init(&mgr->bpool_lock);
2005         if (result != ISC_R_SUCCESS)
2006                 goto kill_dpool_lock;
2007
2008         result = isc_mutex_init(&mgr->spool_lock);
2009         if (result != ISC_R_SUCCESS)
2010                 goto kill_bpool_lock;
2011
2012         mgr->depool = NULL;
2013         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
2014                                &mgr->depool) != ISC_R_SUCCESS) {
2015                 result = ISC_R_NOMEMORY;
2016                 goto kill_spool_lock;
2017         }
2018
2019         mgr->rpool = NULL;
2020         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
2021                                &mgr->rpool) != ISC_R_SUCCESS) {
2022                 result = ISC_R_NOMEMORY;
2023                 goto kill_depool;
2024         }
2025
2026         mgr->dpool = NULL;
2027         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
2028                                &mgr->dpool) != ISC_R_SUCCESS) {
2029                 result = ISC_R_NOMEMORY;
2030                 goto kill_rpool;
2031         }
2032
2033         isc_mempool_setname(mgr->depool, "dispmgr_depool");
2034         isc_mempool_setmaxalloc(mgr->depool, 32768);
2035         isc_mempool_setfreemax(mgr->depool, 32768);
2036         isc_mempool_associatelock(mgr->depool, &mgr->depool_lock);
2037         isc_mempool_setfillcount(mgr->depool, 256);
2038
2039         isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
2040         isc_mempool_setmaxalloc(mgr->rpool, 32768);
2041         isc_mempool_setfreemax(mgr->rpool, 32768);
2042         isc_mempool_associatelock(mgr->rpool, &mgr->rpool_lock);
2043         isc_mempool_setfillcount(mgr->rpool, 256);
2044
2045         isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
2046         isc_mempool_setmaxalloc(mgr->dpool, 32768);
2047         isc_mempool_setfreemax(mgr->dpool, 32768);
2048         isc_mempool_associatelock(mgr->dpool, &mgr->dpool_lock);
2049         isc_mempool_setfillcount(mgr->dpool, 256);
2050
2051         mgr->buffers = 0;
2052         mgr->buffersize = 0;
2053         mgr->maxbuffers = 0;
2054         mgr->bpool = NULL;
2055         mgr->spool = NULL;
2056         mgr->entropy = NULL;
2057         mgr->qid = NULL;
2058         mgr->state = 0;
2059         ISC_LIST_INIT(mgr->list);
2060         mgr->v4ports = NULL;
2061         mgr->v6ports = NULL;
2062         mgr->nv4ports = 0;
2063         mgr->nv6ports = 0;
2064         mgr->magic = DNS_DISPATCHMGR_MAGIC;
2065
2066         result = create_default_portset(mctx, &v4portset);
2067         if (result == ISC_R_SUCCESS) {
2068                 result = create_default_portset(mctx, &v6portset);
2069                 if (result == ISC_R_SUCCESS) {
2070                         result = dns_dispatchmgr_setavailports(mgr,
2071                                                                v4portset,
2072                                                                v6portset);
2073                 }
2074         }
2075         if (v4portset != NULL)
2076                 isc_portset_destroy(mctx, &v4portset);
2077         if (v6portset != NULL)
2078                 isc_portset_destroy(mctx, &v6portset);
2079         if (result != ISC_R_SUCCESS)
2080                 goto kill_dpool;
2081
2082 #ifdef BIND9
2083         if (entropy != NULL)
2084                 isc_entropy_attach(entropy, &mgr->entropy);
2085 #else
2086         UNUSED(entropy);
2087 #endif
2088
2089         dispatch_initrandom(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
2090
2091         *mgrp = mgr;
2092         return (ISC_R_SUCCESS);
2093
2094  kill_dpool:
2095         isc_mempool_destroy(&mgr->dpool);
2096  kill_rpool:
2097         isc_mempool_destroy(&mgr->rpool);
2098  kill_depool:
2099         isc_mempool_destroy(&mgr->depool);
2100  kill_spool_lock:
2101         DESTROYLOCK(&mgr->spool_lock);
2102  kill_bpool_lock:
2103         DESTROYLOCK(&mgr->bpool_lock);
2104  kill_dpool_lock:
2105         DESTROYLOCK(&mgr->dpool_lock);
2106  kill_rpool_lock:
2107         DESTROYLOCK(&mgr->rpool_lock);
2108  kill_depool_lock:
2109         DESTROYLOCK(&mgr->depool_lock);
2110  kill_buffer_lock:
2111         DESTROYLOCK(&mgr->buffer_lock);
2112  kill_arc4_lock:
2113         DESTROYLOCK(&mgr->arc4_lock);
2114  kill_lock:
2115         DESTROYLOCK(&mgr->lock);
2116  deallocate:
2117         isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
2118         isc_mem_detach(&mctx);
2119
2120         return (result);
2121 }
2122
2123 void
2124 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
2125         REQUIRE(VALID_DISPATCHMGR(mgr));
2126         if (mgr->blackhole != NULL)
2127                 dns_acl_detach(&mgr->blackhole);
2128         dns_acl_attach(blackhole, &mgr->blackhole);
2129 }
2130
2131 dns_acl_t *
2132 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
2133         REQUIRE(VALID_DISPATCHMGR(mgr));
2134         return (mgr->blackhole);
2135 }
2136
2137 void
2138 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
2139                                  dns_portlist_t *portlist)
2140 {
2141         REQUIRE(VALID_DISPATCHMGR(mgr));
2142         UNUSED(portlist);
2143
2144         /* This function is deprecated: use dns_dispatchmgr_setavailports(). */
2145         return;
2146 }
2147
2148 dns_portlist_t *
2149 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
2150         REQUIRE(VALID_DISPATCHMGR(mgr));
2151         return (NULL);          /* this function is deprecated */
2152 }
2153
2154 isc_result_t
2155 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
2156                               isc_portset_t *v6portset)
2157 {
2158         in_port_t *v4ports, *v6ports, p;
2159         unsigned int nv4ports, nv6ports, i4, i6;
2160
2161         REQUIRE(VALID_DISPATCHMGR(mgr));
2162
2163         nv4ports = isc_portset_nports(v4portset);
2164         nv6ports = isc_portset_nports(v6portset);
2165
2166         v4ports = NULL;
2167         if (nv4ports != 0) {
2168                 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
2169                 if (v4ports == NULL)
2170                         return (ISC_R_NOMEMORY);
2171         }
2172         v6ports = NULL;
2173         if (nv6ports != 0) {
2174                 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
2175                 if (v6ports == NULL) {
2176                         if (v4ports != NULL) {
2177                                 isc_mem_put(mgr->mctx, v4ports,
2178                                             sizeof(in_port_t) *
2179                                             isc_portset_nports(v4portset));
2180                         }
2181                         return (ISC_R_NOMEMORY);
2182                 }
2183         }
2184
2185         p = 0;
2186         i4 = 0;
2187         i6 = 0;
2188         do {
2189                 if (isc_portset_isset(v4portset, p)) {
2190                         INSIST(i4 < nv4ports);
2191                         v4ports[i4++] = p;
2192                 }
2193                 if (isc_portset_isset(v6portset, p)) {
2194                         INSIST(i6 < nv6ports);
2195                         v6ports[i6++] = p;
2196                 }
2197         } while (p++ < 65535);
2198         INSIST(i4 == nv4ports && i6 == nv6ports);
2199
2200         PORTBUFLOCK(mgr);
2201         if (mgr->v4ports != NULL) {
2202                 isc_mem_put(mgr->mctx, mgr->v4ports,
2203                             mgr->nv4ports * sizeof(in_port_t));
2204         }
2205         mgr->v4ports = v4ports;
2206         mgr->nv4ports = nv4ports;
2207
2208         if (mgr->v6ports != NULL) {
2209                 isc_mem_put(mgr->mctx, mgr->v6ports,
2210                             mgr->nv6ports * sizeof(in_port_t));
2211         }
2212         mgr->v6ports = v6ports;
2213         mgr->nv6ports = nv6ports;
2214         PORTBUFUNLOCK(mgr);
2215
2216         return (ISC_R_SUCCESS);
2217 }
2218
2219 static isc_result_t
2220 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
2221                        unsigned int buffersize, unsigned int maxbuffers,
2222                        unsigned int maxrequests, unsigned int buckets,
2223                        unsigned int increment)
2224 {
2225         isc_result_t result;
2226
2227         REQUIRE(VALID_DISPATCHMGR(mgr));
2228         REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2229         REQUIRE(maxbuffers > 0);
2230         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2231         REQUIRE(increment > buckets);
2232
2233         /*
2234          * Keep some number of items around.  This should be a config
2235          * option.  For now, keep 8, but later keep at least two even
2236          * if the caller wants less.  This allows us to ensure certain
2237          * things, like an event can be "freed" and the next allocation
2238          * will always succeed.
2239          *
2240          * Note that if limits are placed on anything here, we use one
2241          * event internally, so the actual limit should be "wanted + 1."
2242          *
2243          * XXXMLG
2244          */
2245
2246         if (maxbuffers < 8)
2247                 maxbuffers = 8;
2248
2249         LOCK(&mgr->buffer_lock);
2250
2251         /* Create or adjust buffer pool */
2252         if (mgr->bpool != NULL) {
2253                 /*
2254                  * We only increase the maxbuffers to avoid accidental buffer
2255                  * shortage.  Ideally we'd separate the manager-wide maximum
2256                  * from per-dispatch limits and respect the latter within the
2257                  * global limit.  But at this moment that's deemed to be
2258                  * overkilling and isn't worth additional implementation
2259                  * complexity.
2260                  */
2261                 if (maxbuffers > mgr->maxbuffers) {
2262                         isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2263                         isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2264                         mgr->maxbuffers = maxbuffers;
2265                 }
2266         } else {
2267                 result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
2268                 if (result != ISC_R_SUCCESS) {
2269                         UNLOCK(&mgr->buffer_lock);
2270                         return (result);
2271                 }
2272                 isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
2273                 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2274                 isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2275                 isc_mempool_associatelock(mgr->bpool, &mgr->bpool_lock);
2276                 isc_mempool_setfillcount(mgr->bpool, 256);
2277         }
2278
2279         /* Create or adjust socket pool */
2280         if (mgr->spool != NULL) {
2281                 if (maxrequests < DNS_DISPATCH_POOLSOCKS * 2)
2282                   isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
2283                   isc_mempool_setfreemax(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
2284                 UNLOCK(&mgr->buffer_lock);
2285                 return (ISC_R_SUCCESS);
2286         }
2287         result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
2288                                     &mgr->spool);
2289         if (result != ISC_R_SUCCESS) {
2290                 UNLOCK(&mgr->buffer_lock);
2291                 goto cleanup;
2292         }
2293         isc_mempool_setname(mgr->spool, "dispmgr_spool");
2294         isc_mempool_setmaxalloc(mgr->spool, maxrequests);
2295         isc_mempool_setfreemax(mgr->spool, maxrequests);
2296         isc_mempool_associatelock(mgr->spool, &mgr->spool_lock);
2297         isc_mempool_setfillcount(mgr->spool, 256);
2298
2299         result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
2300         if (result != ISC_R_SUCCESS)
2301                 goto cleanup;
2302
2303         mgr->buffersize = buffersize;
2304         mgr->maxbuffers = maxbuffers;
2305         UNLOCK(&mgr->buffer_lock);
2306         return (ISC_R_SUCCESS);
2307
2308  cleanup:
2309         isc_mempool_destroy(&mgr->bpool);
2310         if (mgr->spool != NULL)
2311                 isc_mempool_destroy(&mgr->spool);
2312         UNLOCK(&mgr->buffer_lock);
2313         return (result);
2314 }
2315
2316 void
2317 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
2318         dns_dispatchmgr_t *mgr;
2319         isc_boolean_t killit;
2320
2321         REQUIRE(mgrp != NULL);
2322         REQUIRE(VALID_DISPATCHMGR(*mgrp));
2323
2324         mgr = *mgrp;
2325         *mgrp = NULL;
2326
2327         LOCK(&mgr->lock);
2328         mgr->state |= MGR_SHUTTINGDOWN;
2329
2330         killit = destroy_mgr_ok(mgr);
2331         UNLOCK(&mgr->lock);
2332
2333         mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
2334
2335         if (killit)
2336                 destroy_mgr(&mgr);
2337 }
2338
2339 void
2340 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2341         REQUIRE(VALID_DISPATCHMGR(mgr));
2342         REQUIRE(ISC_LIST_EMPTY(mgr->list));
2343         REQUIRE(mgr->stats == NULL);
2344
2345         isc_stats_attach(stats, &mgr->stats);
2346 }
2347
/*
 * Three-way comparison of two in_port_t values, in the form expected by
 * bsearch(): returns -1, 0 or 1 when the port at 'key' is less than,
 * equal to or greater than the port at 'ent'.
 */
static int
port_cmp(const void *key, const void *ent) {
	const in_port_t wanted = *(const in_port_t *)key;
	const in_port_t entry = *(const in_port_t *)ent;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
	return ((wanted > entry) - (wanted < entry));
}
2360
2361 static isc_boolean_t
2362 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2363               isc_sockaddr_t *sockaddrp)
2364 {
2365         isc_sockaddr_t sockaddr;
2366         isc_result_t result;
2367         in_port_t *ports, port;
2368         unsigned int nports;
2369         isc_boolean_t available = ISC_FALSE;
2370
2371         REQUIRE(sock != NULL || sockaddrp != NULL);
2372
2373         PORTBUFLOCK(mgr);
2374         if (sock != NULL) {
2375                 sockaddrp = &sockaddr;
2376                 result = isc_socket_getsockname(sock, sockaddrp);
2377                 if (result != ISC_R_SUCCESS)
2378                         goto unlock;
2379         }
2380
2381         if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2382                 ports = mgr->v4ports;
2383                 nports = mgr->nv4ports;
2384         } else {
2385                 ports = mgr->v6ports;
2386                 nports = mgr->nv6ports;
2387         }
2388         if (ports == NULL)
2389                 goto unlock;
2390
2391         port = isc_sockaddr_getport(sockaddrp);
2392         if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2393                 available = ISC_TRUE;
2394
2395 unlock:
2396         PORTBUFUNLOCK(mgr);
2397         return (available);
2398 }
2399
/*
 * True when _a1 and _a2 agree on every bit selected by _mask, i.e. when
 * the bits in which they differ all lie outside the mask.
 */
#define ATTRMATCH(_a1, _a2, _mask) ((((_a1) ^ (_a2)) & (_mask)) == 0)
2401
2402 static isc_boolean_t
2403 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2404         isc_sockaddr_t sockaddr;
2405         isc_result_t result;
2406
2407         REQUIRE(disp->socket != NULL);
2408
2409         if (addr == NULL)
2410                 return (ISC_TRUE);
2411
2412         /*
2413          * Don't match wildcard ports unless the port is available in the
2414          * current configuration.
2415          */
2416         if (isc_sockaddr_getport(addr) == 0 &&
2417             isc_sockaddr_getport(&disp->local) == 0 &&
2418             !portavailable(disp->mgr, disp->socket, NULL)) {
2419                 return (ISC_FALSE);
2420         }
2421
2422         /*
2423          * Check if we match the binding <address,port>.
2424          * Wildcard ports match/fail here.
2425          */
2426         if (isc_sockaddr_equal(&disp->local, addr))
2427                 return (ISC_TRUE);
2428         if (isc_sockaddr_getport(addr) == 0)
2429                 return (ISC_FALSE);
2430
2431         /*
2432          * Check if we match a bound wildcard port <address,port>.
2433          */
2434         if (!isc_sockaddr_eqaddr(&disp->local, addr))
2435                 return (ISC_FALSE);
2436         result = isc_socket_getsockname(disp->socket, &sockaddr);
2437         if (result != ISC_R_SUCCESS)
2438                 return (ISC_FALSE);
2439
2440         return (isc_sockaddr_equal(&sockaddr, addr));
2441 }
2442
2443 /*
2444  * Requires mgr be locked.
2445  *
2446  * No dispatcher can be locked by this thread when calling this function.
2447  *
2448  *
2449  * NOTE:
2450  *      If a matching dispatcher is found, it is locked after this function
2451  *      returns, and must be unlocked by the caller.
2452  */
2453 static isc_result_t
2454 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2455               unsigned int attributes, unsigned int mask,
2456               dns_dispatch_t **dispp)
2457 {
2458         dns_dispatch_t *disp;
2459         isc_result_t result;
2460
2461         /*
2462          * Make certain that we will not match a private or exclusive dispatch.
2463          */
2464         attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2465         mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2466
2467         disp = ISC_LIST_HEAD(mgr->list);
2468         while (disp != NULL) {
2469                 LOCK(&disp->lock);
2470                 if ((disp->shutting_down == 0)
2471                     && ATTRMATCH(disp->attributes, attributes, mask)
2472                     && local_addr_match(disp, local))
2473                         break;
2474                 UNLOCK(&disp->lock);
2475                 disp = ISC_LIST_NEXT(disp, link);
2476         }
2477
2478         if (disp == NULL) {
2479                 result = ISC_R_NOTFOUND;
2480                 goto out;
2481         }
2482
2483         *dispp = disp;
2484         result = ISC_R_SUCCESS;
2485  out:
2486
2487         return (result);
2488 }
2489
2490 static isc_result_t
2491 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2492              unsigned int increment, dns_qid_t **qidp,
2493              isc_boolean_t needsocktable)
2494 {
2495         dns_qid_t *qid;
2496         unsigned int i;
2497         isc_result_t result;
2498
2499         REQUIRE(VALID_DISPATCHMGR(mgr));
2500         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2501         REQUIRE(increment > buckets);
2502         REQUIRE(qidp != NULL && *qidp == NULL);
2503
2504         qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2505         if (qid == NULL)
2506                 return (ISC_R_NOMEMORY);
2507
2508         qid->qid_table = isc_mem_get(mgr->mctx,
2509                                      buckets * sizeof(dns_displist_t));
2510         if (qid->qid_table == NULL) {
2511                 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2512                 return (ISC_R_NOMEMORY);
2513         }
2514
2515         qid->sock_table = NULL;
2516         if (needsocktable) {
2517                 qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2518                                               sizeof(dispsocketlist_t));
2519                 if (qid->sock_table == NULL) {
2520                         isc_mem_put(mgr->mctx, qid->qid_table,
2521                                     buckets * sizeof(dns_displist_t));
2522                         isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2523                         return (ISC_R_NOMEMORY);
2524                 }
2525         }
2526
2527         result = isc_mutex_init(&qid->lock);
2528         if (result != ISC_R_SUCCESS) {
2529                 if (qid->sock_table != NULL) {
2530                         isc_mem_put(mgr->mctx, qid->sock_table,
2531                                     buckets * sizeof(dispsocketlist_t));
2532                 }
2533                 isc_mem_put(mgr->mctx, qid->qid_table,
2534                             buckets * sizeof(dns_displist_t));
2535                 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2536                 return (result);
2537         }
2538
2539         for (i = 0; i < buckets; i++) {
2540                 ISC_LIST_INIT(qid->qid_table[i]);
2541                 if (qid->sock_table != NULL)
2542                         ISC_LIST_INIT(qid->sock_table[i]);
2543         }
2544
2545         qid->qid_nbuckets = buckets;
2546         qid->qid_increment = increment;
2547         qid->magic = QID_MAGIC;
2548         *qidp = qid;
2549         return (ISC_R_SUCCESS);
2550 }
2551
2552 static void
2553 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2554         dns_qid_t *qid;
2555
2556         REQUIRE(qidp != NULL);
2557         qid = *qidp;
2558
2559         REQUIRE(VALID_QID(qid));
2560
2561         *qidp = NULL;
2562         qid->magic = 0;
2563         isc_mem_put(mctx, qid->qid_table,
2564                     qid->qid_nbuckets * sizeof(dns_displist_t));
2565         if (qid->sock_table != NULL) {
2566                 isc_mem_put(mctx, qid->sock_table,
2567                             qid->qid_nbuckets * sizeof(dispsocketlist_t));
2568         }
2569         DESTROYLOCK(&qid->lock);
2570         isc_mem_put(mctx, qid, sizeof(*qid));
2571 }
2572
2573 /*
2574  * Allocate and set important limits.
2575  */
2576 static isc_result_t
2577 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2578                   dns_dispatch_t **dispp)
2579 {
2580         dns_dispatch_t *disp;
2581         isc_result_t result;
2582
2583         REQUIRE(VALID_DISPATCHMGR(mgr));
2584         REQUIRE(dispp != NULL && *dispp == NULL);
2585
2586         /*
2587          * Set up the dispatcher, mostly.  Don't bother setting some of
2588          * the options that are controlled by tcp vs. udp, etc.
2589          */
2590
2591         disp = isc_mempool_get(mgr->dpool);
2592         if (disp == NULL)
2593                 return (ISC_R_NOMEMORY);
2594
2595         disp->magic = 0;
2596         disp->mgr = mgr;
2597         disp->maxrequests = maxrequests;
2598         disp->attributes = 0;
2599         ISC_LINK_INIT(disp, link);
2600         disp->refcount = 1;
2601         disp->recv_pending = 0;
2602         memset(&disp->local, 0, sizeof(disp->local));
2603         disp->localport = 0;
2604         disp->shutting_down = 0;
2605         disp->shutdown_out = 0;
2606         disp->connected = 0;
2607         disp->tcpmsg_valid = 0;
2608         disp->shutdown_why = ISC_R_UNEXPECTED;
2609         disp->requests = 0;
2610         disp->tcpbuffers = 0;
2611         disp->qid = NULL;
2612         ISC_LIST_INIT(disp->activesockets);
2613         ISC_LIST_INIT(disp->inactivesockets);
2614         disp->nsockets = 0;
2615         dispatch_initrandom(&disp->arc4ctx, mgr->entropy, NULL);
2616         disp->port_table = NULL;
2617         disp->portpool = NULL;
2618
2619         result = isc_mutex_init(&disp->lock);
2620         if (result != ISC_R_SUCCESS)
2621                 goto deallocate;
2622
2623         disp->failsafe_ev = allocate_devent(disp);
2624         if (disp->failsafe_ev == NULL) {
2625                 result = ISC_R_NOMEMORY;
2626                 goto kill_lock;
2627         }
2628
2629         disp->magic = DISPATCH_MAGIC;
2630
2631         *dispp = disp;
2632         return (ISC_R_SUCCESS);
2633
2634         /*
2635          * error returns
2636          */
2637  kill_lock:
2638         DESTROYLOCK(&disp->lock);
2639  deallocate:
2640         isc_mempool_put(mgr->dpool, disp);
2641
2642         return (result);
2643 }
2644
2645
2646 /*
2647  * MUST be unlocked, and not used by anything.
2648  */
2649 static void
2650 dispatch_free(dns_dispatch_t **dispp)
2651 {
2652         dns_dispatch_t *disp;
2653         dns_dispatchmgr_t *mgr;
2654         int i;
2655
2656         REQUIRE(VALID_DISPATCH(*dispp));
2657         disp = *dispp;
2658         *dispp = NULL;
2659
2660         mgr = disp->mgr;
2661         REQUIRE(VALID_DISPATCHMGR(mgr));
2662
2663         if (disp->tcpmsg_valid) {
2664                 dns_tcpmsg_invalidate(&disp->tcpmsg);
2665                 disp->tcpmsg_valid = 0;
2666         }
2667
2668         INSIST(disp->tcpbuffers == 0);
2669         INSIST(disp->requests == 0);
2670         INSIST(disp->recv_pending == 0);
2671         INSIST(ISC_LIST_EMPTY(disp->activesockets));
2672         INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2673
2674         isc_mempool_put(mgr->depool, disp->failsafe_ev);
2675         disp->failsafe_ev = NULL;
2676
2677         if (disp->qid != NULL)
2678                 qid_destroy(mgr->mctx, &disp->qid);
2679
2680         if (disp->port_table != NULL) {
2681                 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2682                         INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2683                 isc_mem_put(mgr->mctx, disp->port_table,
2684                             sizeof(disp->port_table[0]) *
2685                             DNS_DISPATCH_PORTTABLESIZE);
2686         }
2687
2688         if (disp->portpool != NULL)
2689                 isc_mempool_destroy(&disp->portpool);
2690
2691         disp->mgr = NULL;
2692         DESTROYLOCK(&disp->lock);
2693         disp->magic = 0;
2694         isc_mempool_put(mgr->dpool, disp);
2695 }
2696
2697 isc_result_t
2698 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2699                        isc_taskmgr_t *taskmgr, unsigned int buffersize,
2700                        unsigned int maxbuffers, unsigned int maxrequests,
2701                        unsigned int buckets, unsigned int increment,
2702                        unsigned int attributes, dns_dispatch_t **dispp)
2703 {
2704         isc_result_t result;
2705         dns_dispatch_t *disp;
2706
2707         UNUSED(maxbuffers);
2708         UNUSED(buffersize);
2709
2710         REQUIRE(VALID_DISPATCHMGR(mgr));
2711         REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2712         REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2713         REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2714
2715         attributes |= DNS_DISPATCHATTR_PRIVATE;  /* XXXMLG */
2716
2717         LOCK(&mgr->lock);
2718
2719         /*
2720          * dispatch_allocate() checks mgr for us.
2721          * qid_allocate() checks buckets and increment for us.
2722          */
2723         disp = NULL;
2724         result = dispatch_allocate(mgr, maxrequests, &disp);
2725         if (result != ISC_R_SUCCESS) {
2726                 UNLOCK(&mgr->lock);
2727                 return (result);
2728         }
2729
2730         result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2731         if (result != ISC_R_SUCCESS)
2732                 goto deallocate_dispatch;
2733
2734         disp->socktype = isc_sockettype_tcp;
2735         disp->socket = NULL;
2736         isc_socket_attach(sock, &disp->socket);
2737
2738         disp->sepool = NULL;
2739
2740         disp->ntasks = 1;
2741         disp->task[0] = NULL;
2742         result = isc_task_create(taskmgr, 0, &disp->task[0]);
2743         if (result != ISC_R_SUCCESS)
2744                 goto kill_socket;
2745
2746         disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2747                                             DNS_EVENT_DISPATCHCONTROL,
2748                                             destroy_disp, disp,
2749                                             sizeof(isc_event_t));
2750         if (disp->ctlevent == NULL) {
2751                 result = ISC_R_NOMEMORY;
2752                 goto kill_task;
2753         }
2754
2755         isc_task_setname(disp->task[0], "tcpdispatch", disp);
2756
2757         dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2758         disp->tcpmsg_valid = 1;
2759
2760         disp->attributes = attributes;
2761
2762         /*
2763          * Append it to the dispatcher list.
2764          */
2765         ISC_LIST_APPEND(mgr->list, disp, link);
2766         UNLOCK(&mgr->lock);
2767
2768         mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2769         dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2770
2771         *dispp = disp;
2772
2773         return (ISC_R_SUCCESS);
2774
2775         /*
2776          * Error returns.
2777          */
2778  kill_task:
2779         isc_task_detach(&disp->task[0]);
2780  kill_socket:
2781         isc_socket_detach(&disp->socket);
2782  deallocate_dispatch:
2783         dispatch_free(&disp);
2784
2785         UNLOCK(&mgr->lock);
2786
2787         return (result);
2788 }
2789
2790 isc_result_t
2791 dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2792                     isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2793                     unsigned int buffersize,
2794                     unsigned int maxbuffers, unsigned int maxrequests,
2795                     unsigned int buckets, unsigned int increment,
2796                     unsigned int attributes, unsigned int mask,
2797                     dns_dispatch_t **dispp, dns_dispatch_t *dup_dispatch)
2798 {
2799         isc_result_t result;
2800         dns_dispatch_t *disp = NULL;
2801
2802         REQUIRE(VALID_DISPATCHMGR(mgr));
2803         REQUIRE(sockmgr != NULL);
2804         REQUIRE(localaddr != NULL);
2805         REQUIRE(taskmgr != NULL);
2806         REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2807         REQUIRE(maxbuffers > 0);
2808         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2809         REQUIRE(increment > buckets);
2810         REQUIRE(dispp != NULL && *dispp == NULL);
2811         REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2812
2813         result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2814                                         maxrequests, buckets, increment);
2815         if (result != ISC_R_SUCCESS)
2816                 return (result);
2817
2818         LOCK(&mgr->lock);
2819
2820         if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2821                 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2822                 goto createudp;
2823         }
2824
2825         /*
2826          * See if we have a dispatcher that matches.
2827          */
2828         if (dup_dispatch == NULL) {
2829                 result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2830                 if (result == ISC_R_SUCCESS) {
2831                         disp->refcount++;
2832
2833                         if (disp->maxrequests < maxrequests)
2834                                 disp->maxrequests = maxrequests;
2835
2836                         if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0
2837                             && (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2838                         {
2839                                 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2840                                 if (disp->recv_pending != 0)
2841                                         isc_socket_cancel(disp->socket,
2842                                                           disp->task[0],
2843                                                           ISC_SOCKCANCEL_RECV);
2844                         }
2845
2846                         UNLOCK(&disp->lock);
2847                         UNLOCK(&mgr->lock);
2848
2849                         *dispp = disp;
2850
2851                         return (ISC_R_SUCCESS);
2852                 }
2853         }
2854
2855  createudp:
2856         /*
2857          * Nope, create one.
2858          */
2859         result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2860                                     maxrequests, attributes, &disp,
2861                                     dup_dispatch == NULL
2862                                             ? NULL
2863                                             : dup_dispatch->socket);
2864
2865         if (result != ISC_R_SUCCESS) {
2866                 UNLOCK(&mgr->lock);
2867                 return (result);
2868         }
2869
2870         UNLOCK(&mgr->lock);
2871         *dispp = disp;
2872
2873         return (ISC_R_SUCCESS);
2874 }
2875
2876 isc_result_t
2877 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2878                     isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2879                     unsigned int buffersize,
2880                     unsigned int maxbuffers, unsigned int maxrequests,
2881                     unsigned int buckets, unsigned int increment,
2882                     unsigned int attributes, unsigned int mask,
2883                     dns_dispatch_t **dispp)
2884 {
2885         return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr,
2886                                         buffersize, maxbuffers, maxrequests,
2887                                         buckets, increment, attributes,
2888                                         mask, dispp, NULL));
2889 }
2890
2891 /*
2892  * mgr should be locked.
2893  */
2894
2895 #ifndef DNS_DISPATCH_HELD
2896 #define DNS_DISPATCH_HELD 20U
2897 #endif
2898
2899 static isc_result_t
2900 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2901               isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2902               isc_socket_t **sockp, isc_socket_t *dup_socket)
2903 {
2904         unsigned int i, j;
2905         isc_socket_t *held[DNS_DISPATCH_HELD];
2906         isc_sockaddr_t localaddr_bound;
2907         isc_socket_t *sock = NULL;
2908         isc_result_t result = ISC_R_SUCCESS;
2909         isc_boolean_t anyport;
2910
2911         INSIST(sockp != NULL && *sockp == NULL);
2912
2913         localaddr_bound = *localaddr;
2914         anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
2915
2916         if (anyport) {
2917                 unsigned int nports;
2918                 in_port_t *ports;
2919
2920                 /*
2921                  * If no port is specified, we first try to pick up a random
2922                  * port by ourselves.
2923                  */
2924                 if (isc_sockaddr_pf(localaddr) == AF_INET) {
2925                         nports = disp->mgr->nv4ports;
2926                         ports = disp->mgr->v4ports;
2927                 } else {
2928                         nports = disp->mgr->nv6ports;
2929                         ports = disp->mgr->v6ports;
2930                 }
2931                 if (nports == 0)
2932                         return (ISC_R_ADDRNOTAVAIL);
2933
2934                 for (i = 0; i < 1024; i++) {
2935                         in_port_t prt;
2936
2937                         prt = ports[dispatch_uniformrandom(
2938                                         DISP_ARC4CTX(disp),
2939                                         nports)];
2940                         isc_sockaddr_setport(&localaddr_bound, prt);
2941                         result = open_socket(sockmgr, &localaddr_bound,
2942                                              0, &sock, NULL);
2943                         /*
2944                          * Continue if the port choosen is already in use
2945                          * or the OS has reserved it.
2946                          */
2947                         if (result == ISC_R_NOPERM ||
2948                             result == ISC_R_ADDRINUSE)
2949                                 continue;
2950                         disp->localport = prt;
2951                         *sockp = sock;
2952                         return (result);
2953                 }
2954
2955                 /*
2956                  * If this fails 1024 times, we then ask the kernel for
2957                  * choosing one.
2958                  */
2959         } else {
2960                 /* Allow to reuse address for non-random ports. */
2961                 result = open_socket(sockmgr, localaddr,
2962                                      ISC_SOCKET_REUSEADDRESS, &sock,
2963                                      dup_socket);
2964
2965                 if (result == ISC_R_SUCCESS)
2966                         *sockp = sock;
2967
2968                 return (result);
2969         }
2970
2971         memset(held, 0, sizeof(held));
2972         i = 0;
2973
2974         for (j = 0; j < 0xffffU; j++) {
2975                 result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
2976                 if (result != ISC_R_SUCCESS)
2977                         goto end;
2978                 else if (portavailable(mgr, sock, NULL))
2979                         break;
2980                 if (held[i] != NULL)
2981                         isc_socket_detach(&held[i]);
2982                 held[i++] = sock;
2983                 sock = NULL;
2984                 if (i == DNS_DISPATCH_HELD)
2985                         i = 0;
2986         }
2987         if (j == 0xffffU) {
2988                 mgr_log(mgr, ISC_LOG_ERROR,
2989                         "avoid-v%s-udp-ports: unable to allocate "
2990                         "an available port",
2991                         isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
2992                 result = ISC_R_FAILURE;
2993                 goto end;
2994         }
2995         *sockp = sock;
2996
2997 end:
2998         for (i = 0; i < DNS_DISPATCH_HELD; i++) {
2999                 if (held[i] != NULL)
3000                         isc_socket_detach(&held[i]);
3001         }
3002
3003         return (result);
3004 }
3005
3006 static isc_result_t
3007 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
3008                    isc_taskmgr_t *taskmgr,
3009                    isc_sockaddr_t *localaddr,
3010                    unsigned int maxrequests,
3011                    unsigned int attributes,
3012                    dns_dispatch_t **dispp,
3013                    isc_socket_t *dup_socket)
3014 {
3015         isc_result_t result;
3016         dns_dispatch_t *disp;
3017         isc_socket_t *sock = NULL;
3018         int i = 0;
3019
3020         /*
3021          * dispatch_allocate() checks mgr for us.
3022          */
3023         disp = NULL;
3024         result = dispatch_allocate(mgr, maxrequests, &disp);
3025         if (result != ISC_R_SUCCESS)
3026                 return (result);
3027
3028         if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
3029                 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock,
3030                                        dup_socket);
3031                 if (result != ISC_R_SUCCESS)
3032                         goto deallocate_dispatch;
3033
3034                 if (isc_log_wouldlog(dns_lctx, 90)) {
3035                         char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3036
3037                         isc_sockaddr_format(localaddr, addrbuf,
3038                                             ISC_SOCKADDR_FORMATSIZE);
3039                         mgr_log(mgr, LVL(90), "dns_dispatch_createudp: Created"
3040                                 " UDP dispatch for %s with socket fd %d\n",
3041                                 addrbuf, isc_socket_getfd(sock));
3042                 }
3043
3044         } else {
3045                 isc_sockaddr_t sa_any;
3046
3047                 /*
3048                  * For dispatches using exclusive sockets with a specific
3049                  * source address, we only check if the specified address is
3050                  * available on the system.  Query sockets will be created later
3051                  * on demand.
3052                  */
3053                 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
3054                 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
3055                         result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
3056                         if (sock != NULL)
3057                                 isc_socket_detach(&sock);
3058                         if (result != ISC_R_SUCCESS)
3059                                 goto deallocate_dispatch;
3060                 }
3061
3062                 disp->port_table = isc_mem_get(mgr->mctx,
3063                                                sizeof(disp->port_table[0]) *
3064                                                DNS_DISPATCH_PORTTABLESIZE);
3065                 if (disp->port_table == NULL)
3066                         goto deallocate_dispatch;
3067                 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
3068                         ISC_LIST_INIT(disp->port_table[i]);
3069
3070                 result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
3071                                             &disp->portpool);
3072                 if (result != ISC_R_SUCCESS)
3073                         goto deallocate_dispatch;
3074                 isc_mempool_setname(disp->portpool, "disp_portpool");
3075                 isc_mempool_setfreemax(disp->portpool, 128);
3076         }
3077         disp->socktype = isc_sockettype_udp;
3078         disp->socket = sock;
3079         disp->local = *localaddr;
3080
3081         if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3082                 disp->ntasks = MAX_INTERNAL_TASKS;
3083         else
3084                 disp->ntasks = 1;
3085         for (i = 0; i < disp->ntasks; i++) {
3086                 disp->task[i] = NULL;
3087                 result = isc_task_create(taskmgr, 0, &disp->task[i]);
3088                 if (result != ISC_R_SUCCESS) {
3089                         while (--i >= 0) {
3090                                 isc_task_shutdown(disp->task[i]);
3091                                 isc_task_detach(&disp->task[i]);
3092                         }
3093                         goto kill_socket;
3094                 }
3095                 isc_task_setname(disp->task[i], "udpdispatch", disp);
3096         }
3097
3098         disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
3099                                             DNS_EVENT_DISPATCHCONTROL,
3100                                             destroy_disp, disp,
3101                                             sizeof(isc_event_t));
3102         if (disp->ctlevent == NULL) {
3103                 result = ISC_R_NOMEMORY;
3104                 goto kill_task;
3105         }
3106
3107         disp->sepool = NULL;
3108         if (isc_mempool_create(mgr->mctx, sizeof(isc_socketevent_t),
3109                                &disp->sepool) != ISC_R_SUCCESS)
3110         {
3111                 result = ISC_R_NOMEMORY;
3112                 goto kill_ctlevent;
3113         }
3114
3115         result = isc_mutex_init(&disp->sepool_lock);
3116         if (result != ISC_R_SUCCESS)
3117                 goto kill_sepool;
3118
3119         isc_mempool_setname(disp->sepool, "disp_sepool");
3120         isc_mempool_setmaxalloc(disp->sepool, 32768);
3121         isc_mempool_setfreemax(disp->sepool, 32768);
3122         isc_mempool_associatelock(disp->sepool, &disp->sepool_lock);
3123         isc_mempool_setfillcount(disp->sepool, 16);
3124
3125         attributes &= ~DNS_DISPATCHATTR_TCP;
3126         attributes |= DNS_DISPATCHATTR_UDP;
3127         disp->attributes = attributes;
3128
3129         /*
3130          * Append it to the dispatcher list.
3131          */
3132         ISC_LIST_APPEND(mgr->list, disp, link);
3133
3134         mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
3135         dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
3136         if (disp->socket != NULL)
3137                 dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
3138
3139         *dispp = disp;
3140
3141         return (result);
3142
3143         /*
3144          * Error returns.
3145          */
3146  kill_sepool:
3147         isc_mempool_destroy(&disp->sepool);
3148  kill_ctlevent:
3149         isc_event_free(&disp->ctlevent);
3150  kill_task:
3151         for (i = 0; i < disp->ntasks; i++)
3152                 isc_task_detach(&disp->task[i]);
3153  kill_socket:
3154         if (disp->socket != NULL)
3155                 isc_socket_detach(&disp->socket);
3156  deallocate_dispatch:
3157         dispatch_free(&disp);
3158
3159         return (result);
3160 }
3161
3162 void
3163 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
3164         REQUIRE(VALID_DISPATCH(disp));
3165         REQUIRE(dispp != NULL && *dispp == NULL);
3166
3167         LOCK(&disp->lock);
3168         disp->refcount++;
3169         UNLOCK(&disp->lock);
3170
3171         *dispp = disp;
3172 }
3173
3174 /*
3175  * It is important to lock the manager while we are deleting the dispatch,
3176  * since dns_dispatch_getudp will call dispatch_find, which returns to
3177  * the caller a dispatch but does not attach to it until later.  _getudp
3178  * locks the manager, however, so locking it here will keep us from attaching
3179  * to a dispatcher that is in the process of going away.
3180  */
3181 void
3182 dns_dispatch_detach(dns_dispatch_t **dispp) {
3183         dns_dispatch_t *disp;
3184         dispsocket_t *dispsock;
3185         isc_boolean_t killit;
3186
3187         REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
3188
3189         disp = *dispp;
3190         *dispp = NULL;
3191
3192         LOCK(&disp->lock);
3193
3194         INSIST(disp->refcount > 0);
3195         disp->refcount--;
3196         if (disp->refcount == 0) {
3197                 if (disp->recv_pending > 0)
3198                         isc_socket_cancel(disp->socket, disp->task[0],
3199                                           ISC_SOCKCANCEL_RECV);
3200                 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3201                      dispsock != NULL;
3202                      dispsock = ISC_LIST_NEXT(dispsock, link)) {
3203                         isc_socket_cancel(dispsock->socket, dispsock->task,
3204                                           ISC_SOCKCANCEL_RECV);
3205                 }
3206                 disp->shutting_down = 1;
3207         }
3208
3209         dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
3210
3211         killit = destroy_disp_ok(disp);
3212         UNLOCK(&disp->lock);
3213         if (killit)
3214                 isc_task_send(disp->task[0], &disp->ctlevent);
3215 }
3216
3217 isc_result_t
3218 dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3219                           isc_task_t *task, isc_taskaction_t action, void *arg,
3220                           dns_messageid_t *idp, dns_dispentry_t **resp,
3221                           isc_socketmgr_t *sockmgr)
3222 {
3223         dns_dispentry_t *res;
3224         unsigned int bucket;
3225         in_port_t localport = 0;
3226         dns_messageid_t id;
3227         int i;
3228         isc_boolean_t ok;
3229         dns_qid_t *qid;
3230         dispsocket_t *dispsocket = NULL;
3231         isc_result_t result;
3232
3233         REQUIRE(VALID_DISPATCH(disp));
3234         REQUIRE(task != NULL);
3235         REQUIRE(dest != NULL);
3236         REQUIRE(resp != NULL && *resp == NULL);
3237         REQUIRE(idp != NULL);
3238         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3239                 REQUIRE(sockmgr != NULL);
3240
3241         LOCK(&disp->lock);
3242
3243         if (disp->shutting_down == 1) {
3244                 UNLOCK(&disp->lock);
3245                 return (ISC_R_SHUTTINGDOWN);
3246         }
3247
3248         if (disp->requests >= disp->maxrequests) {
3249                 UNLOCK(&disp->lock);
3250                 return (ISC_R_QUOTA);
3251         }
3252
3253         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
3254             disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
3255                 dispsocket_t *oldestsocket;
3256                 dns_dispentry_t *oldestresp;
3257                 dns_dispatchevent_t *rev;
3258
3259                 /*
3260                  * Kill oldest outstanding query if the number of sockets
3261                  * exceeds the quota to keep the room for new queries.
3262                  */
3263                 oldestsocket = ISC_LIST_HEAD(disp->activesockets);
3264                 oldestresp = oldestsocket->resp;
3265                 if (oldestresp != NULL && !oldestresp->item_out) {
3266                         rev = allocate_devent(oldestresp->disp);
3267                         if (rev != NULL) {
3268                                 rev->buffer.base = NULL;
3269                                 rev->result = ISC_R_CANCELED;
3270                                 rev->id = oldestresp->id;
3271                                 ISC_EVENT_INIT(rev, sizeof(*rev), 0,
3272                                                NULL, DNS_EVENT_DISPATCH,
3273                                                oldestresp->action,
3274                                                oldestresp->arg, oldestresp,
3275                                                NULL, NULL);
3276                                 oldestresp->item_out = ISC_TRUE;
3277                                 isc_task_send(oldestresp->task,
3278                                               ISC_EVENT_PTR(&rev));
3279                                 inc_stats(disp->mgr,
3280                                           dns_resstatscounter_dispabort);
3281                         }
3282                 }
3283
3284                 /*
3285                  * Move this entry to the tail so that it won't (easily) be
3286                  * examined before actually being canceled.
3287                  */
3288                 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
3289                 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
3290         }
3291
3292         qid = DNS_QID(disp);
3293
3294         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3295                 /*
3296                  * Get a separate UDP socket with a random port number.
3297                  */
3298                 result = get_dispsocket(disp, dest, sockmgr, &dispsocket,
3299                                         &localport);
3300                 if (result != ISC_R_SUCCESS) {
3301                         UNLOCK(&disp->lock);
3302                         inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
3303                         return (result);
3304                 }
3305         } else {
3306                 localport = disp->localport;
3307         }
3308
3309         /*
3310          * Try somewhat hard to find an unique ID.
3311          */
3312         LOCK(&qid->lock);
3313         id = (dns_messageid_t)dispatch_random(DISP_ARC4CTX(disp));
3314         bucket = dns_hash(qid, dest, id, localport);
3315         ok = ISC_FALSE;
3316         for (i = 0; i < 64; i++) {
3317                 if (entry_search(qid, dest, id, localport, bucket) == NULL) {
3318                         ok = ISC_TRUE;
3319                         break;
3320                 }
3321                 id += qid->qid_increment;
3322                 id &= 0x0000ffff;
3323                 bucket = dns_hash(qid, dest, id, localport);
3324         }
3325         UNLOCK(&qid->lock);
3326
3327         if (!ok) {
3328                 UNLOCK(&disp->lock);
3329                 return (ISC_R_NOMORE);
3330         }
3331
3332         res = isc_mempool_get(disp->mgr->rpool);
3333         if (res == NULL) {
3334                 UNLOCK(&disp->lock);
3335                 if (dispsocket != NULL)
3336                         destroy_dispsocket(disp, &dispsocket);
3337                 return (ISC_R_NOMEMORY);
3338         }
3339
3340         disp->refcount++;
3341         disp->requests++;
3342         res->task = NULL;
3343         isc_task_attach(task, &res->task);
3344         res->disp = disp;
3345         res->id = id;
3346         res->port = localport;
3347         res->bucket = bucket;
3348         res->host = *dest;
3349         res->action = action;
3350         res->arg = arg;
3351         res->dispsocket = dispsocket;
3352         if (dispsocket != NULL)
3353                 dispsocket->resp = res;
3354         res->item_out = ISC_FALSE;
3355         ISC_LIST_INIT(res->items);
3356         ISC_LINK_INIT(res, link);
3357         res->magic = RESPONSE_MAGIC;
3358
3359         LOCK(&qid->lock);
3360         ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3361         UNLOCK(&qid->lock);
3362
3363         request_log(disp, res, LVL(90),
3364                     "attached to task %p", res->task);
3365
3366         if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3367             ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
3368                 result = startrecv(disp, dispsocket);
3369                 if (result != ISC_R_SUCCESS) {
3370                         LOCK(&qid->lock);
3371                         ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3372                         UNLOCK(&qid->lock);
3373
3374                         if (dispsocket != NULL)
3375                                 destroy_dispsocket(disp, &dispsocket);
3376
3377                         disp->refcount--;
3378                         disp->requests--;
3379
3380                         UNLOCK(&disp->lock);
3381                         isc_task_detach(&res->task);
3382                         isc_mempool_put(disp->mgr->rpool, res);
3383                         return (result);
3384                 }
3385         }
3386
3387         if (dispsocket != NULL)
3388                 ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3389
3390         UNLOCK(&disp->lock);
3391
3392         *idp = id;
3393         *resp = res;
3394
3395         if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3396                 INSIST(res->dispsocket != NULL);
3397
3398         return (ISC_R_SUCCESS);
3399 }
3400
3401 isc_result_t
3402 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3403                          isc_task_t *task, isc_taskaction_t action, void *arg,
3404                          dns_messageid_t *idp, dns_dispentry_t **resp)
3405 {
3406         REQUIRE(VALID_DISPATCH(disp));
3407         REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3408
3409         return (dns_dispatch_addresponse2(disp, dest, task, action, arg,
3410                                           idp, resp, NULL));
3411 }
3412
3413 void
3414 dns_dispatch_starttcp(dns_dispatch_t *disp) {
3415
3416         REQUIRE(VALID_DISPATCH(disp));
3417
3418         dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3419
3420         LOCK(&disp->lock);
3421         disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
3422         (void)startrecv(disp, NULL);
3423         UNLOCK(&disp->lock);
3424 }
3425
3426 void
3427 dns_dispatch_removeresponse(dns_dispentry_t **resp,
3428                             dns_dispatchevent_t **sockevent)
3429 {
3430         dns_dispatchmgr_t *mgr;
3431         dns_dispatch_t *disp;
3432         dns_dispentry_t *res;
3433         dispsocket_t *dispsock;
3434         dns_dispatchevent_t *ev;
3435         unsigned int bucket;
3436         isc_boolean_t killit;
3437         unsigned int n;
3438         isc_eventlist_t events;
3439         dns_qid_t *qid;
3440
3441         REQUIRE(resp != NULL);
3442         REQUIRE(VALID_RESPONSE(*resp));
3443
3444         res = *resp;
3445         *resp = NULL;
3446
3447         disp = res->disp;
3448         REQUIRE(VALID_DISPATCH(disp));
3449         mgr = disp->mgr;
3450         REQUIRE(VALID_DISPATCHMGR(mgr));
3451
3452         qid = DNS_QID(disp);
3453
3454         if (sockevent != NULL) {
3455                 REQUIRE(*sockevent != NULL);
3456                 ev = *sockevent;
3457                 *sockevent = NULL;
3458         } else {
3459                 ev = NULL;
3460         }
3461
3462         LOCK(&disp->lock);
3463
3464         INSIST(disp->requests > 0);
3465         disp->requests--;
3466         INSIST(disp->refcount > 0);
3467         disp->refcount--;
3468         if (disp->refcount == 0) {
3469                 if (disp->recv_pending > 0)
3470                         isc_socket_cancel(disp->socket, disp->task[0],
3471                                           ISC_SOCKCANCEL_RECV);
3472                 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3473                      dispsock != NULL;
3474                      dispsock = ISC_LIST_NEXT(dispsock, link)) {
3475                         isc_socket_cancel(dispsock->socket, dispsock->task,
3476                                           ISC_SOCKCANCEL_RECV);
3477                 }
3478                 disp->shutting_down = 1;
3479         }
3480
3481         bucket = res->bucket;
3482
3483         LOCK(&qid->lock);
3484         ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3485         UNLOCK(&qid->lock);
3486
3487         if (ev == NULL && res->item_out) {
3488                 /*
3489                  * We've posted our event, but the caller hasn't gotten it
3490                  * yet.  Take it back.
3491                  */
3492                 ISC_LIST_INIT(events);
3493                 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3494                                     NULL, &events);
3495                 /*
3496                  * We had better have gotten it back.
3497                  */
3498                 INSIST(n == 1);
3499                 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
3500         }
3501
3502         if (ev != NULL) {
3503                 REQUIRE(res->item_out == ISC_TRUE);
3504                 res->item_out = ISC_FALSE;
3505                 if (ev->buffer.base != NULL)
3506                         free_buffer(disp, ev->buffer.base, ev->buffer.length);
3507                 free_devent(disp, ev);
3508         }
3509
3510         request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3511         isc_task_detach(&res->task);
3512
3513         if (res->dispsocket != NULL) {
3514                 isc_socket_cancel(res->dispsocket->socket,
3515                                   res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3516                 res->dispsocket->resp = NULL;
3517         }
3518
3519         /*
3520          * Free any buffered requests as well
3521          */
3522         ev = ISC_LIST_HEAD(res->items);
3523         while (ev != NULL) {
3524                 ISC_LIST_UNLINK(res->items, ev, ev_link);
3525                 if (ev->buffer.base != NULL)
3526                         free_buffer(disp, ev->buffer.base, ev->buffer.length);
3527                 free_devent(disp, ev);
3528                 ev = ISC_LIST_HEAD(res->items);
3529         }
3530         res->magic = 0;
3531         isc_mempool_put(disp->mgr->rpool, res);
3532         if (disp->shutting_down == 1)
3533                 do_cancel(disp);
3534         else
3535                 (void)startrecv(disp, NULL);
3536
3537         killit = destroy_disp_ok(disp);
3538         UNLOCK(&disp->lock);
3539         if (killit)
3540                 isc_task_send(disp->task[0], &disp->ctlevent);
3541 }
3542
3543 static void
3544 do_cancel(dns_dispatch_t *disp) {
3545         dns_dispatchevent_t *ev;
3546         dns_dispentry_t *resp;
3547         dns_qid_t *qid;
3548
3549         if (disp->shutdown_out == 1)
3550                 return;
3551
3552         qid = DNS_QID(disp);
3553
3554         /*
3555          * Search for the first response handler without packets outstanding
3556          * unless a specific hander is given.
3557          */
3558         LOCK(&qid->lock);
3559         for (resp = linear_first(qid);
3560              resp != NULL && resp->item_out;
3561              /* Empty. */)
3562                 resp = linear_next(qid, resp);
3563
3564         /*
3565          * No one to send the cancel event to, so nothing to do.
3566          */
3567         if (resp == NULL)
3568                 goto unlock;
3569
3570         /*
3571          * Send the shutdown failsafe event to this resp.
3572          */
3573         ev = disp->failsafe_ev;
3574         ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3575                        resp->action, resp->arg, resp, NULL, NULL);
3576         ev->result = disp->shutdown_why;
3577         ev->buffer.base = NULL;
3578         ev->buffer.length = 0;
3579         disp->shutdown_out = 1;
3580         request_log(disp, resp, LVL(10),
3581                     "cancel: failsafe event %p -> task %p",
3582                     ev, resp->task);
3583         resp->item_out = ISC_TRUE;
3584         isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
3585  unlock:
3586         UNLOCK(&qid->lock);
3587 }
3588
3589 isc_socket_t *
3590 dns_dispatch_getsocket(dns_dispatch_t *disp) {
3591         REQUIRE(VALID_DISPATCH(disp));
3592
3593         return (disp->socket);
3594 }
3595
3596 isc_socket_t *
3597 dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3598         REQUIRE(VALID_RESPONSE(resp));
3599
3600         if (resp->dispsocket != NULL)
3601                 return (resp->dispsocket->socket);
3602         else
3603                 return (NULL);
3604 }
3605
3606 isc_result_t
3607 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3608
3609         REQUIRE(VALID_DISPATCH(disp));
3610         REQUIRE(addrp != NULL);
3611
3612         if (disp->socktype == isc_sockettype_udp) {
3613                 *addrp = disp->local;
3614                 return (ISC_R_SUCCESS);
3615         }
3616         return (ISC_R_NOTIMPLEMENTED);
3617 }
3618
3619 void
3620 dns_dispatch_cancel(dns_dispatch_t *disp) {
3621         REQUIRE(VALID_DISPATCH(disp));
3622
3623         LOCK(&disp->lock);
3624
3625         if (disp->shutting_down == 1) {
3626                 UNLOCK(&disp->lock);
3627                 return;
3628         }
3629
3630         disp->shutdown_why = ISC_R_CANCELED;
3631         disp->shutting_down = 1;
3632         do_cancel(disp);
3633
3634         UNLOCK(&disp->lock);
3635
3636         return;
3637 }
3638
3639 unsigned int
3640 dns_dispatch_getattributes(dns_dispatch_t *disp) {
3641         REQUIRE(VALID_DISPATCH(disp));
3642
3643         /*
3644          * We don't bother locking disp here; it's the caller's responsibility
3645          * to use only non volatile flags.
3646          */
3647         return (disp->attributes);
3648 }
3649
3650 void
3651 dns_dispatch_changeattributes(dns_dispatch_t *disp,
3652                               unsigned int attributes, unsigned int mask)
3653 {
3654         REQUIRE(VALID_DISPATCH(disp));
3655         /* Exclusive attribute can only be set on creation */
3656         REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3657         /* Also, a dispatch with randomport specified cannot start listening */
3658         REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3659                 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3660
3661         /* XXXMLG
3662          * Should check for valid attributes here!
3663          */
3664
3665         LOCK(&disp->lock);
3666
3667         if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3668                 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3669                     (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3670                         disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3671                         (void)startrecv(disp, NULL);
3672                 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3673                            == 0 &&
3674                            (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3675                         disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3676                         if (disp->recv_pending != 0)
3677                                 isc_socket_cancel(disp->socket, disp->task[0],
3678                                                   ISC_SOCKCANCEL_RECV);
3679                 }
3680         }
3681
3682         disp->attributes &= ~mask;
3683         disp->attributes |= (attributes & mask);
3684         UNLOCK(&disp->lock);
3685 }
3686
3687 void
3688 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3689         void *buf;
3690         isc_socketevent_t *sevent, *newsevent;
3691
3692         REQUIRE(VALID_DISPATCH(disp));
3693         REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3694         REQUIRE(event != NULL);
3695
3696         sevent = (isc_socketevent_t *)event;
3697
3698         INSIST(sevent->n <= disp->mgr->buffersize);
3699         newsevent = (isc_socketevent_t *)
3700                     isc_event_allocate(disp->mgr->mctx, NULL,
3701                                       DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3702                                       disp, sizeof(isc_socketevent_t));
3703         if (newsevent == NULL)
3704                 return;
3705
3706         buf = allocate_udp_buffer(disp);
3707         if (buf == NULL) {
3708                 isc_event_free(ISC_EVENT_PTR(&newsevent));
3709                 return;
3710         }
3711         memcpy(buf, sevent->region.base, sevent->n);
3712         newsevent->region.base = buf;
3713         newsevent->region.length = disp->mgr->buffersize;
3714         newsevent->n = sevent->n;
3715         newsevent->result = sevent->result;
3716         newsevent->address = sevent->address;
3717         newsevent->timestamp = sevent->timestamp;
3718         newsevent->pktinfo = sevent->pktinfo;
3719         newsevent->attributes = sevent->attributes;
3720
3721         isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
3722 }
3723
3724 dns_dispatch_t *
3725 dns_dispatchset_get(dns_dispatchset_t *dset) {
3726         dns_dispatch_t *disp;
3727
3728         /* check that dispatch set is configured */
3729         if (dset == NULL || dset->ndisp == 0)
3730                 return (NULL);
3731
3732         LOCK(&dset->lock);
3733         disp = dset->dispatches[dset->cur];
3734         dset->cur++;
3735         if (dset->cur == dset->ndisp)
3736                 dset->cur = 0;
3737         UNLOCK(&dset->lock);
3738
3739         return (disp);
3740 }
3741
3742 isc_result_t
3743 dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr,
3744                        isc_taskmgr_t *taskmgr, dns_dispatch_t *source,
3745                        dns_dispatchset_t **dsetp, int n)
3746 {
3747         isc_result_t result;
3748         dns_dispatchset_t *dset;
3749         dns_dispatchmgr_t *mgr;
3750         int i, j;
3751
3752         REQUIRE(VALID_DISPATCH(source));
3753         REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0);
3754         REQUIRE(dsetp != NULL && *dsetp == NULL);
3755
3756         mgr = source->mgr;
3757
3758         dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t));
3759         if (dset == NULL)
3760                 return (ISC_R_NOMEMORY);
3761         memset(dset, 0, sizeof(*dset));
3762
3763         result = isc_mutex_init(&dset->lock);
3764         if (result != ISC_R_SUCCESS)
3765                 goto fail_alloc;
3766
3767         dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n);
3768         if (dset == NULL) {
3769                 result = ISC_R_NOMEMORY;
3770                 goto fail_lock;
3771         }
3772
3773         isc_mem_attach(mctx, &dset->mctx);
3774         dset->ndisp = n;
3775         dset->cur = 0;
3776
3777         dset->dispatches[0] = NULL;
3778         dns_dispatch_attach(source, &dset->dispatches[0]);
3779
3780         LOCK(&mgr->lock);
3781         for (i = 1; i < n; i++) {
3782                 dset->dispatches[i] = NULL;
3783                 result = dispatch_createudp(mgr, sockmgr, taskmgr,
3784                                             &source->local,
3785                                             source->maxrequests,
3786                                             source->attributes,
3787                                             &dset->dispatches[i],
3788                                             source->socket);
3789                 if (result != ISC_R_SUCCESS)
3790                         goto fail;
3791         }
3792
3793         UNLOCK(&mgr->lock);
3794         *dsetp = dset;
3795
3796         return (ISC_R_SUCCESS);
3797
3798  fail:
3799         UNLOCK(&mgr->lock);
3800
3801         for (j = 0; j < i; j++)
3802                 dns_dispatch_detach(&(dset->dispatches[j]));
3803         isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n);
3804         if (dset->mctx == mctx)
3805                 isc_mem_detach(&dset->mctx);
3806
3807  fail_lock:
3808         DESTROYLOCK(&dset->lock);
3809
3810  fail_alloc:
3811         isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t));
3812         return (result);
3813 }
3814
3815 void
3816 dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) {
3817         int i;
3818
3819         REQUIRE(dset != NULL);
3820
3821         for (i = 0; i < dset->ndisp; i++) {
3822                 isc_socket_t *sock;
3823                 sock = dns_dispatch_getsocket(dset->dispatches[i]);
3824                 isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL);
3825         }
3826 }
3827
3828 void
3829 dns_dispatchset_destroy(dns_dispatchset_t **dsetp) {
3830         dns_dispatchset_t *dset;
3831         int i;
3832
3833         REQUIRE(dsetp != NULL && *dsetp != NULL);
3834
3835         dset = *dsetp;
3836         for (i = 0; i < dset->ndisp; i++)
3837                 dns_dispatch_detach(&(dset->dispatches[i]));
3838         isc_mem_put(dset->mctx, dset->dispatches,
3839                     sizeof(dns_dispatch_t *) * dset->ndisp);
3840         DESTROYLOCK(&dset->lock);
3841         isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t));
3842
3843         *dsetp = NULL;
3844 }
3845
#if 0
/*
 * Debugging aid (compiled out): print each dispatch on the manager's
 * list, together with its formatted local address, to standard output.
 */
void
dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
        dns_dispatch_t *disp;
        char addrtext[1024];

        for (disp = ISC_LIST_HEAD(mgr->list);
             disp != NULL;
             disp = ISC_LIST_NEXT(disp, link)) {
                isc_sockaddr_format(&disp->local, addrtext, sizeof(addrtext));
                printf("\tdispatch %p, addr %s\n", disp, addrtext);
        }
}
#endif