]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - services/mesh.c
import unbound 1.4.17
[FreeBSD/FreeBSD.git] / services / mesh.c
1 /*
2  * services/mesh.c - deal with mesh of query states and handle events for that.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains functions to assist in dealing with a mesh of
40  * query states. This mesh is supposed to be thread-specific.
41  * It consists of query states (per qname, qtype, qclass) and connections
42  * between query states and the super and subquery states, and replies to
43  * send back to clients.
44  */
45 #include "config.h"
46 #include <ldns/wire2host.h>
47 #include "services/mesh.h"
48 #include "services/outbound_list.h"
49 #include "services/cache/dns.h"
50 #include "util/log.h"
51 #include "util/net_help.h"
52 #include "util/module.h"
53 #include "util/regional.h"
54 #include "util/data/msgencode.h"
55 #include "util/timehist.h"
56 #include "util/fptr_wlist.h"
57 #include "util/alloc.h"
58 #include "util/config_file.h"
59
/**
 * Store the difference (end - start) of two time values in d.
 * One second is borrowed when end has fewer microseconds than start.
 * As the original note states, the values are expected not to overflow
 * or become negative.
 * @param d: result.
 * @param end: later time value.
 * @param start: earlier time value.
 */
static void
timeval_subtract(struct timeval* d, const struct timeval* end, const struct timeval* start)
{
#ifndef S_SPLINT_S
	time_t usec = end->tv_usec;
	time_t sec = end->tv_sec - start->tv_sec;
	if(usec < start->tv_usec) {
		/* borrow a second to keep the usec part non-negative */
		usec += 1000000;
		sec--;
	}
	d->tv_sec = sec;
	d->tv_usec = usec - start->tv_usec;
#endif
}
74
/**
 * Add a time value onto an accumulator: d += add.
 * The microsecond field is normalized with a single carry so that it
 * stays below one million (also when the sum is exactly one second).
 * As the original note states, the values are expected not to overflow
 * or become negative.
 * @param d: accumulator, updated in place.
 * @param add: value to add.
 */
static void
timeval_add(struct timeval* d, const struct timeval* add)
{
#ifndef S_SPLINT_S
	d->tv_sec += add->tv_sec;
	d->tv_usec += add->tv_usec;
	/* carry when a whole second (or more) accumulated in usec;
	 * '>=' so that exactly 1000000 usec is carried as well */
	if(d->tv_usec >= 1000000 ) {
		d->tv_usec -= 1000000;
		d->tv_sec++;
	}
#endif
}
88
/**
 * Divide a sum of timers by a count to get the average.
 * @param avg: result; set to zero when d is 0.
 * @param sum: accumulated time value.
 * @param d: number of samples that were summed.
 */
static void
timeval_divide(struct timeval* avg, const struct timeval* sum, size_t d)
{
#ifndef S_SPLINT_S
	size_t leftover;
	if(d == 0) {
		avg->tv_sec = 0;
		avg->tv_usec = 0;
		return;
	}
	avg->tv_sec = sum->tv_sec / d;
	avg->tv_usec = sum->tv_usec / d;
	/* handle fraction from seconds divide; leftover is the remainder
	 * of the seconds division and so is smaller than d */
	leftover = sum->tv_sec - avg->tv_sec*d;
	/* multiply in a wide type: leftover*1000000 does not fit in a
	 * 32-bit size_t once leftover exceeds 4294 */
	avg->tv_usec += (((unsigned long long)leftover)*1000000ULL)/d;
#endif
}
107
/**
 * Compare two time values.
 * @param x: first time value.
 * @param y: second time value.
 * @return 1 if x is smaller than or equal to y, otherwise 0.
 */
static int
timeval_smaller(const struct timeval* x, const struct timeval* y)
{
#ifndef S_SPLINT_S
	/* seconds decide unless they are equal */
	if(x->tv_sec != y->tv_sec)
		return (x->tv_sec < y->tv_sec) ? 1 : 0;
	return (x->tv_usec <= y->tv_usec) ? 1 : 0;
#endif
}
123
124 int
125 mesh_state_compare(const void* ap, const void* bp)
126 {
127         struct mesh_state* a = (struct mesh_state*)ap;
128         struct mesh_state* b = (struct mesh_state*)bp;
129
130         if(a->s.is_priming && !b->s.is_priming)
131                 return -1;
132         if(!a->s.is_priming && b->s.is_priming)
133                 return 1;
134
135         if((a->s.query_flags&BIT_RD) && !(b->s.query_flags&BIT_RD))
136                 return -1;
137         if(!(a->s.query_flags&BIT_RD) && (b->s.query_flags&BIT_RD))
138                 return 1;
139
140         if((a->s.query_flags&BIT_CD) && !(b->s.query_flags&BIT_CD))
141                 return -1;
142         if(!(a->s.query_flags&BIT_CD) && (b->s.query_flags&BIT_CD))
143                 return 1;
144
145         return query_info_compare(&a->s.qinfo, &b->s.qinfo);
146 }
147
148 int
149 mesh_state_ref_compare(const void* ap, const void* bp)
150 {
151         struct mesh_state_ref* a = (struct mesh_state_ref*)ap;
152         struct mesh_state_ref* b = (struct mesh_state_ref*)bp;
153         return mesh_state_compare(a->s, b->s);
154 }
155
156 struct mesh_area* 
157 mesh_create(struct module_stack* stack, struct module_env* env)
158 {
159         struct mesh_area* mesh = calloc(1, sizeof(struct mesh_area));
160         if(!mesh) {
161                 log_err("mesh area alloc: out of memory");
162                 return NULL;
163         }
164         mesh->histogram = timehist_setup();
165         mesh->qbuf_bak = ldns_buffer_new(env->cfg->msg_buffer_size);
166         if(!mesh->histogram || !mesh->qbuf_bak) {
167                 free(mesh);
168                 log_err("mesh area alloc: out of memory");
169                 return NULL;
170         }
171         mesh->mods = *stack;
172         mesh->env = env;
173         rbtree_init(&mesh->run, &mesh_state_compare);
174         rbtree_init(&mesh->all, &mesh_state_compare);
175         mesh->num_reply_addrs = 0;
176         mesh->num_reply_states = 0;
177         mesh->num_detached_states = 0;
178         mesh->num_forever_states = 0;
179         mesh->stats_jostled = 0;
180         mesh->stats_dropped = 0;
181         mesh->max_reply_states = env->cfg->num_queries_per_thread;
182         mesh->max_forever_states = (mesh->max_reply_states+1)/2;
183 #ifndef S_SPLINT_S
184         mesh->jostle_max.tv_sec = (time_t)(env->cfg->jostle_time / 1000);
185         mesh->jostle_max.tv_usec = (time_t)((env->cfg->jostle_time % 1000)
186                 *1000);
187 #endif
188         return mesh;
189 }
190
191 /** help mesh delete delete mesh states */
192 static void
193 mesh_delete_helper(rbnode_t* n)
194 {
195         struct mesh_state* mstate = (struct mesh_state*)n->key;
196         /* perform a full delete, not only 'cleanup' routine,
197          * because other callbacks expect a clean state in the mesh.
198          * For 're-entrant' calls */
199         mesh_state_delete(&mstate->s);
200         /* but because these delete the items from the tree, postorder
201          * traversal and rbtree rebalancing do not work together */
202 }
203
204 void 
205 mesh_delete(struct mesh_area* mesh)
206 {
207         if(!mesh)
208                 return;
209         /* free all query states */
210         while(mesh->all.count)
211                 mesh_delete_helper(mesh->all.root);
212         timehist_delete(mesh->histogram);
213         ldns_buffer_free(mesh->qbuf_bak);
214         free(mesh);
215 }
216
217 void
218 mesh_delete_all(struct mesh_area* mesh)
219 {
220         /* free all query states */
221         while(mesh->all.count)
222                 mesh_delete_helper(mesh->all.root);
223         mesh->stats_dropped += mesh->num_reply_addrs;
224         /* clear mesh area references */
225         rbtree_init(&mesh->run, &mesh_state_compare);
226         rbtree_init(&mesh->all, &mesh_state_compare);
227         mesh->num_reply_addrs = 0;
228         mesh->num_reply_states = 0;
229         mesh->num_detached_states = 0;
230         mesh->num_forever_states = 0;
231         mesh->forever_first = NULL;
232         mesh->forever_last = NULL;
233         mesh->jostle_first = NULL;
234         mesh->jostle_last = NULL;
235 }
236
237 int mesh_make_new_space(struct mesh_area* mesh, ldns_buffer* qbuf)
238 {
239         struct mesh_state* m = mesh->jostle_first;
240         /* free space is available */
241         if(mesh->num_reply_states < mesh->max_reply_states)
242                 return 1;
243         /* try to kick out a jostle-list item */
244         if(m && m->reply_list && m->list_select == mesh_jostle_list) {
245                 /* how old is it? */
246                 struct timeval age;
247                 timeval_subtract(&age, mesh->env->now_tv, 
248                         &m->reply_list->start_time);
249                 if(timeval_smaller(&mesh->jostle_max, &age)) {
250                         /* its a goner */
251                         log_nametypeclass(VERB_ALGO, "query jostled out to "
252                                 "make space for a new one",
253                                 m->s.qinfo.qname, m->s.qinfo.qtype,
254                                 m->s.qinfo.qclass);
255                         /* backup the query */
256                         if(qbuf) ldns_buffer_copy(mesh->qbuf_bak, qbuf);
257                         /* notify supers */
258                         if(m->super_set.count > 0) {
259                                 verbose(VERB_ALGO, "notify supers of failure");
260                                 m->s.return_msg = NULL;
261                                 m->s.return_rcode = LDNS_RCODE_SERVFAIL;
262                                 mesh_walk_supers(mesh, m);
263                         }
264                         mesh->stats_jostled ++;
265                         mesh_state_delete(&m->s);
266                         /* restore the query - note that the qinfo ptr to
267                          * the querybuffer is then correct again. */
268                         if(qbuf) ldns_buffer_copy(qbuf, mesh->qbuf_bak);
269                         return 1;
270                 }
271         }
272         /* no space for new item */
273         return 0;
274 }
275
276 void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo,
277         uint16_t qflags, struct edns_data* edns, struct comm_reply* rep,
278         uint16_t qid)
279 {
280         /* do not use CD flag from user for mesh state, we want the CD-query
281          * to receive validation anyway, to protect out cache contents and
282          * avoid bad-data in this cache that a downstream validator cannot
283          * remove from this cache */
284         struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&BIT_RD, 0);
285         int was_detached = 0;
286         int was_noreply = 0;
287         int added = 0;
288         /* does this create a new reply state? */
289         if(!s || s->list_select == mesh_no_list) {
290                 if(!mesh_make_new_space(mesh, rep->c->buffer)) {
291                         verbose(VERB_ALGO, "Too many queries. dropping "
292                                 "incoming query.");
293                         comm_point_drop_reply(rep);
294                         mesh->stats_dropped ++;
295                         return;
296                 }
297                 /* for this new reply state, the reply address is free,
298                  * so the limit of reply addresses does not stop reply states*/
299         } else {
300                 /* protect our memory usage from storing reply addresses */
301                 if(mesh->num_reply_addrs > mesh->max_reply_states*16) {
302                         verbose(VERB_ALGO, "Too many requests queued. "
303                                 "dropping incoming query.");
304                         mesh->stats_dropped++;
305                         comm_point_drop_reply(rep);
306                         return;
307                 }
308         }
309         /* see if it already exists, if not, create one */
310         if(!s) {
311 #ifdef UNBOUND_DEBUG
312                 struct rbnode_t* n;
313 #endif
314                 s = mesh_state_create(mesh->env, qinfo, qflags&BIT_RD, 0);
315                 if(!s) {
316                         log_err("mesh_state_create: out of memory; SERVFAIL");
317                         error_encode(rep->c->buffer, LDNS_RCODE_SERVFAIL,
318                                 qinfo, qid, qflags, edns);
319                         comm_point_send_reply(rep);
320                         return;
321                 }
322 #ifdef UNBOUND_DEBUG
323                 n =
324 #endif
325                 rbtree_insert(&mesh->all, &s->node);
326                 log_assert(n != NULL);
327                 /* set detached (it is now) */
328                 mesh->num_detached_states++;
329                 added = 1;
330         }
331         if(!s->reply_list && !s->cb_list && s->super_set.count == 0)
332                 was_detached = 1;
333         if(!s->reply_list && !s->cb_list)
334                 was_noreply = 1;
335         /* add reply to s */
336         if(!mesh_state_add_reply(s, edns, rep, qid, qflags, qinfo->qname)) {
337                         log_err("mesh_new_client: out of memory; SERVFAIL");
338                         error_encode(rep->c->buffer, LDNS_RCODE_SERVFAIL,
339                                 qinfo, qid, qflags, edns);
340                         comm_point_send_reply(rep);
341                         if(added)
342                                 mesh_state_delete(&s->s);
343                         return;
344         }
345         /* update statistics */
346         if(was_detached) {
347                 log_assert(mesh->num_detached_states > 0);
348                 mesh->num_detached_states--;
349         }
350         if(was_noreply) {
351                 mesh->num_reply_states ++;
352         }
353         mesh->num_reply_addrs++;
354         if(s->list_select == mesh_no_list) {
355                 /* move to either the forever or the jostle_list */
356                 if(mesh->num_forever_states < mesh->max_forever_states) {
357                         mesh->num_forever_states ++;
358                         mesh_list_insert(s, &mesh->forever_first, 
359                                 &mesh->forever_last);
360                         s->list_select = mesh_forever_list;
361                 } else {
362                         mesh_list_insert(s, &mesh->jostle_first, 
363                                 &mesh->jostle_last);
364                         s->list_select = mesh_jostle_list;
365                 }
366         }
367         if(added)
368                 mesh_run(mesh, s, module_event_new, NULL);
369 }
370
371 int 
372 mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo,
373         uint16_t qflags, struct edns_data* edns, ldns_buffer* buf, 
374         uint16_t qid, mesh_cb_func_t cb, void* cb_arg)
375 {
376         struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&BIT_RD, 0);
377         int was_detached = 0;
378         int was_noreply = 0;
379         int added = 0;
380         /* there are no limits on the number of callbacks */
381
382         /* see if it already exists, if not, create one */
383         if(!s) {
384 #ifdef UNBOUND_DEBUG
385                 struct rbnode_t* n;
386 #endif
387                 s = mesh_state_create(mesh->env, qinfo, qflags&BIT_RD, 0);
388                 if(!s) {
389                         return 0;
390                 }
391 #ifdef UNBOUND_DEBUG
392                 n =
393 #endif
394                 rbtree_insert(&mesh->all, &s->node);
395                 log_assert(n != NULL);
396                 /* set detached (it is now) */
397                 mesh->num_detached_states++;
398                 added = 1;
399         }
400         if(!s->reply_list && !s->cb_list && s->super_set.count == 0)
401                 was_detached = 1;
402         if(!s->reply_list && !s->cb_list)
403                 was_noreply = 1;
404         /* add reply to s */
405         if(!mesh_state_add_cb(s, edns, buf, cb, cb_arg, qid, qflags)) {
406                         if(added)
407                                 mesh_state_delete(&s->s);
408                         return 0;
409         }
410         /* update statistics */
411         if(was_detached) {
412                 log_assert(mesh->num_detached_states > 0);
413                 mesh->num_detached_states--;
414         }
415         if(was_noreply) {
416                 mesh->num_reply_states ++;
417         }
418         mesh->num_reply_addrs++;
419         if(added)
420                 mesh_run(mesh, s, module_event_new, NULL);
421         return 1;
422 }
423
424 void mesh_new_prefetch(struct mesh_area* mesh, struct query_info* qinfo,
425         uint16_t qflags, uint32_t leeway)
426 {
427         struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&BIT_RD, 0);
428 #ifdef UNBOUND_DEBUG
429         struct rbnode_t* n;
430 #endif
431         /* already exists, and for a different purpose perhaps.
432          * if mesh_no_list, keep it that way. */
433         if(s) {
434                 /* make it ignore the cache from now on */
435                 if(!s->s.blacklist)
436                         sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region);
437                 if(s->s.prefetch_leeway < leeway)
438                         s->s.prefetch_leeway = leeway;
439                 return;
440         }
441         if(!mesh_make_new_space(mesh, NULL)) {
442                 verbose(VERB_ALGO, "Too many queries. dropped prefetch.");
443                 mesh->stats_dropped ++;
444                 return;
445         }
446         s = mesh_state_create(mesh->env, qinfo, qflags&BIT_RD, 0);
447         if(!s) {
448                 log_err("prefetch mesh_state_create: out of memory");
449                 return;
450         }
451 #ifdef UNBOUND_DEBUG
452         n =
453 #endif
454         rbtree_insert(&mesh->all, &s->node);
455         log_assert(n != NULL);
456         /* set detached (it is now) */
457         mesh->num_detached_states++;
458         /* make it ignore the cache */
459         sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region);
460         s->s.prefetch_leeway = leeway;
461
462         if(s->list_select == mesh_no_list) {
463                 /* move to either the forever or the jostle_list */
464                 if(mesh->num_forever_states < mesh->max_forever_states) {
465                         mesh->num_forever_states ++;
466                         mesh_list_insert(s, &mesh->forever_first, 
467                                 &mesh->forever_last);
468                         s->list_select = mesh_forever_list;
469                 } else {
470                         mesh_list_insert(s, &mesh->jostle_first, 
471                                 &mesh->jostle_last);
472                         s->list_select = mesh_jostle_list;
473                 }
474         }
475         mesh_run(mesh, s, module_event_new, NULL);
476 }
477
478 void mesh_report_reply(struct mesh_area* mesh, struct outbound_entry* e,
479         struct comm_reply* reply, int what)
480 {
481         enum module_ev event = module_event_reply;
482         e->qstate->reply = reply;
483         if(what != NETEVENT_NOERROR) {
484                 event = module_event_noreply;
485                 if(what == NETEVENT_CAPSFAIL)
486                         event = module_event_capsfail;
487         }
488         mesh_run(mesh, e->qstate->mesh_info, event, e);
489 }
490
491 struct mesh_state* 
492 mesh_state_create(struct module_env* env, struct query_info* qinfo, 
493         uint16_t qflags, int prime)
494 {
495         struct regional* region = alloc_reg_obtain(env->alloc);
496         struct mesh_state* mstate;
497         int i;
498         if(!region)
499                 return NULL;
500         mstate = (struct mesh_state*)regional_alloc(region, 
501                 sizeof(struct mesh_state));
502         if(!mstate) {
503                 alloc_reg_release(env->alloc, region);
504                 return NULL;
505         }
506         memset(mstate, 0, sizeof(*mstate));
507         mstate->node = *RBTREE_NULL;
508         mstate->run_node = *RBTREE_NULL;
509         mstate->node.key = mstate;
510         mstate->run_node.key = mstate;
511         mstate->reply_list = NULL;
512         mstate->list_select = mesh_no_list;
513         mstate->replies_sent = 0;
514         rbtree_init(&mstate->super_set, &mesh_state_ref_compare);
515         rbtree_init(&mstate->sub_set, &mesh_state_ref_compare);
516         mstate->num_activated = 0;
517         /* init module qstate */
518         mstate->s.qinfo.qtype = qinfo->qtype;
519         mstate->s.qinfo.qclass = qinfo->qclass;
520         mstate->s.qinfo.qname_len = qinfo->qname_len;
521         mstate->s.qinfo.qname = regional_alloc_init(region, qinfo->qname,
522                 qinfo->qname_len);
523         if(!mstate->s.qinfo.qname) {
524                 alloc_reg_release(env->alloc, region);
525                 return NULL;
526         }
527         /* remove all weird bits from qflags */
528         mstate->s.query_flags = (qflags & (BIT_RD|BIT_CD));
529         mstate->s.is_priming = prime;
530         mstate->s.reply = NULL;
531         mstate->s.region = region;
532         mstate->s.curmod = 0;
533         mstate->s.return_msg = 0;
534         mstate->s.return_rcode = LDNS_RCODE_NOERROR;
535         mstate->s.env = env;
536         mstate->s.mesh_info = mstate;
537         mstate->s.prefetch_leeway = 0;
538         /* init modules */
539         for(i=0; i<env->mesh->mods.num; i++) {
540                 mstate->s.minfo[i] = NULL;
541                 mstate->s.ext_state[i] = module_state_initial;
542         }
543         return mstate;
544 }
545
546 void 
547 mesh_state_cleanup(struct mesh_state* mstate)
548 {
549         struct mesh_area* mesh;
550         int i;
551         if(!mstate)
552                 return;
553         mesh = mstate->s.env->mesh;
554         /* drop unsent replies */
555         if(!mstate->replies_sent) {
556                 struct mesh_reply* rep;
557                 struct mesh_cb* cb;
558                 for(rep=mstate->reply_list; rep; rep=rep->next) {
559                         comm_point_drop_reply(&rep->query_reply);
560                         mesh->num_reply_addrs--;
561                 }
562                 for(cb=mstate->cb_list; cb; cb=cb->next) {
563                         fptr_ok(fptr_whitelist_mesh_cb(cb->cb));
564                         (*cb->cb)(cb->cb_arg, LDNS_RCODE_SERVFAIL, NULL,
565                                 sec_status_unchecked, NULL);
566                         mesh->num_reply_addrs--;
567                 }
568         }
569
570         /* de-init modules */
571         for(i=0; i<mesh->mods.num; i++) {
572                 fptr_ok(fptr_whitelist_mod_clear(mesh->mods.mod[i]->clear));
573                 (*mesh->mods.mod[i]->clear)(&mstate->s, i);
574                 mstate->s.minfo[i] = NULL;
575                 mstate->s.ext_state[i] = module_finished;
576         }
577         alloc_reg_release(mstate->s.env->alloc, mstate->s.region);
578 }
579
580 void 
581 mesh_state_delete(struct module_qstate* qstate)
582 {
583         struct mesh_area* mesh;
584         struct mesh_state_ref* super, ref;
585         struct mesh_state* mstate;
586         if(!qstate)
587                 return;
588         mstate = qstate->mesh_info;
589         mesh = mstate->s.env->mesh;
590         mesh_detach_subs(&mstate->s);
591         if(mstate->list_select == mesh_forever_list) {
592                 mesh->num_forever_states --;
593                 mesh_list_remove(mstate, &mesh->forever_first, 
594                         &mesh->forever_last);
595         } else if(mstate->list_select == mesh_jostle_list) {
596                 mesh_list_remove(mstate, &mesh->jostle_first, 
597                         &mesh->jostle_last);
598         }
599         if(!mstate->reply_list && !mstate->cb_list
600                 && mstate->super_set.count == 0) {
601                 log_assert(mesh->num_detached_states > 0);
602                 mesh->num_detached_states--;
603         }
604         if(mstate->reply_list || mstate->cb_list) {
605                 log_assert(mesh->num_reply_states > 0);
606                 mesh->num_reply_states--;
607         }
608         ref.node.key = &ref;
609         ref.s = mstate;
610         RBTREE_FOR(super, struct mesh_state_ref*, &mstate->super_set) {
611                 (void)rbtree_delete(&super->s->sub_set, &ref);
612         }
613         (void)rbtree_delete(&mesh->run, mstate);
614         (void)rbtree_delete(&mesh->all, mstate);
615         mesh_state_cleanup(mstate);
616 }
617
618 /** helper recursive rbtree find routine */
619 static int
620 find_in_subsub(struct mesh_state* m, struct mesh_state* tofind, size_t *c)
621 {
622         struct mesh_state_ref* r;
623         if((*c)++ > MESH_MAX_SUBSUB)
624                 return 1;
625         RBTREE_FOR(r, struct mesh_state_ref*, &m->sub_set) {
626                 if(r->s == tofind || find_in_subsub(r->s, tofind, c))
627                         return 1;
628         }
629         return 0;
630 }
631
632 /** find cycle for already looked up mesh_state */
633 static int 
634 mesh_detect_cycle_found(struct module_qstate* qstate, struct mesh_state* dep_m)
635 {
636         struct mesh_state* cyc_m = qstate->mesh_info;
637         size_t counter = 0;
638         if(!dep_m)
639                 return 0;
640         if(dep_m == cyc_m || find_in_subsub(dep_m, cyc_m, &counter)) {
641                 if(counter > MESH_MAX_SUBSUB)
642                         return 2;
643                 return 1;
644         }
645         return 0;
646 }
647
648 void mesh_detach_subs(struct module_qstate* qstate)
649 {
650         struct mesh_area* mesh = qstate->env->mesh;
651         struct mesh_state_ref* ref, lookup;
652 #ifdef UNBOUND_DEBUG
653         struct rbnode_t* n;
654 #endif
655         lookup.node.key = &lookup;
656         lookup.s = qstate->mesh_info;
657         RBTREE_FOR(ref, struct mesh_state_ref*, &qstate->mesh_info->sub_set) {
658 #ifdef UNBOUND_DEBUG
659                 n =
660 #endif
661                 rbtree_delete(&ref->s->super_set, &lookup);
662                 log_assert(n != NULL); /* must have been present */
663                 if(!ref->s->reply_list && !ref->s->cb_list
664                         && ref->s->super_set.count == 0) {
665                         mesh->num_detached_states++;
666                         log_assert(mesh->num_detached_states + 
667                                 mesh->num_reply_states <= mesh->all.count);
668                 }
669         }
670         rbtree_init(&qstate->mesh_info->sub_set, &mesh_state_ref_compare);
671 }
672
673 int mesh_attach_sub(struct module_qstate* qstate, struct query_info* qinfo,
674         uint16_t qflags, int prime, struct module_qstate** newq)
675 {
676         /* find it, if not, create it */
677         struct mesh_area* mesh = qstate->env->mesh;
678         struct mesh_state* sub = mesh_area_find(mesh, qinfo, qflags, prime);
679         if(mesh_detect_cycle_found(qstate, sub)) {
680                 verbose(VERB_ALGO, "attach failed, cycle detected");
681                 return 0;
682         }
683         if(!sub) {
684 #ifdef UNBOUND_DEBUG
685                 struct rbnode_t* n;
686 #endif
687                 /* create a new one */
688                 sub = mesh_state_create(qstate->env, qinfo, qflags, prime);
689                 if(!sub) {
690                         log_err("mesh_attach_sub: out of memory");
691                         return 0;
692                 }
693 #ifdef UNBOUND_DEBUG
694                 n =
695 #endif
696                 rbtree_insert(&mesh->all, &sub->node);
697                 log_assert(n != NULL);
698                 /* set detached (it is now) */
699                 mesh->num_detached_states++;
700                 /* set new query state to run */
701 #ifdef UNBOUND_DEBUG
702                 n =
703 #endif
704                 rbtree_insert(&mesh->run, &sub->run_node);
705                 log_assert(n != NULL);
706                 *newq = &sub->s;
707         } else
708                 *newq = NULL;
709         if(!mesh_state_attachment(qstate->mesh_info, sub))
710                 return 0;
711         if(!sub->reply_list && !sub->cb_list && sub->super_set.count == 1) {
712                 /* it used to be detached, before this one got added */
713                 log_assert(mesh->num_detached_states > 0);
714                 mesh->num_detached_states--;
715         }
716         /* *newq will be run when inited after the current module stops */
717         return 1;
718 }
719
720 int mesh_state_attachment(struct mesh_state* super, struct mesh_state* sub)
721 {
722 #ifdef UNBOUND_DEBUG
723         struct rbnode_t* n;
724 #endif
725         struct mesh_state_ref* subref; /* points to sub, inserted in super */
726         struct mesh_state_ref* superref; /* points to super, inserted in sub */
727         if( !(subref = regional_alloc(super->s.region,
728                 sizeof(struct mesh_state_ref))) ||
729                 !(superref = regional_alloc(sub->s.region,
730                 sizeof(struct mesh_state_ref))) ) {
731                 log_err("mesh_state_attachment: out of memory");
732                 return 0;
733         }
734         superref->node.key = superref;
735         superref->s = super;
736         subref->node.key = subref;
737         subref->s = sub;
738 #ifdef UNBOUND_DEBUG
739         n =
740 #endif
741         rbtree_insert(&sub->super_set, &superref->node);
742         log_assert(n != NULL);
743 #ifdef UNBOUND_DEBUG
744         n =
745 #endif
746         rbtree_insert(&super->sub_set, &subref->node);
747         log_assert(n != NULL);
748         return 1;
749 }
750
751 /**
752  * callback results to mesh cb entry
753  * @param m: mesh state to send it for.
754  * @param rcode: if not 0, error code.
755  * @param rep: reply to send (or NULL if rcode is set).
756  * @param r: callback entry
757  */
758 static void
759 mesh_do_callback(struct mesh_state* m, int rcode, struct reply_info* rep,
760         struct mesh_cb* r)
761 {
762         int secure;
763         char* reason = NULL;
764         /* bogus messages are not made into servfail, sec_status passed 
765          * to the callback function */
766         if(rep && rep->security == sec_status_secure)
767                 secure = 1;
768         else    secure = 0;
769         if(!rep && rcode == LDNS_RCODE_NOERROR)
770                 rcode = LDNS_RCODE_SERVFAIL;
771         if(!rcode && rep->security == sec_status_bogus) {
772                 if(!(reason = errinf_to_str(&m->s)))
773                         rcode = LDNS_RCODE_SERVFAIL;
774         }
775         /* send the reply */
776         if(rcode) {
777                 fptr_ok(fptr_whitelist_mesh_cb(r->cb));
778                 (*r->cb)(r->cb_arg, rcode, r->buf, sec_status_unchecked, NULL);
779         } else {
780                 size_t udp_size = r->edns.udp_size;
781                 ldns_buffer_clear(r->buf);
782                 r->edns.edns_version = EDNS_ADVERTISED_VERSION;
783                 r->edns.udp_size = EDNS_ADVERTISED_SIZE;
784                 r->edns.ext_rcode = 0;
785                 r->edns.bits &= EDNS_DO;
786                 if(!reply_info_answer_encode(&m->s.qinfo, rep, r->qid, 
787                         r->qflags, r->buf, 0, 1, 
788                         m->s.env->scratch, udp_size, &r->edns, 
789                         (int)(r->edns.bits & EDNS_DO), secure)) 
790                 {
791                         fptr_ok(fptr_whitelist_mesh_cb(r->cb));
792                         (*r->cb)(r->cb_arg, LDNS_RCODE_SERVFAIL, r->buf,
793                                 sec_status_unchecked, NULL);
794                 } else {
795                         fptr_ok(fptr_whitelist_mesh_cb(r->cb));
796                         (*r->cb)(r->cb_arg, LDNS_RCODE_NOERROR, r->buf,
797                                 rep->security, reason);
798                 }
799         }
800         free(reason);
801         m->s.env->mesh->num_reply_addrs--;
802 }
803
804 /**
805  * Send reply to mesh reply entry
806  * @param m: mesh state to send it for.
807  * @param rcode: if not 0, error code.
808  * @param rep: reply to send (or NULL if rcode is set).
809  * @param r: reply entry
810  * @param prev: previous reply, already has its answer encoded in buffer.
811  */
812 static void
813 mesh_send_reply(struct mesh_state* m, int rcode, struct reply_info* rep,
814         struct mesh_reply* r, struct mesh_reply* prev)
815 {
816         struct timeval end_time;
817         struct timeval duration;
818         int secure;
819         /* examine security status */
820         if(m->s.env->need_to_validate && (!(r->qflags&BIT_CD) ||
821                 m->s.env->cfg->ignore_cd) && rep && 
822                 rep->security <= sec_status_bogus) {
823                 rcode = LDNS_RCODE_SERVFAIL;
824                 if(m->s.env->cfg->stat_extended) 
825                         m->s.env->mesh->ans_bogus++;
826         }
827         if(rep && rep->security == sec_status_secure)
828                 secure = 1;
829         else    secure = 0;
830         if(!rep && rcode == LDNS_RCODE_NOERROR)
831                 rcode = LDNS_RCODE_SERVFAIL;
832         /* send the reply */
833         if(prev && prev->qflags == r->qflags && 
834                 prev->edns.edns_present == r->edns.edns_present && 
835                 prev->edns.bits == r->edns.bits && 
836                 prev->edns.udp_size == r->edns.udp_size) {
837                 /* if the previous reply is identical to this one, fix ID */
838                 if(prev->query_reply.c->buffer != r->query_reply.c->buffer)
839                         ldns_buffer_copy(r->query_reply.c->buffer, 
840                                 prev->query_reply.c->buffer);
841                 ldns_buffer_write_at(r->query_reply.c->buffer, 0, 
842                         &r->qid, sizeof(uint16_t));
843                 ldns_buffer_write_at(r->query_reply.c->buffer, 12, 
844                         r->qname, m->s.qinfo.qname_len);
845                 comm_point_send_reply(&r->query_reply);
846         } else if(rcode) {
847                 m->s.qinfo.qname = r->qname;
848                 error_encode(r->query_reply.c->buffer, rcode, &m->s.qinfo,
849                         r->qid, r->qflags, &r->edns);
850                 comm_point_send_reply(&r->query_reply);
851         } else {
852                 size_t udp_size = r->edns.udp_size;
853                 r->edns.edns_version = EDNS_ADVERTISED_VERSION;
854                 r->edns.udp_size = EDNS_ADVERTISED_SIZE;
855                 r->edns.ext_rcode = 0;
856                 r->edns.bits &= EDNS_DO;
857                 m->s.qinfo.qname = r->qname;
858                 if(!reply_info_answer_encode(&m->s.qinfo, rep, r->qid, 
859                         r->qflags, r->query_reply.c->buffer, 0, 1, 
860                         m->s.env->scratch, udp_size, &r->edns, 
861                         (int)(r->edns.bits & EDNS_DO), secure)) 
862                 {
863                         error_encode(r->query_reply.c->buffer, 
864                                 LDNS_RCODE_SERVFAIL, &m->s.qinfo, r->qid, 
865                                 r->qflags, &r->edns);
866                 }
867                 comm_point_send_reply(&r->query_reply);
868         }
869         /* account */
870         m->s.env->mesh->num_reply_addrs--;
871         end_time = *m->s.env->now_tv;
872         timeval_subtract(&duration, &end_time, &r->start_time);
873         verbose(VERB_ALGO, "query took %d.%6.6d sec",
874                 (int)duration.tv_sec, (int)duration.tv_usec);
875         m->s.env->mesh->replies_sent++;
876         timeval_add(&m->s.env->mesh->replies_sum_wait, &duration);
877         timehist_insert(m->s.env->mesh->histogram, &duration);
878         if(m->s.env->cfg->stat_extended) {
879                 uint16_t rc = FLAGS_GET_RCODE(ldns_buffer_read_u16_at(r->
880                         query_reply.c->buffer, 2));
881                 if(secure) m->s.env->mesh->ans_secure++;
882                 m->s.env->mesh->ans_rcode[ rc ] ++;
883                 if(rc == 0 && LDNS_ANCOUNT(ldns_buffer_begin(r->
884                         query_reply.c->buffer)) == 0)
885                         m->s.env->mesh->ans_nodata++;
886         }
887 }
888
889 void mesh_query_done(struct mesh_state* mstate)
890 {
891         struct mesh_reply* r;
892         struct mesh_reply* prev = NULL;
893         struct mesh_cb* c;
894         struct reply_info* rep = (mstate->s.return_msg?
895                 mstate->s.return_msg->rep:NULL);
896         for(r = mstate->reply_list; r; r = r->next) {
897                 mesh_send_reply(mstate, mstate->s.return_rcode, rep, r, prev);
898                 prev = r;
899         }
900         mstate->replies_sent = 1;
901         for(c = mstate->cb_list; c; c = c->next) {
902                 mesh_do_callback(mstate, mstate->s.return_rcode, rep, c);
903         }
904 }
905
906 void mesh_walk_supers(struct mesh_area* mesh, struct mesh_state* mstate)
907 {
908         struct mesh_state_ref* ref;
909         RBTREE_FOR(ref, struct mesh_state_ref*, &mstate->super_set)
910         {
911                 /* make super runnable */
912                 (void)rbtree_insert(&mesh->run, &ref->s->run_node);
913                 /* callback the function to inform super of result */
914                 fptr_ok(fptr_whitelist_mod_inform_super(
915                         mesh->mods.mod[ref->s->s.curmod]->inform_super));
916                 (*mesh->mods.mod[ref->s->s.curmod]->inform_super)(&mstate->s, 
917                         ref->s->s.curmod, &ref->s->s);
918         }
919 }
920
921 struct mesh_state* mesh_area_find(struct mesh_area* mesh,
922         struct query_info* qinfo, uint16_t qflags, int prime)
923 {
924         struct mesh_state key;
925         struct mesh_state* result;
926
927         key.node.key = &key;
928         key.s.is_priming = prime;
929         key.s.qinfo = *qinfo;
930         key.s.query_flags = qflags;
931         
932         result = (struct mesh_state*)rbtree_search(&mesh->all, &key);
933         return result;
934 }
935
936 int mesh_state_add_cb(struct mesh_state* s, struct edns_data* edns,
937         ldns_buffer* buf, mesh_cb_func_t cb, void* cb_arg,
938         uint16_t qid, uint16_t qflags)
939 {
940         struct mesh_cb* r = regional_alloc(s->s.region, 
941                 sizeof(struct mesh_cb));
942         if(!r)
943                 return 0;
944         r->buf = buf;
945         log_assert(fptr_whitelist_mesh_cb(cb)); /* early failure ifmissing*/
946         r->cb = cb;
947         r->cb_arg = cb_arg;
948         r->edns = *edns;
949         r->qid = qid;
950         r->qflags = qflags;
951         r->next = s->cb_list;
952         s->cb_list = r;
953         return 1;
954
955 }
956
957 int mesh_state_add_reply(struct mesh_state* s, struct edns_data* edns,
958         struct comm_reply* rep, uint16_t qid, uint16_t qflags, uint8_t* qname)
959 {
960         struct mesh_reply* r = regional_alloc(s->s.region, 
961                 sizeof(struct mesh_reply));
962         if(!r)
963                 return 0;
964         r->query_reply = *rep;
965         r->edns = *edns;
966         r->qid = qid;
967         r->qflags = qflags;
968         r->start_time = *s->s.env->now_tv;
969         r->next = s->reply_list;
970         r->qname = regional_alloc_init(s->s.region, qname, 
971                 s->s.qinfo.qname_len);
972         if(!r->qname)
973                 return 0;
974         s->reply_list = r;
975         return 1;
976
977 }
978
979 /**
980  * Continue processing the mesh state at another module.
981  * Handles module to modules tranfer of control.
982  * Handles module finished.
983  * @param mesh: the mesh area.
984  * @param mstate: currently active mesh state.
985  *      Deleted if finished, calls _done and _supers to 
986  *      send replies to clients and inform other mesh states.
987  *      This in turn may create additional runnable mesh states.
988  * @param s: state at which the current module exited.
989  * @param ev: the event sent to the module.
990  *      returned is the event to send to the next module.
991  * @return true if continue processing at the new module.
992  *      false if not continued processing is needed.
993  */
994 static int
995 mesh_continue(struct mesh_area* mesh, struct mesh_state* mstate,
996         enum module_ext_state s, enum module_ev* ev)
997 {
998         mstate->num_activated++;
999         if(mstate->num_activated > MESH_MAX_ACTIVATION) {
1000                 /* module is looping. Stop it. */
1001                 log_err("internal error: looping module stopped");
1002                 log_query_info(VERB_QUERY, "pass error for qstate",
1003                         &mstate->s.qinfo);
1004                 s = module_error;
1005         }
1006         if(s == module_wait_module || s == module_restart_next) {
1007                 /* start next module */
1008                 mstate->s.curmod++;
1009                 if(mesh->mods.num == mstate->s.curmod) {
1010                         log_err("Cannot pass to next module; at last module");
1011                         log_query_info(VERB_QUERY, "pass error for qstate",
1012                                 &mstate->s.qinfo);
1013                         mstate->s.curmod--;
1014                         return mesh_continue(mesh, mstate, module_error, ev);
1015                 }
1016                 if(s == module_restart_next) {
1017                         fptr_ok(fptr_whitelist_mod_clear(
1018                                 mesh->mods.mod[mstate->s.curmod]->clear));
1019                         (*mesh->mods.mod[mstate->s.curmod]->clear)
1020                                 (&mstate->s, mstate->s.curmod);
1021                         mstate->s.minfo[mstate->s.curmod] = NULL;
1022                 }
1023                 *ev = module_event_pass;
1024                 return 1;
1025         }
1026         if(s == module_error && mstate->s.return_rcode == LDNS_RCODE_NOERROR) {
1027                 /* error is bad, handle pass back up below */
1028                 mstate->s.return_rcode = LDNS_RCODE_SERVFAIL;
1029         }
1030         if(s == module_error || s == module_finished) {
1031                 if(mstate->s.curmod == 0) {
1032                         mesh_query_done(mstate);
1033                         mesh_walk_supers(mesh, mstate);
1034                         mesh_state_delete(&mstate->s);
1035                         return 0;
1036                 }
1037                 /* pass along the locus of control */
1038                 mstate->s.curmod --;
1039                 *ev = module_event_moddone;
1040                 return 1;
1041         }
1042         return 0;
1043 }
1044
1045 void mesh_run(struct mesh_area* mesh, struct mesh_state* mstate,
1046         enum module_ev ev, struct outbound_entry* e)
1047 {
1048         enum module_ext_state s;
1049         verbose(VERB_ALGO, "mesh_run: start");
1050         while(mstate) {
1051                 /* run the module */
1052                 fptr_ok(fptr_whitelist_mod_operate(
1053                         mesh->mods.mod[mstate->s.curmod]->operate));
1054                 (*mesh->mods.mod[mstate->s.curmod]->operate)
1055                         (&mstate->s, ev, mstate->s.curmod, e);
1056
1057                 /* examine results */
1058                 mstate->s.reply = NULL;
1059                 regional_free_all(mstate->s.env->scratch);
1060                 s = mstate->s.ext_state[mstate->s.curmod];
1061                 verbose(VERB_ALGO, "mesh_run: %s module exit state is %s", 
1062                         mesh->mods.mod[mstate->s.curmod]->name, strextstate(s));
1063                 e = NULL;
1064                 if(mesh_continue(mesh, mstate, s, &ev))
1065                         continue;
1066
1067                 /* run more modules */
1068                 ev = module_event_pass;
1069                 if(mesh->run.count > 0) {
1070                         /* pop random element off the runnable tree */
1071                         mstate = (struct mesh_state*)mesh->run.root->key;
1072                         (void)rbtree_delete(&mesh->run, mstate);
1073                 } else mstate = NULL;
1074         }
1075         if(verbosity >= VERB_ALGO) {
1076                 mesh_stats(mesh, "mesh_run: end");
1077                 mesh_log_list(mesh);
1078         }
1079 }
1080
1081 void 
1082 mesh_log_list(struct mesh_area* mesh)
1083 {
1084         char buf[30];
1085         struct mesh_state* m;
1086         int num = 0;
1087         RBTREE_FOR(m, struct mesh_state*, &mesh->all) {
1088                 snprintf(buf, sizeof(buf), "%d%s%s%s%s%s mod%d %s%s", 
1089                         num++, (m->s.is_priming)?"p":"",  /* prime */
1090                         (m->s.query_flags&BIT_RD)?"RD":"",
1091                         (m->s.query_flags&BIT_CD)?"CD":"",
1092                         (m->super_set.count==0)?"d":"", /* detached */
1093                         (m->sub_set.count!=0)?"c":"",  /* children */
1094                         m->s.curmod, (m->reply_list)?"rep":"", /*hasreply*/
1095                         (m->cb_list)?"cb":"" /* callbacks */
1096                         ); 
1097                 log_query_info(VERB_ALGO, buf, &m->s.qinfo);
1098         }
1099 }
1100
1101 void 
1102 mesh_stats(struct mesh_area* mesh, const char* str)
1103 {
1104         verbose(VERB_DETAIL, "%s %u recursion states (%u with reply, "
1105                 "%u detached), %u waiting replies, %u recursion replies "
1106                 "sent, %d replies dropped, %d states jostled out", 
1107                 str, (unsigned)mesh->all.count, 
1108                 (unsigned)mesh->num_reply_states,
1109                 (unsigned)mesh->num_detached_states,
1110                 (unsigned)mesh->num_reply_addrs,
1111                 (unsigned)mesh->replies_sent,
1112                 (unsigned)mesh->stats_dropped,
1113                 (unsigned)mesh->stats_jostled);
1114         if(mesh->replies_sent > 0) {
1115                 struct timeval avg;
1116                 timeval_divide(&avg, &mesh->replies_sum_wait, 
1117                         mesh->replies_sent);
1118                 log_info("average recursion processing time "
1119                         "%d.%6.6d sec", (int)avg.tv_sec, (int)avg.tv_usec);
1120                 log_info("histogram of recursion processing times");
1121                 timehist_log(mesh->histogram, "recursions");
1122         }
1123 }
1124
1125 void 
1126 mesh_stats_clear(struct mesh_area* mesh)
1127 {
1128         if(!mesh)
1129                 return;
1130         mesh->replies_sent = 0;
1131         mesh->replies_sum_wait.tv_sec = 0;
1132         mesh->replies_sum_wait.tv_usec = 0;
1133         mesh->stats_jostled = 0;
1134         mesh->stats_dropped = 0;
1135         timehist_clear(mesh->histogram);
1136         mesh->ans_secure = 0;
1137         mesh->ans_bogus = 0;
1138         memset(&mesh->ans_rcode[0], 0, sizeof(size_t)*16);
1139         mesh->ans_nodata = 0;
1140 }
1141
1142 size_t 
1143 mesh_get_mem(struct mesh_area* mesh)
1144 {
1145         struct mesh_state* m;
1146         size_t s = sizeof(*mesh) + sizeof(struct timehist) +
1147                 sizeof(struct th_buck)*mesh->histogram->num +
1148                 sizeof(ldns_buffer) + ldns_buffer_capacity(mesh->qbuf_bak);
1149         RBTREE_FOR(m, struct mesh_state*, &mesh->all) {
1150                 /* all, including m itself allocated in qstate region */
1151                 s += regional_get_mem(m->s.region);
1152         }
1153         return s;
1154 }
1155
1156 int 
1157 mesh_detect_cycle(struct module_qstate* qstate, struct query_info* qinfo,
1158         uint16_t flags, int prime)
1159 {
1160         struct mesh_area* mesh = qstate->env->mesh;
1161         struct mesh_state* dep_m = mesh_area_find(mesh, qinfo, flags, prime);
1162         return mesh_detect_cycle_found(qstate, dep_m);
1163 }
1164
1165 void mesh_list_insert(struct mesh_state* m, struct mesh_state** fp,
1166         struct mesh_state** lp)
1167 {
1168         /* insert as last element */
1169         m->prev = *lp;
1170         m->next = NULL;
1171         if(*lp)
1172                 (*lp)->next = m;
1173         else    *fp = m;
1174         *lp = m;
1175 }
1176
1177 void mesh_list_remove(struct mesh_state* m, struct mesh_state** fp,
1178         struct mesh_state** lp)
1179 {
1180         if(m->next)
1181                 m->next->prev = m->prev;
1182         else    *lp = m->prev;
1183         if(m->prev)
1184                 m->prev->next = m->next;
1185         else    *fp = m->next;
1186 }