1 /*
2  * services/mesh.c - deal with mesh of query states and handle events for that.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains functions to assist in dealing with a mesh of
40  * query states. This mesh is supposed to be thread-specific.
41  * It consists of query states (one per qname, qtype, qclass), the
42  * connections between query states and their super- and subquery states,
43  * and the replies to be sent back to clients.
44  */
45 #include "config.h"
46 #include "services/mesh.h"
47 #include "services/outbound_list.h"
48 #include "services/cache/dns.h"
49 #include "util/log.h"
50 #include "util/net_help.h"
51 #include "util/module.h"
52 #include "util/regional.h"
53 #include "util/data/msgencode.h"
54 #include "util/timehist.h"
55 #include "util/fptr_wlist.h"
56 #include "util/alloc.h"
57 #include "util/config_file.h"
58 #include "sldns/sbuffer.h"
59 #include "sldns/wire2str.h"
60 #include "services/localzone.h"
61 #include "util/data/dname.h"
62
63 /** subtract timevals; the result does not overflow or become negative */
64 static void
65 timeval_subtract(struct timeval* d, const struct timeval* end, const struct timeval* start)
66 {
67 #ifndef S_SPLINT_S
68         time_t end_usec = end->tv_usec;
69         d->tv_sec = end->tv_sec - start->tv_sec;
70         if(end_usec < start->tv_usec) {
71                 end_usec += 1000000;
72                 d->tv_sec--;
73         }
74         d->tv_usec = end_usec - start->tv_usec;
75 #endif
76 }
77
78 /** add timevals; the result does not overflow or become negative */
79 static void
80 timeval_add(struct timeval* d, const struct timeval* add)
81 {
82 #ifndef S_SPLINT_S
83         d->tv_sec += add->tv_sec;
84         d->tv_usec += add->tv_usec;
85         if(d->tv_usec >= 1000000) {
86                 d->tv_usec -= 1000000;
87                 d->tv_sec++;
88         }
89 #endif
90 }
91
92 /** divide sum of timers to get average */
93 static void
94 timeval_divide(struct timeval* avg, const struct timeval* sum, size_t d)
95 {
96 #ifndef S_SPLINT_S
97         size_t leftover;
98         if(d == 0) {
99                 avg->tv_sec = 0;
100                 avg->tv_usec = 0;
101                 return;
102         }
103         avg->tv_sec = sum->tv_sec / d;
104         avg->tv_usec = sum->tv_usec / d;
105         /* handle fraction from seconds divide */
106         leftover = sum->tv_sec - avg->tv_sec*d;
107         avg->tv_usec += (leftover*1000000)/d;
108 #endif
109 }
110
111 /** compare time values (for histogram/jostle checks): true if x <= y */
112 static int
113 timeval_smaller(const struct timeval* x, const struct timeval* y)
114 {
115 #ifndef S_SPLINT_S
116         if(x->tv_sec < y->tv_sec)
117                 return 1;
118         else if(x->tv_sec == y->tv_sec) {
119                 if(x->tv_usec <= y->tv_usec)
120                         return 1;
121                 else    return 0;
122         }
123         else    return 0;
124 #endif
125 }
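/*
 * Illustrative sketch (editor-added, not part of upstream unbound): how the
 * static timeval helpers above combine when accounting reply latency. The
 * example values and the name example_avg_latency are the editor's own; the
 * real accounting happens in mesh_send_reply() and mesh_stats() below.
 */
#if 0
static void
example_avg_latency(void)
{
        struct timeval start = {10, 900000};    /* 10.9 sec */
        struct timeval end = {12, 400000};      /* 12.4 sec */
        struct timeval sum = {0, 0}, d, avg;
        timeval_subtract(&d, &end, &start);     /* d = 1.5 sec, with a usec
                                                 * borrow from the seconds */
        timeval_add(&sum, &d);
        timeval_add(&sum, &d);                  /* pretend a second reply */
        timeval_divide(&avg, &sum, 2);          /* avg = 1.5 sec; the leftover
                                                 * second from the divide is
                                                 * folded into tv_usec */
}
#endif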
126
127 int
128 mesh_state_compare(const void* ap, const void* bp)
129 {
130         struct mesh_state* a = (struct mesh_state*)ap;
131         struct mesh_state* b = (struct mesh_state*)bp;
132
133         if(a->unique < b->unique)
134                 return -1;
135         if(a->unique > b->unique)
136                 return 1;
137
138         if(a->s.is_priming && !b->s.is_priming)
139                 return -1;
140         if(!a->s.is_priming && b->s.is_priming)
141                 return 1;
142
143         if(a->s.is_valrec && !b->s.is_valrec)
144                 return -1;
145         if(!a->s.is_valrec && b->s.is_valrec)
146                 return 1;
147
148         if((a->s.query_flags&BIT_RD) && !(b->s.query_flags&BIT_RD))
149                 return -1;
150         if(!(a->s.query_flags&BIT_RD) && (b->s.query_flags&BIT_RD))
151                 return 1;
152
153         if((a->s.query_flags&BIT_CD) && !(b->s.query_flags&BIT_CD))
154                 return -1;
155         if(!(a->s.query_flags&BIT_CD) && (b->s.query_flags&BIT_CD))
156                 return 1;
157
158         return query_info_compare(&a->s.qinfo, &b->s.qinfo);
159 }
160
161 int
162 mesh_state_ref_compare(const void* ap, const void* bp)
163 {
164         struct mesh_state_ref* a = (struct mesh_state_ref*)ap;
165         struct mesh_state_ref* b = (struct mesh_state_ref*)bp;
166         return mesh_state_compare(a->s, b->s);
167 }
168
169 struct mesh_area* 
170 mesh_create(struct module_stack* stack, struct module_env* env)
171 {
172         struct mesh_area* mesh = calloc(1, sizeof(struct mesh_area));
173         if(!mesh) {
174                 log_err("mesh area alloc: out of memory");
175                 return NULL;
176         }
177         mesh->histogram = timehist_setup();
178         mesh->qbuf_bak = sldns_buffer_new(env->cfg->msg_buffer_size);
179         if(!mesh->histogram || !mesh->qbuf_bak) {
180                 free(mesh);
181                 log_err("mesh area alloc: out of memory");
182                 return NULL;
183         }
184         mesh->mods = *stack;
185         mesh->env = env;
186         rbtree_init(&mesh->run, &mesh_state_compare);
187         rbtree_init(&mesh->all, &mesh_state_compare);
188         mesh->num_reply_addrs = 0;
189         mesh->num_reply_states = 0;
190         mesh->num_detached_states = 0;
191         mesh->num_forever_states = 0;
192         mesh->stats_jostled = 0;
193         mesh->stats_dropped = 0;
194         mesh->max_reply_states = env->cfg->num_queries_per_thread;
195         mesh->max_forever_states = (mesh->max_reply_states+1)/2;
196 #ifndef S_SPLINT_S
197         mesh->jostle_max.tv_sec = (time_t)(env->cfg->jostle_time / 1000);
198         mesh->jostle_max.tv_usec = (time_t)((env->cfg->jostle_time % 1000)
199                 *1000);
200 #endif
201         return mesh;
202 }
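/*
 * Illustrative sketch (editor-added, not part of upstream unbound): the
 * expected lifetime of a mesh area around mesh_create()/mesh_delete(). The
 * mesh is thread-specific (see the file comment above); the function name
 * example_mesh_lifetime is the editor's own.
 */
#if 0
static void
example_mesh_lifetime(struct module_stack* stack, struct module_env* env)
{
        struct mesh_area* mesh = mesh_create(stack, env);
        if(!mesh)
                return; /* out of memory */
        /* ... queries enter the mesh via mesh_new_client(),
         * mesh_new_callback() or mesh_new_prefetch() ... */
        mesh_delete(mesh);      /* frees all remaining query states */
}
#endif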
203
204 /** helper for mesh_delete: delete mesh states */
205 static void
206 mesh_delete_helper(rbnode_t* n)
207 {
208         struct mesh_state* mstate = (struct mesh_state*)n->key;
209         /* perform a full delete, not only the 'cleanup' routine,
210          * because other callbacks expect a clean state in the mesh
211          * for 're-entrant' calls */
212         mesh_state_delete(&mstate->s);
213         /* but because these delete the items from the tree, postorder
214          * traversal and rbtree rebalancing do not work together */
215 }
216
217 void 
218 mesh_delete(struct mesh_area* mesh)
219 {
220         if(!mesh)
221                 return;
222         /* free all query states */
223         while(mesh->all.count)
224                 mesh_delete_helper(mesh->all.root);
225         timehist_delete(mesh->histogram);
226         sldns_buffer_free(mesh->qbuf_bak);
227         free(mesh);
228 }
229
230 void
231 mesh_delete_all(struct mesh_area* mesh)
232 {
233         /* free all query states */
234         while(mesh->all.count)
235                 mesh_delete_helper(mesh->all.root);
236         mesh->stats_dropped += mesh->num_reply_addrs;
237         /* clear mesh area references */
238         rbtree_init(&mesh->run, &mesh_state_compare);
239         rbtree_init(&mesh->all, &mesh_state_compare);
240         mesh->num_reply_addrs = 0;
241         mesh->num_reply_states = 0;
242         mesh->num_detached_states = 0;
243         mesh->num_forever_states = 0;
244         mesh->forever_first = NULL;
245         mesh->forever_last = NULL;
246         mesh->jostle_first = NULL;
247         mesh->jostle_last = NULL;
248 }
249
250 int mesh_make_new_space(struct mesh_area* mesh, sldns_buffer* qbuf)
251 {
252         struct mesh_state* m = mesh->jostle_first;
253         /* free space is available */
254         if(mesh->num_reply_states < mesh->max_reply_states)
255                 return 1;
256         /* try to kick out a jostle-list item */
257         if(m && m->reply_list && m->list_select == mesh_jostle_list) {
258                 /* how old is it? */
259                 struct timeval age;
260                 timeval_subtract(&age, mesh->env->now_tv, 
261                         &m->reply_list->start_time);
262                 if(timeval_smaller(&mesh->jostle_max, &age)) {
263                         /* it's a goner */
264                         log_nametypeclass(VERB_ALGO, "query jostled out to "
265                                 "make space for a new one",
266                                 m->s.qinfo.qname, m->s.qinfo.qtype,
267                                 m->s.qinfo.qclass);
268                         /* backup the query */
269                         if(qbuf) sldns_buffer_copy(mesh->qbuf_bak, qbuf);
270                         /* notify supers */
271                         if(m->super_set.count > 0) {
272                                 verbose(VERB_ALGO, "notify supers of failure");
273                                 m->s.return_msg = NULL;
274                                 m->s.return_rcode = LDNS_RCODE_SERVFAIL;
275                                 mesh_walk_supers(mesh, m);
276                         }
277                         mesh->stats_jostled ++;
278                         mesh_state_delete(&m->s);
279                         /* restore the query - note that the qinfo ptr to
280                          * the querybuffer is then correct again. */
281                         if(qbuf) sldns_buffer_copy(qbuf, mesh->qbuf_bak);
282                         return 1;
283                 }
284         }
285         /* no space for new item */
286         return 0;
287 }
288
289 void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo,
290         uint16_t qflags, struct edns_data* edns, struct comm_reply* rep,
291         uint16_t qid)
292 {
293         struct mesh_state* s = NULL;
294         int unique = edns_unique_mesh_state(edns->opt_list, mesh->env);
295         int was_detached = 0;
296         int was_noreply = 0;
297         int added = 0;
298         if(!unique)
299                 s = mesh_area_find(mesh, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0);
300         /* does this create a new reply state? */
301         if(!s || s->list_select == mesh_no_list) {
302                 if(!mesh_make_new_space(mesh, rep->c->buffer)) {
303                         verbose(VERB_ALGO, "Too many queries. dropping "
304                                 "incoming query.");
305                         comm_point_drop_reply(rep);
306                         mesh->stats_dropped ++;
307                         return;
308                 }
309                 /* for this new reply state, the reply address is free,
310                  * so the limit of reply addresses does not stop reply states */
311         } else {
312                 /* protect our memory usage from storing reply addresses */
313                 if(mesh->num_reply_addrs > mesh->max_reply_states*16) {
314                         verbose(VERB_ALGO, "Too many requests queued. "
315                                 "dropping incoming query.");
316                         mesh->stats_dropped++;
317                         comm_point_drop_reply(rep);
318                         return;
319                 }
320         }
321         /* see if it already exists, if not, create one */
322         if(!s) {
323 #ifdef UNBOUND_DEBUG
324                 struct rbnode_t* n;
325 #endif
326                 s = mesh_state_create(mesh->env, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0);
327                 if(!s) {
328                         log_err("mesh_state_create: out of memory; SERVFAIL");
329                         if(!inplace_cb_reply_servfail_call(mesh->env, qinfo, NULL, NULL,
330                                 LDNS_RCODE_SERVFAIL, edns, mesh->env->scratch))
331                                         edns->opt_list = NULL;
332                         error_encode(rep->c->buffer, LDNS_RCODE_SERVFAIL,
333                                 qinfo, qid, qflags, edns);
334                         comm_point_send_reply(rep);
335                         return;
336                 }
337                 if(unique)
338                         mesh_state_make_unique(s);
339                 /* copy the edns options we got from the front */
340                 if(edns->opt_list) {
341                         s->s.edns_opts_front_in = edns_opt_copy_region(edns->opt_list,
342                                 s->s.region);
343                         if(!s->s.edns_opts_front_in) {
344                                 log_err("mesh_state_create: out of memory; SERVFAIL");
345                                 if(!inplace_cb_reply_servfail_call(mesh->env, qinfo, NULL,
346                                         NULL, LDNS_RCODE_SERVFAIL, edns, mesh->env->scratch))
347                                                 edns->opt_list = NULL;
348                                 error_encode(rep->c->buffer, LDNS_RCODE_SERVFAIL,
349                                         qinfo, qid, qflags, edns);
350                                 comm_point_send_reply(rep);
351                                 return;
352                         }
353                 }
354
355 #ifdef UNBOUND_DEBUG
356                 n =
357 #else
358                 (void)
359 #endif
360                 rbtree_insert(&mesh->all, &s->node);
361                 log_assert(n != NULL);
362                 /* set detached (it is now) */
363                 mesh->num_detached_states++;
364                 added = 1;
365         }
366         if(!s->reply_list && !s->cb_list && s->super_set.count == 0)
367                 was_detached = 1;
368         if(!s->reply_list && !s->cb_list)
369                 was_noreply = 1;
370         /* add reply to s */
371         if(!mesh_state_add_reply(s, edns, rep, qid, qflags, qinfo)) {
372                         log_err("mesh_new_client: out of memory; SERVFAIL");
373                         if(!inplace_cb_reply_servfail_call(mesh->env, qinfo, &s->s,
374                                 NULL, LDNS_RCODE_SERVFAIL, edns, mesh->env->scratch))
375                                         edns->opt_list = NULL;
376                         error_encode(rep->c->buffer, LDNS_RCODE_SERVFAIL,
377                                 qinfo, qid, qflags, edns);
378                         comm_point_send_reply(rep);
379                         if(added)
380                                 mesh_state_delete(&s->s);
381                         return;
382         }
383         /* update statistics */
384         if(was_detached) {
385                 log_assert(mesh->num_detached_states > 0);
386                 mesh->num_detached_states--;
387         }
388         if(was_noreply) {
389                 mesh->num_reply_states ++;
390         }
391         mesh->num_reply_addrs++;
392         if(s->list_select == mesh_no_list) {
393                 /* move to either the forever or the jostle_list */
394                 if(mesh->num_forever_states < mesh->max_forever_states) {
395                         mesh->num_forever_states ++;
396                         mesh_list_insert(s, &mesh->forever_first, 
397                                 &mesh->forever_last);
398                         s->list_select = mesh_forever_list;
399                 } else {
400                         mesh_list_insert(s, &mesh->jostle_first, 
401                                 &mesh->jostle_last);
402                         s->list_select = mesh_jostle_list;
403                 }
404         }
405         if(added)
406                 mesh_run(mesh, s, module_event_new, NULL);
407 }
408
409 int 
410 mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo,
411         uint16_t qflags, struct edns_data* edns, sldns_buffer* buf, 
412         uint16_t qid, mesh_cb_func_t cb, void* cb_arg)
413 {
414         struct mesh_state* s = NULL;
415         int unique = edns_unique_mesh_state(edns->opt_list, mesh->env);
416         int was_detached = 0;
417         int was_noreply = 0;
418         int added = 0;
419         if(!unique)
420                 s = mesh_area_find(mesh, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0);
421         /* there are no limits on the number of callbacks */
422
423         /* see if it already exists, if not, create one */
424         if(!s) {
425 #ifdef UNBOUND_DEBUG
426                 struct rbnode_t* n;
427 #endif
428                 s = mesh_state_create(mesh->env, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0);
429                 if(!s) {
430                         return 0;
431                 }
432                 if(unique)
433                         mesh_state_make_unique(s);
434                 if(edns->opt_list) {
435                         s->s.edns_opts_front_in = edns_opt_copy_region(edns->opt_list,
436                                 s->s.region);
437                         if(!s->s.edns_opts_front_in) {
438                                 return 0;
439                         }
440                 }
441 #ifdef UNBOUND_DEBUG
442                 n =
443 #else
444                 (void)
445 #endif
446                 rbtree_insert(&mesh->all, &s->node);
447                 log_assert(n != NULL);
448                 /* set detached (it is now) */
449                 mesh->num_detached_states++;
450                 added = 1;
451         }
452         if(!s->reply_list && !s->cb_list && s->super_set.count == 0)
453                 was_detached = 1;
454         if(!s->reply_list && !s->cb_list)
455                 was_noreply = 1;
456         /* add reply to s */
457         if(!mesh_state_add_cb(s, edns, buf, cb, cb_arg, qid, qflags)) {
458                         if(added)
459                                 mesh_state_delete(&s->s);
460                         return 0;
461         }
462         /* update statistics */
463         if(was_detached) {
464                 log_assert(mesh->num_detached_states > 0);
465                 mesh->num_detached_states--;
466         }
467         if(was_noreply) {
468                 mesh->num_reply_states ++;
469         }
470         mesh->num_reply_addrs++;
471         if(added)
472                 mesh_run(mesh, s, module_event_new, NULL);
473         return 1;
474 }
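/*
 * Illustrative sketch (editor-added, not part of upstream unbound): a minimal
 * callback of the mesh_cb_func_t shape that mesh_new_callback() expects. The
 * parameter order follows the invocations in mesh_do_callback() further down
 * in this file; the name example_result_cb is the editor's own.
 */
#if 0
static void
example_result_cb(void* arg, int rcode, sldns_buffer* buf,
        enum sec_status s, char* why_bogus)
{
        (void)arg;
        if(rcode != LDNS_RCODE_NOERROR) {
                log_info("lookup failed with rcode %d", rcode);
                return;
        }
        if(s == sec_status_bogus) {
                log_info("answer was bogus: %s", why_bogus?why_bogus:"");
                return;
        }
        /* on success, buf holds the complete encoded answer message */
        log_info("lookup done, %u answer bytes",
                (unsigned)sldns_buffer_limit(buf));
}
#endif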
475
476 void mesh_new_prefetch(struct mesh_area* mesh, struct query_info* qinfo,
477         uint16_t qflags, time_t leeway)
478 {
479         struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&(BIT_RD|BIT_CD),
480                 0, 0);
481 #ifdef UNBOUND_DEBUG
482         struct rbnode_t* n;
483 #endif
484         /* already exists, and for a different purpose perhaps.
485          * if mesh_no_list, keep it that way. */
486         if(s) {
487                 /* make it ignore the cache from now on */
488                 if(!s->s.blacklist)
489                         sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region);
490                 if(s->s.prefetch_leeway < leeway)
491                         s->s.prefetch_leeway = leeway;
492                 return;
493         }
494         if(!mesh_make_new_space(mesh, NULL)) {
495                 verbose(VERB_ALGO, "Too many queries. dropped prefetch.");
496                 mesh->stats_dropped ++;
497                 return;
498         }
499
500         s = mesh_state_create(mesh->env, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0);
501         if(!s) {
502                 log_err("prefetch mesh_state_create: out of memory");
503                 return;
504         }
505 #ifdef UNBOUND_DEBUG
506         n =
507 #else
508         (void)
509 #endif
510         rbtree_insert(&mesh->all, &s->node);
511         log_assert(n != NULL);
512         /* set detached (it is now) */
513         mesh->num_detached_states++;
514         /* make it ignore the cache */
515         sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region);
516         s->s.prefetch_leeway = leeway;
517
518         if(s->list_select == mesh_no_list) {
519                 /* move to either the forever or the jostle_list */
520                 if(mesh->num_forever_states < mesh->max_forever_states) {
521                         mesh->num_forever_states ++;
522                         mesh_list_insert(s, &mesh->forever_first, 
523                                 &mesh->forever_last);
524                         s->list_select = mesh_forever_list;
525                 } else {
526                         mesh_list_insert(s, &mesh->jostle_first, 
527                                 &mesh->jostle_last);
528                         s->list_select = mesh_jostle_list;
529                 }
530         }
531         mesh_run(mesh, s, module_event_new, NULL);
532 }
533
534 void mesh_report_reply(struct mesh_area* mesh, struct outbound_entry* e,
535         struct comm_reply* reply, int what)
536 {
537         enum module_ev event = module_event_reply;
538         e->qstate->reply = reply;
539         if(what != NETEVENT_NOERROR) {
540                 event = module_event_noreply;
541                 if(what == NETEVENT_CAPSFAIL)
542                         event = module_event_capsfail;
543         }
544         mesh_run(mesh, e->qstate->mesh_info, event, e);
545 }
546
547 struct mesh_state* 
548 mesh_state_create(struct module_env* env, struct query_info* qinfo, 
549         uint16_t qflags, int prime, int valrec)
550 {
551         struct regional* region = alloc_reg_obtain(env->alloc);
552         struct mesh_state* mstate;
553         int i;
554         if(!region)
555                 return NULL;
556         mstate = (struct mesh_state*)regional_alloc(region, 
557                 sizeof(struct mesh_state));
558         if(!mstate) {
559                 alloc_reg_release(env->alloc, region);
560                 return NULL;
561         }
562         memset(mstate, 0, sizeof(*mstate));
563         mstate->node = *RBTREE_NULL;
564         mstate->run_node = *RBTREE_NULL;
565         mstate->node.key = mstate;
566         mstate->run_node.key = mstate;
567         mstate->reply_list = NULL;
568         mstate->list_select = mesh_no_list;
569         mstate->replies_sent = 0;
570         rbtree_init(&mstate->super_set, &mesh_state_ref_compare);
571         rbtree_init(&mstate->sub_set, &mesh_state_ref_compare);
572         mstate->num_activated = 0;
573         mstate->unique = NULL;
574         /* init module qstate */
575         mstate->s.qinfo.qtype = qinfo->qtype;
576         mstate->s.qinfo.qclass = qinfo->qclass;
577         mstate->s.qinfo.local_alias = NULL;
578         mstate->s.qinfo.qname_len = qinfo->qname_len;
579         mstate->s.qinfo.qname = regional_alloc_init(region, qinfo->qname,
580                 qinfo->qname_len);
581         if(!mstate->s.qinfo.qname) {
582                 alloc_reg_release(env->alloc, region);
583                 return NULL;
584         }
585         /* remove all weird bits from qflags */
586         mstate->s.query_flags = (qflags & (BIT_RD|BIT_CD));
587         mstate->s.is_priming = prime;
588         mstate->s.is_valrec = valrec;
589         mstate->s.reply = NULL;
590         mstate->s.region = region;
591         mstate->s.curmod = 0;
592         mstate->s.return_msg = 0;
593         mstate->s.return_rcode = LDNS_RCODE_NOERROR;
594         mstate->s.env = env;
595         mstate->s.mesh_info = mstate;
596         mstate->s.prefetch_leeway = 0;
597         mstate->s.no_cache_lookup = 0;
598         mstate->s.no_cache_store = 0;
599         /* init modules */
600         for(i=0; i<env->mesh->mods.num; i++) {
601                 mstate->s.minfo[i] = NULL;
602                 mstate->s.ext_state[i] = module_state_initial;
603         }
604         /* init edns option lists */
605         mstate->s.edns_opts_front_in = NULL;
606         mstate->s.edns_opts_back_out = NULL;
607         mstate->s.edns_opts_back_in = NULL;
608         mstate->s.edns_opts_front_out = NULL;
609
610         return mstate;
611 }
612
613 int
614 mesh_state_is_unique(struct mesh_state* mstate)
615 {
616         return mstate->unique != NULL;
617 }
618
619 void
620 mesh_state_make_unique(struct mesh_state* mstate)
621 {
622         mstate->unique = mstate;
623 }
624
625 void 
626 mesh_state_cleanup(struct mesh_state* mstate)
627 {
628         struct mesh_area* mesh;
629         int i;
630         if(!mstate)
631                 return;
632         mesh = mstate->s.env->mesh;
633         /* drop unsent replies */
634         if(!mstate->replies_sent) {
635                 struct mesh_reply* rep;
636                 struct mesh_cb* cb;
637                 for(rep=mstate->reply_list; rep; rep=rep->next) {
638                         comm_point_drop_reply(&rep->query_reply);
639                         mesh->num_reply_addrs--;
640                 }
641                 for(cb=mstate->cb_list; cb; cb=cb->next) {
642                         fptr_ok(fptr_whitelist_mesh_cb(cb->cb));
643                         (*cb->cb)(cb->cb_arg, LDNS_RCODE_SERVFAIL, NULL,
644                                 sec_status_unchecked, NULL);
645                         mesh->num_reply_addrs--;
646                 }
647         }
648
649         /* de-init modules */
650         for(i=0; i<mesh->mods.num; i++) {
651                 fptr_ok(fptr_whitelist_mod_clear(mesh->mods.mod[i]->clear));
652                 (*mesh->mods.mod[i]->clear)(&mstate->s, i);
653                 mstate->s.minfo[i] = NULL;
654                 mstate->s.ext_state[i] = module_finished;
655         }
656         alloc_reg_release(mstate->s.env->alloc, mstate->s.region);
657 }
658
659 void 
660 mesh_state_delete(struct module_qstate* qstate)
661 {
662         struct mesh_area* mesh;
663         struct mesh_state_ref* super, ref;
664         struct mesh_state* mstate;
665         if(!qstate)
666                 return;
667         mstate = qstate->mesh_info;
668         mesh = mstate->s.env->mesh;
669         mesh_detach_subs(&mstate->s);
670         if(mstate->list_select == mesh_forever_list) {
671                 mesh->num_forever_states --;
672                 mesh_list_remove(mstate, &mesh->forever_first, 
673                         &mesh->forever_last);
674         } else if(mstate->list_select == mesh_jostle_list) {
675                 mesh_list_remove(mstate, &mesh->jostle_first, 
676                         &mesh->jostle_last);
677         }
678         if(!mstate->reply_list && !mstate->cb_list
679                 && mstate->super_set.count == 0) {
680                 log_assert(mesh->num_detached_states > 0);
681                 mesh->num_detached_states--;
682         }
683         if(mstate->reply_list || mstate->cb_list) {
684                 log_assert(mesh->num_reply_states > 0);
685                 mesh->num_reply_states--;
686         }
687         ref.node.key = &ref;
688         ref.s = mstate;
689         RBTREE_FOR(super, struct mesh_state_ref*, &mstate->super_set) {
690                 (void)rbtree_delete(&super->s->sub_set, &ref);
691         }
692         (void)rbtree_delete(&mesh->run, mstate);
693         (void)rbtree_delete(&mesh->all, mstate);
694         mesh_state_cleanup(mstate);
695 }
696
697 /** helper: recursively check if 'tofind' occurs in the subqueries below m */
698 static int
699 find_in_subsub(struct mesh_state* m, struct mesh_state* tofind, size_t *c)
700 {
701         struct mesh_state_ref* r;
702         if((*c)++ > MESH_MAX_SUBSUB)
703                 return 1;
704         RBTREE_FOR(r, struct mesh_state_ref*, &m->sub_set) {
705                 if(r->s == tofind || find_in_subsub(r->s, tofind, c))
706                         return 1;
707         }
708         return 0;
709 }
710
711 /** find cycle for already looked up mesh_state */
712 static int 
713 mesh_detect_cycle_found(struct module_qstate* qstate, struct mesh_state* dep_m)
714 {
715         struct mesh_state* cyc_m = qstate->mesh_info;
716         size_t counter = 0;
717         if(!dep_m)
718                 return 0;
719         if(dep_m == cyc_m || find_in_subsub(dep_m, cyc_m, &counter)) {
720                 if(counter > MESH_MAX_SUBSUB)
721                         return 2;
722                 return 1;
723         }
724         return 0;
725 }
726
727 void mesh_detach_subs(struct module_qstate* qstate)
728 {
729         struct mesh_area* mesh = qstate->env->mesh;
730         struct mesh_state_ref* ref, lookup;
731 #ifdef UNBOUND_DEBUG
732         struct rbnode_t* n;
733 #endif
734         lookup.node.key = &lookup;
735         lookup.s = qstate->mesh_info;
736         RBTREE_FOR(ref, struct mesh_state_ref*, &qstate->mesh_info->sub_set) {
737 #ifdef UNBOUND_DEBUG
738                 n =
739 #else
740                 (void)
741 #endif
742                 rbtree_delete(&ref->s->super_set, &lookup);
743                 log_assert(n != NULL); /* must have been present */
744                 if(!ref->s->reply_list && !ref->s->cb_list
745                         && ref->s->super_set.count == 0) {
746                         mesh->num_detached_states++;
747                         log_assert(mesh->num_detached_states + 
748                                 mesh->num_reply_states <= mesh->all.count);
749                 }
750         }
751         rbtree_init(&qstate->mesh_info->sub_set, &mesh_state_ref_compare);
752 }
753
754 int mesh_attach_sub(struct module_qstate* qstate, struct query_info* qinfo,
755         uint16_t qflags, int prime, int valrec, struct module_qstate** newq)
756 {
757         /* find it, if not, create it */
758         struct mesh_area* mesh = qstate->env->mesh;
759         struct mesh_state* sub = mesh_area_find(mesh, qinfo, qflags, prime, valrec);
760         int was_detached;
761         if(mesh_detect_cycle_found(qstate, sub)) {
762                 verbose(VERB_ALGO, "attach failed, cycle detected");
763                 return 0;
764         }
765         if(!sub) {
766 #ifdef UNBOUND_DEBUG
767                 struct rbnode_t* n;
768 #endif
769                 /* create a new one */
770                 sub = mesh_state_create(qstate->env, qinfo, qflags, prime, valrec);
771                 if(!sub) {
772                         log_err("mesh_attach_sub: out of memory");
773                         return 0;
774                 }
775 #ifdef UNBOUND_DEBUG
776                 n =
777 #else
778                 (void)
779 #endif
780                 rbtree_insert(&mesh->all, &sub->node);
781                 log_assert(n != NULL);
782                 /* set detached (it is now) */
783                 mesh->num_detached_states++;
784                 /* set new query state to run */
785 #ifdef UNBOUND_DEBUG
786                 n =
787 #else
788                 (void)
789 #endif
790                 rbtree_insert(&mesh->run, &sub->run_node);
791                 log_assert(n != NULL);
792                 *newq = &sub->s;
793         } else
794                 *newq = NULL;
795         was_detached = (sub->super_set.count == 0);
796         if(!mesh_state_attachment(qstate->mesh_info, sub))
797                 return 0;
798         /* if it was a duplicate attachment, the count was not zero before */
799         if(!sub->reply_list && !sub->cb_list && was_detached && 
800                 sub->super_set.count == 1) {
801                 /* it used to be detached, before this one got added */
802                 log_assert(mesh->num_detached_states > 0);
803                 mesh->num_detached_states--;
804         }
805         /* *newq will be run when inited after the current module stops */
806         return 1;
807 }
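/*
 * Illustrative sketch (editor-added, not part of upstream unbound): how a
 * module could register a dependency on a subquery via mesh_attach_sub().
 * The NS query, flag choice and the name example_attach_ns_lookup are the
 * editor's own; real modules typically reach this function through the
 * module environment's attach_sub hook instead of calling it directly.
 */
#if 0
static int
example_attach_ns_lookup(struct module_qstate* qstate, uint8_t* nm,
        size_t nmlen)
{
        struct query_info qinfo;
        struct module_qstate* newq = NULL;
        qinfo.qname = nm;
        qinfo.qname_len = nmlen;
        qinfo.qtype = LDNS_RR_TYPE_NS;
        qinfo.qclass = LDNS_RR_CLASS_IN;
        qinfo.local_alias = NULL;
        /* fails on malloc error or when a dependency cycle is detected */
        if(!mesh_attach_sub(qstate, &qinfo, BIT_RD, 0, 0, &newq))
                return 0;
        /* newq is non-NULL only if a fresh mesh state was created; that
         * state is run after the current module stops (see above) */
        return 1;
}
#endif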
808
809 int mesh_state_attachment(struct mesh_state* super, struct mesh_state* sub)
810 {
811 #ifdef UNBOUND_DEBUG
812         struct rbnode_t* n;
813 #endif
814         struct mesh_state_ref* subref; /* points to sub, inserted in super */
815         struct mesh_state_ref* superref; /* points to super, inserted in sub */
816         if( !(subref = regional_alloc(super->s.region,
817                 sizeof(struct mesh_state_ref))) ||
818                 !(superref = regional_alloc(sub->s.region,
819                 sizeof(struct mesh_state_ref))) ) {
820                 log_err("mesh_state_attachment: out of memory");
821                 return 0;
822         }
823         superref->node.key = superref;
824         superref->s = super;
825         subref->node.key = subref;
826         subref->s = sub;
827         if(!rbtree_insert(&sub->super_set, &superref->node)) {
828                 /* this should not happen, iterator and validator do not
829                  * attach subqueries that are identical. */
830                 /* already attached, we are done, nothing todo.
831                  * since superref and subref already allocated in region,
832                  * we cannot free them */
833                 return 1;
834         }
835 #ifdef UNBOUND_DEBUG
836         n =
837 #else
838         (void)
839 #endif
840         rbtree_insert(&super->sub_set, &subref->node);
841         log_assert(n != NULL); /* we checked in the if statement above;
842           the reverse administration should not fail, unless out of sync */
843         return 1;
844 }
845
846 /**
847  * callback results to mesh cb entry
848  * @param m: mesh state to send it for.
849  * @param rcode: if not 0, error code.
850  * @param rep: reply to send (or NULL if rcode is set).
851  * @param r: callback entry
852  */
853 static void
854 mesh_do_callback(struct mesh_state* m, int rcode, struct reply_info* rep,
855         struct mesh_cb* r)
856 {
857         int secure;
858         char* reason = NULL;
859         /* bogus messages are not made into servfail, sec_status passed 
860          * to the callback function */
861         if(rep && rep->security == sec_status_secure)
862                 secure = 1;
863         else    secure = 0;
864         if(!rep && rcode == LDNS_RCODE_NOERROR)
865                 rcode = LDNS_RCODE_SERVFAIL;
866         if(!rcode && rep->security == sec_status_bogus) {
867                 if(!(reason = errinf_to_str(&m->s)))
868                         rcode = LDNS_RCODE_SERVFAIL;
869         }
870         /* send the reply */
871         if(rcode) {
872                 if(rcode == LDNS_RCODE_SERVFAIL) {
873                         if(!inplace_cb_reply_servfail_call(m->s.env, &m->s.qinfo, &m->s,
874                                 rep, rcode, &r->edns, m->s.region))
875                                         r->edns.opt_list = NULL;
876                 } else {
877                         if(!inplace_cb_reply_call(m->s.env, &m->s.qinfo, &m->s, rep, rcode,
878                                 &r->edns, m->s.region))
879                                         r->edns.opt_list = NULL;
880                 }
881                 fptr_ok(fptr_whitelist_mesh_cb(r->cb));
882                 (*r->cb)(r->cb_arg, rcode, r->buf, sec_status_unchecked, NULL);
883         } else {
884                 size_t udp_size = r->edns.udp_size;
885                 sldns_buffer_clear(r->buf);
886                 r->edns.edns_version = EDNS_ADVERTISED_VERSION;
887                 r->edns.udp_size = EDNS_ADVERTISED_SIZE;
888                 r->edns.ext_rcode = 0;
889                 r->edns.bits &= EDNS_DO;
890
891                 if(!inplace_cb_reply_call(m->s.env, &m->s.qinfo, &m->s, rep,
892                         LDNS_RCODE_NOERROR, &r->edns, m->s.region) ||
893                         !reply_info_answer_encode(&m->s.qinfo, rep, r->qid, 
894                         r->qflags, r->buf, 0, 1, 
895                         m->s.env->scratch, udp_size, &r->edns, 
896                         (int)(r->edns.bits & EDNS_DO), secure)) 
897                 {
898                         fptr_ok(fptr_whitelist_mesh_cb(r->cb));
899                         (*r->cb)(r->cb_arg, LDNS_RCODE_SERVFAIL, r->buf,
900                                 sec_status_unchecked, NULL);
901                 } else {
902                         fptr_ok(fptr_whitelist_mesh_cb(r->cb));
903                         (*r->cb)(r->cb_arg, LDNS_RCODE_NOERROR, r->buf,
904                                 rep->security, reason);
905                 }
906         }
907         free(reason);
908         m->s.env->mesh->num_reply_addrs--;
909 }
910
911 /**
912  * Send reply to mesh reply entry
913  * @param m: mesh state to send it for.
914  * @param rcode: if not 0, error code.
915  * @param rep: reply to send (or NULL if rcode is set).
916  * @param r: reply entry
917  * @param prev: previous reply, already has its answer encoded in buffer.
918  */
919 static void
920 mesh_send_reply(struct mesh_state* m, int rcode, struct reply_info* rep,
921         struct mesh_reply* r, struct mesh_reply* prev)
922 {
923         struct timeval end_time;
924         struct timeval duration;
925         int secure;
926         /* Copy the client's EDNS for later restore, to make sure the edns
927          * compare is with the correct edns options. */
928         struct edns_data edns_bak = r->edns;
929         /* examine security status */
930         if(m->s.env->need_to_validate && (!(r->qflags&BIT_CD) ||
931                 m->s.env->cfg->ignore_cd) && rep && 
932                 rep->security <= sec_status_bogus) {
933                 rcode = LDNS_RCODE_SERVFAIL;
934                 if(m->s.env->cfg->stat_extended) 
935                         m->s.env->mesh->ans_bogus++;
936         }
937         if(rep && rep->security == sec_status_secure)
938                 secure = 1;
939         else    secure = 0;
940         if(!rep && rcode == LDNS_RCODE_NOERROR)
941                 rcode = LDNS_RCODE_SERVFAIL;
942         /* send the reply */
943         /* We don't reuse the encoded answer if either the previous or current
944          * response has a local alias.  We could compare the alias records
945          * and still reuse the previous answer if they are the same, but that
946          * would be complicated and error prone for the relatively minor case.
947          * So we err on the side of safety. */
948         if(prev && prev->qflags == r->qflags && 
949                 !prev->local_alias && !r->local_alias &&
950                 prev->edns.edns_present == r->edns.edns_present && 
951                 prev->edns.bits == r->edns.bits && 
952                 prev->edns.udp_size == r->edns.udp_size &&
953                 edns_opt_list_compare(prev->edns.opt_list, r->edns.opt_list)
954                 == 0) {
955                 /* if the previous reply is identical to this one, fix ID */
956                 if(prev->query_reply.c->buffer != r->query_reply.c->buffer)
957                         sldns_buffer_copy(r->query_reply.c->buffer, 
958                                 prev->query_reply.c->buffer);
959                 sldns_buffer_write_at(r->query_reply.c->buffer, 0, 
960                         &r->qid, sizeof(uint16_t));
961                 sldns_buffer_write_at(r->query_reply.c->buffer, 12, 
962                         r->qname, m->s.qinfo.qname_len);
963                 comm_point_send_reply(&r->query_reply);
964         } else if(rcode) {
965                 m->s.qinfo.qname = r->qname;
966                 m->s.qinfo.local_alias = r->local_alias;
967                 if(rcode == LDNS_RCODE_SERVFAIL) {
968                         if(!inplace_cb_reply_servfail_call(m->s.env, &m->s.qinfo, &m->s,
969                                 rep, rcode, &r->edns, m->s.region))
970                                         r->edns.opt_list = NULL;
971                 } else { 
972                         if(!inplace_cb_reply_call(m->s.env, &m->s.qinfo, &m->s, rep, rcode,
973                                 &r->edns, m->s.region))
974                                         r->edns.opt_list = NULL;
975                 }
976                 error_encode(r->query_reply.c->buffer, rcode, &m->s.qinfo,
977                         r->qid, r->qflags, &r->edns);
978                 comm_point_send_reply(&r->query_reply);
979         } else {
980                 size_t udp_size = r->edns.udp_size;
981                 r->edns.edns_version = EDNS_ADVERTISED_VERSION;
982                 r->edns.udp_size = EDNS_ADVERTISED_SIZE;
983                 r->edns.ext_rcode = 0;
984                 r->edns.bits &= EDNS_DO;
985                 m->s.qinfo.qname = r->qname;
986                 m->s.qinfo.local_alias = r->local_alias;
987                 if(!inplace_cb_reply_call(m->s.env, &m->s.qinfo, &m->s, rep,
988                         LDNS_RCODE_NOERROR, &r->edns, m->s.region) ||
989                         !reply_info_answer_encode(&m->s.qinfo, rep, r->qid, 
990                         r->qflags, r->query_reply.c->buffer, 0, 1, 
991                         m->s.env->scratch, udp_size, &r->edns, 
992                         (int)(r->edns.bits & EDNS_DO), secure)) 
993                 {
994                         if(!inplace_cb_reply_servfail_call(m->s.env, &m->s.qinfo, &m->s,
995                         rep, LDNS_RCODE_SERVFAIL, &r->edns, m->s.region))
996                                 r->edns.opt_list = NULL;
997                         error_encode(r->query_reply.c->buffer, 
998                                 LDNS_RCODE_SERVFAIL, &m->s.qinfo, r->qid, 
999                                 r->qflags, &r->edns);
1000                 }
1001                 r->edns = edns_bak;
1002                 comm_point_send_reply(&r->query_reply);
1003         }
1004         /* account */
1005         m->s.env->mesh->num_reply_addrs--;
1006         end_time = *m->s.env->now_tv;
1007         timeval_subtract(&duration, &end_time, &r->start_time);
1008         verbose(VERB_ALGO, "query took " ARG_LL "d.%6.6d sec",
1009                 (long long)duration.tv_sec, (int)duration.tv_usec);
1010         m->s.env->mesh->replies_sent++;
1011         timeval_add(&m->s.env->mesh->replies_sum_wait, &duration);
1012         timehist_insert(m->s.env->mesh->histogram, &duration);
1013         if(m->s.env->cfg->stat_extended) {
1014                 uint16_t rc = FLAGS_GET_RCODE(sldns_buffer_read_u16_at(r->
1015                         query_reply.c->buffer, 2));
1016                 if(secure) m->s.env->mesh->ans_secure++;
1017                 m->s.env->mesh->ans_rcode[ rc ] ++;
1018                 if(rc == 0 && LDNS_ANCOUNT(sldns_buffer_begin(r->
1019                         query_reply.c->buffer)) == 0)
1020                         m->s.env->mesh->ans_nodata++;
1021         }
1022 }
1023
1024 void mesh_query_done(struct mesh_state* mstate)
1025 {
1026         struct mesh_reply* r;
1027         struct mesh_reply* prev = NULL;
1028         struct mesh_cb* c;
1029         struct reply_info* rep = (mstate->s.return_msg?
1030                 mstate->s.return_msg->rep:NULL);
1031         for(r = mstate->reply_list; r; r = r->next) {
1032                 mesh_send_reply(mstate, mstate->s.return_rcode, rep, r, prev);
1033                 prev = r;
1034         }
1035         mstate->replies_sent = 1;
1036         for(c = mstate->cb_list; c; c = c->next) {
1037                 mesh_do_callback(mstate, mstate->s.return_rcode, rep, c);
1038         }
1039 }
1040
1041 void mesh_walk_supers(struct mesh_area* mesh, struct mesh_state* mstate)
1042 {
1043         struct mesh_state_ref* ref;
1044         RBTREE_FOR(ref, struct mesh_state_ref*, &mstate->super_set)
1045         {
1046                 /* make super runnable */
1047                 (void)rbtree_insert(&mesh->run, &ref->s->run_node);
1048                 /* callback the function to inform super of result */
1049                 fptr_ok(fptr_whitelist_mod_inform_super(
1050                         mesh->mods.mod[ref->s->s.curmod]->inform_super));
1051                 (*mesh->mods.mod[ref->s->s.curmod]->inform_super)(&mstate->s, 
1052                         ref->s->s.curmod, &ref->s->s);
1053         }
1054 }
1055
1056 struct mesh_state* mesh_area_find(struct mesh_area* mesh,
1057         struct query_info* qinfo, uint16_t qflags, int prime, int valrec)
1058 {
1059         struct mesh_state key;
1060         struct mesh_state* result;
1061
1062         key.node.key = &key;
1063         key.s.is_priming = prime;
1064         key.s.is_valrec = valrec;
1065         key.s.qinfo = *qinfo;
1066         key.s.query_flags = qflags;
1067         /* We are searching for a similar mesh state when we DO want to
1068          * aggregate the state; thus unique is set to NULL (the default
1069          * when aggregation is desired). */
1070         key.unique = NULL;
1071         
1072         result = (struct mesh_state*)rbtree_search(&mesh->all, &key);
1073         return result;
1074 }
1075
1076 int mesh_state_add_cb(struct mesh_state* s, struct edns_data* edns,
1077         sldns_buffer* buf, mesh_cb_func_t cb, void* cb_arg,
1078         uint16_t qid, uint16_t qflags)
1079 {
1080         struct mesh_cb* r = regional_alloc(s->s.region, 
1081                 sizeof(struct mesh_cb));
1082         if(!r)
1083                 return 0;
1084         r->buf = buf;
1085         log_assert(fptr_whitelist_mesh_cb(cb)); /* early failure if missing */
1086         r->cb = cb;
1087         r->cb_arg = cb_arg;
1088         r->edns = *edns;
1089         if(edns->opt_list) {
1090                 r->edns.opt_list = edns_opt_copy_region(edns->opt_list,
1091                         s->s.region);
1092                 if(!r->edns.opt_list)
1093                         return 0;
1094         }
1095         r->qid = qid;
1096         r->qflags = qflags;
1097         r->next = s->cb_list;
1098         s->cb_list = r;
1099         return 1;
1100
1101 }
1102
1103 int mesh_state_add_reply(struct mesh_state* s, struct edns_data* edns,
1104         struct comm_reply* rep, uint16_t qid, uint16_t qflags,
1105         const struct query_info* qinfo)
1106 {
1107         struct mesh_reply* r = regional_alloc(s->s.region, 
1108                 sizeof(struct mesh_reply));
1109         if(!r)
1110                 return 0;
1111         r->query_reply = *rep;
1112         r->edns = *edns;
1113         if(edns->opt_list) {
1114                 r->edns.opt_list = edns_opt_copy_region(edns->opt_list,
1115                         s->s.region);
1116                 if(!r->edns.opt_list)
1117                         return 0;
1118         }
1119         r->qid = qid;
1120         r->qflags = qflags;
1121         r->start_time = *s->s.env->now_tv;
1122         r->next = s->reply_list;
1123         r->qname = regional_alloc_init(s->s.region, qinfo->qname,
1124                 s->s.qinfo.qname_len);
1125         if(!r->qname)
1126                 return 0;
1127
1128         /* Data related to local alias stored in 'qinfo' (if any) is ephemeral
1129          * and can be different for different original queries (even if the
1130          * replaced query name is the same).  So we need to make a deep copy
1131          * and store the copy for each reply info. */
1132         if(qinfo->local_alias) {
1133                 struct packed_rrset_data* d;
1134                 struct packed_rrset_data* dsrc;
1135                 r->local_alias = regional_alloc_zero(s->s.region,
1136                         sizeof(*qinfo->local_alias));
1137                 if(!r->local_alias)
1138                         return 0;
1139                 r->local_alias->rrset = regional_alloc_init(s->s.region,
1140                         qinfo->local_alias->rrset,
1141                         sizeof(*qinfo->local_alias->rrset));
1142                 if(!r->local_alias->rrset)
1143                         return 0;
1144                 dsrc = qinfo->local_alias->rrset->entry.data;
1145
1146                 /* In the current implementation, a local alias must be
1147                  * a single CNAME RR (see worker_handle_request()). */
1148                 log_assert(!qinfo->local_alias->next && dsrc->count == 1 &&
1149                         qinfo->local_alias->rrset->rk.type ==
1150                         htons(LDNS_RR_TYPE_CNAME));
1151                 /* Technically, we should make a local copy for the owner
1152                  * name of the RRset, but in the case of the first (and
1153                  * currently only) local alias RRset, the owner name should
1154                  * point to the qname of the corresponding query, which should
1155                  * be valid throughout the lifetime of this mesh_reply.  So
1156                  * we can skip copying. */
1157                 log_assert(qinfo->local_alias->rrset->rk.dname ==
1158                         sldns_buffer_at(rep->c->buffer, LDNS_HEADER_SIZE));
1159
1160                 d = regional_alloc_init(s->s.region, dsrc,
1161                         sizeof(struct packed_rrset_data)
1162                         + sizeof(size_t) + sizeof(uint8_t*) + sizeof(time_t));
1163                 if(!d)
1164                         return 0;
1165                 r->local_alias->rrset->entry.data = d;
1166                 d->rr_len = (size_t*)((uint8_t*)d +
1167                         sizeof(struct packed_rrset_data));
1168                 d->rr_data = (uint8_t**)&(d->rr_len[1]);
1169                 d->rr_ttl = (time_t*)&(d->rr_data[1]);
1170                 d->rr_len[0] = dsrc->rr_len[0];
1171                 d->rr_ttl[0] = dsrc->rr_ttl[0];
1172                 d->rr_data[0] = regional_alloc_init(s->s.region,
1173                         dsrc->rr_data[0], d->rr_len[0]);
1174                 if(!d->rr_data[0])
1175                         return 0;
1176         } else
1177                 r->local_alias = NULL;
1178
1179         s->reply_list = r;
1180         return 1;
1181 }
1182
1183 /**
1184  * Continue processing the mesh state at another module.
1185  * Handles module to module transfer of control.
1186  * Handles module finished.
1187  * @param mesh: the mesh area.
1188  * @param mstate: currently active mesh state.
1189  *      Deleted if finished, calls _done and _supers to 
1190  *      send replies to clients and inform other mesh states.
1191  *      This in turn may create additional runnable mesh states.
1192  * @param s: state at which the current module exited.
1193  * @param ev: the event sent to the module.
1194  *      returned is the event to send to the next module.
1195  * @return true to continue processing at the new module;
1196  *      false if no further processing is needed.
1197  */
1198 static int
1199 mesh_continue(struct mesh_area* mesh, struct mesh_state* mstate,
1200         enum module_ext_state s, enum module_ev* ev)
1201 {
1202         mstate->num_activated++;
1203         if(mstate->num_activated > MESH_MAX_ACTIVATION) {
1204                 /* module is looping. Stop it. */
1205                 log_err("internal error: looping module stopped");
1206                 log_query_info(VERB_QUERY, "pass error for qstate",
1207                         &mstate->s.qinfo);
1208                 s = module_error;
1209         }
1210         if(s == module_wait_module || s == module_restart_next) {
1211                 /* start next module */
1212                 mstate->s.curmod++;
1213                 if(mesh->mods.num == mstate->s.curmod) {
1214                         log_err("Cannot pass to next module; at last module");
1215                         log_query_info(VERB_QUERY, "pass error for qstate",
1216                                 &mstate->s.qinfo);
1217                         mstate->s.curmod--;
1218                         return mesh_continue(mesh, mstate, module_error, ev);
1219                 }
1220                 if(s == module_restart_next) {
1221                         fptr_ok(fptr_whitelist_mod_clear(
1222                                 mesh->mods.mod[mstate->s.curmod]->clear));
1223                         (*mesh->mods.mod[mstate->s.curmod]->clear)
1224                                 (&mstate->s, mstate->s.curmod);
1225                         mstate->s.minfo[mstate->s.curmod] = NULL;
1226                 }
1227                 *ev = module_event_pass;
1228                 return 1;
1229         }
1230         if(s == module_wait_subquery && mstate->sub_set.count == 0) {
1231                 log_err("module cannot wait for subquery, subquery list empty");
1232                 log_query_info(VERB_QUERY, "pass error for qstate",
1233                         &mstate->s.qinfo);
1234                 s = module_error;
1235         }
1236         if(s == module_error && mstate->s.return_rcode == LDNS_RCODE_NOERROR) {
1237                 /* error is bad, handle pass back up below */
1238                 mstate->s.return_rcode = LDNS_RCODE_SERVFAIL;
1239         }
1240         if(s == module_error || s == module_finished) {
1241                 if(mstate->s.curmod == 0) {
1242                         mesh_query_done(mstate);
1243                         mesh_walk_supers(mesh, mstate);
1244                         mesh_state_delete(&mstate->s);
1245                         return 0;
1246                 }
1247                 /* pass along the locus of control */
1248                 mstate->s.curmod --;
1249                 *ev = module_event_moddone;
1250                 return 1;
1251         }
1252         return 0;
1253 }
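/*
 * Summary (editor-added) of the exit-state handling in mesh_continue() above:
 * - module_wait_module, module_restart_next: advance to the next module and
 *   re-enter it with module_event_pass (restart_next first clears the next
 *   module's state).
 * - module_wait_subquery with an empty sub_set: treated as module_error.
 * - module_error, module_finished: at the first module the query is answered
 *   (mesh_query_done), supers are informed and the state is deleted;
 *   otherwise control moves back one module with module_event_moddone.
 * - any other state (e.g. waiting for a reply or a subquery): stop running
 *   this mesh state for now.
 */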
1254
1255 void mesh_run(struct mesh_area* mesh, struct mesh_state* mstate,
1256         enum module_ev ev, struct outbound_entry* e)
1257 {
1258         enum module_ext_state s;
1259         verbose(VERB_ALGO, "mesh_run: start");
1260         while(mstate) {
1261                 /* run the module */
1262                 fptr_ok(fptr_whitelist_mod_operate(
1263                         mesh->mods.mod[mstate->s.curmod]->operate));
1264                 (*mesh->mods.mod[mstate->s.curmod]->operate)
1265                         (&mstate->s, ev, mstate->s.curmod, e);
1266
1267                 /* examine results */
1268                 mstate->s.reply = NULL;
1269                 regional_free_all(mstate->s.env->scratch);
1270                 s = mstate->s.ext_state[mstate->s.curmod];
1271                 verbose(VERB_ALGO, "mesh_run: %s module exit state is %s", 
1272                         mesh->mods.mod[mstate->s.curmod]->name, strextstate(s));
1273                 e = NULL;
1274                 if(mesh_continue(mesh, mstate, s, &ev))
1275                         continue;
1276
1277                 /* run more modules */
1278                 ev = module_event_pass;
1279                 if(mesh->run.count > 0) {
1280                         /* pop random element off the runnable tree */
1281                         mstate = (struct mesh_state*)mesh->run.root->key;
1282                         (void)rbtree_delete(&mesh->run, mstate);
1283                 } else mstate = NULL;
1284         }
1285         if(verbosity >= VERB_ALGO) {
1286                 mesh_stats(mesh, "mesh_run: end");
1287                 mesh_log_list(mesh);
1288         }
1289 }
1290
1291 void 
1292 mesh_log_list(struct mesh_area* mesh)
1293 {
1294         char buf[30];
1295         struct mesh_state* m;
1296         int num = 0;
1297         RBTREE_FOR(m, struct mesh_state*, &mesh->all) {
1298                 snprintf(buf, sizeof(buf), "%d%s%s%s%s%s%s mod%d %s%s", 
1299                         num++, (m->s.is_priming)?"p":"",  /* prime */
1300                         (m->s.is_valrec)?"v":"",  /* valrec */
1301                         (m->s.query_flags&BIT_RD)?"RD":"",
1302                         (m->s.query_flags&BIT_CD)?"CD":"",
1303                         (m->super_set.count==0)?"d":"", /* detached */
1304                         (m->sub_set.count!=0)?"c":"",  /* children */
1305                         m->s.curmod, (m->reply_list)?"rep":"", /*hasreply*/
1306                         (m->cb_list)?"cb":"" /* callbacks */
1307                         ); 
1308                 log_query_info(VERB_ALGO, buf, &m->s.qinfo);
1309         }
1310 }
1311
1312 void 
1313 mesh_stats(struct mesh_area* mesh, const char* str)
1314 {
1315         verbose(VERB_DETAIL, "%s %u recursion states (%u with reply, "
1316                 "%u detached), %u waiting replies, %u recursion replies "
1317                 "sent, %u replies dropped, %u states jostled out", 
1318                 str, (unsigned)mesh->all.count, 
1319                 (unsigned)mesh->num_reply_states,
1320                 (unsigned)mesh->num_detached_states,
1321                 (unsigned)mesh->num_reply_addrs,
1322                 (unsigned)mesh->replies_sent,
1323                 (unsigned)mesh->stats_dropped,
1324                 (unsigned)mesh->stats_jostled);
1325         if(mesh->replies_sent > 0) {
1326                 struct timeval avg;
1327                 timeval_divide(&avg, &mesh->replies_sum_wait, 
1328                         mesh->replies_sent);
1329                 log_info("average recursion processing time "
1330                         ARG_LL "d.%6.6d sec",
1331                         (long long)avg.tv_sec, (int)avg.tv_usec);
1332                 log_info("histogram of recursion processing times");
1333                 timehist_log(mesh->histogram, "recursions");
1334         }
1335 }
1336
1337 void 
1338 mesh_stats_clear(struct mesh_area* mesh)
1339 {
1340         if(!mesh)
1341                 return;
1342         mesh->replies_sent = 0;
1343         mesh->replies_sum_wait.tv_sec = 0;
1344         mesh->replies_sum_wait.tv_usec = 0;
1345         mesh->stats_jostled = 0;
1346         mesh->stats_dropped = 0;
1347         timehist_clear(mesh->histogram);
1348         mesh->ans_secure = 0;
1349         mesh->ans_bogus = 0;
1350         memset(&mesh->ans_rcode[0], 0, sizeof(size_t)*16);
1351         mesh->ans_nodata = 0;
1352 }
1353
1354 size_t 
1355 mesh_get_mem(struct mesh_area* mesh)
1356 {
1357         struct mesh_state* m;
1358         size_t s = sizeof(*mesh) + sizeof(struct timehist) +
1359                 sizeof(struct th_buck)*mesh->histogram->num +
1360                 sizeof(sldns_buffer) + sldns_buffer_capacity(mesh->qbuf_bak);
1361         RBTREE_FOR(m, struct mesh_state*, &mesh->all) {
1362                 /* all, including m itself allocated in qstate region */
1363                 s += regional_get_mem(m->s.region);
1364         }
1365         return s;
1366 }
1367
1368 int 
1369 mesh_detect_cycle(struct module_qstate* qstate, struct query_info* qinfo,
1370         uint16_t flags, int prime, int valrec)
1371 {
1372         struct mesh_area* mesh = qstate->env->mesh;
1373         struct mesh_state* dep_m = NULL;
1374         if(!mesh_state_is_unique(qstate->mesh_info))
1375                 dep_m = mesh_area_find(mesh, qinfo, flags, prime, valrec);
1376         return mesh_detect_cycle_found(qstate, dep_m);
1377 }
1378
1379 void mesh_list_insert(struct mesh_state* m, struct mesh_state** fp,
1380         struct mesh_state** lp)
1381 {
1382         /* insert as last element */
1383         m->prev = *lp;
1384         m->next = NULL;
1385         if(*lp)
1386                 (*lp)->next = m;
1387         else    *fp = m;
1388         *lp = m;
1389 }
1390
1391 void mesh_list_remove(struct mesh_state* m, struct mesh_state** fp,
1392         struct mesh_state** lp)
1393 {
1394         if(m->next)
1395                 m->next->prev = m->prev;
1396         else    *lp = m->prev;
1397         if(m->prev)
1398                 m->prev->next = m->next;
1399         else    *fp = m->next;
1400 }