]> CyberLeo.Net >> Repos - FreeBSD/releng/10.2.git/blob - contrib/ntp/libntp/ntp_intres.c
Fix multiple vulnerabilities of ntp.
[FreeBSD/releng/10.2.git] / contrib / ntp / libntp / ntp_intres.c
1 /*
2  * ntp_intres.c - Implements a generic blocking worker child or thread,
3  *                initially to provide a nonblocking solution for DNS
4  *                name to address lookups available with getaddrinfo().
5  *
6  * This is a new implementation as of 2009 sharing the filename and
7  * very little else with the prior implementation, which used a
8  * temporary file to receive a single set of requests from the parent,
9  * and a NTP mode 7 authenticated request to push back responses.
10  *
11  * A primary goal in rewriting this code was the need to support the
12  * pool configuration directive's requirement to retrieve multiple
13  * addresses resolving a single name, which has previously been
14  * satisfied with blocking resolver calls from the ntpd mainline code.
15  *
16  * A secondary goal is to provide a generic mechanism for other
17  * blocking operations to be delegated to a worker using a common
18  * model for both Unix and Windows ntpd.  ntp_worker.c, work_fork.c,
19  * and work_thread.c implement the generic mechanism.  This file
20  * implements the two current consumers, getaddrinfo_sometime() and the
21  * presently unused getnameinfo_sometime().
22  *
23  * Both routines deliver results to a callback and manage memory
24  * allocation, meaning there is no freeaddrinfo_sometime().
25  *
26  * The initial implementation for Unix uses a pair of unidirectional
27  * pipes, one each for requests and responses, connecting the forked
28  * blocking child worker with the ntpd mainline.  The threaded code
29  * uses arrays of pointers to queue requests and responses.
30  *
31  * The parent drives the process, including scheduling sleeps between
32  * retries.
33  *
34  * Memory is managed differently for a child process, which mallocs
35  * request buffers to read from the pipe into, whereas the threaded
36  * code mallocs a copy of the request to hand off to the worker via
37  * the queueing array.  The resulting request buffer is free()d by
38  * platform-independent code.  A wrinkle is the request needs to be
39  * available to the requestor during response processing.
40  *
41  * Response memory allocation is also platform-dependent.  With a
42  * separate process and pipes, the response is free()d after being
43  * written to the pipe.  With threads, the same memory is handed
44  * over and the requestor frees it after processing is completed.
45  *
46  * The code should be generalized to support threads on Unix using
47  * much of the same code used for Windows initially.
48  *
49  */
50 #ifdef HAVE_CONFIG_H
51 # include <config.h>
52 #endif
53
54 #include "ntp_workimpl.h"
55
56 #ifdef WORKER
57
58 #include <stdio.h>
59 #include <ctype.h>
60 #include <signal.h>
61
62 /**/
63 #ifdef HAVE_SYS_TYPES_H
64 # include <sys/types.h>
65 #endif
66 #ifdef HAVE_NETINET_IN_H
67 #include <netinet/in.h>
68 #endif
69 #include <arpa/inet.h>
70 /**/
71 #ifdef HAVE_SYS_PARAM_H
72 # include <sys/param.h>
73 #endif
74
75 #if !defined(HAVE_RES_INIT) && defined(HAVE___RES_INIT)
76 # define HAVE_RES_INIT
77 #endif
78
79 #if defined(HAVE_RESOLV_H) && defined(HAVE_RES_INIT)
80 # ifdef HAVE_ARPA_NAMESER_H
81 #  include <arpa/nameser.h> /* DNS HEADER struct */
82 # endif
83 # ifdef HAVE_NETDB_H
84 #  include <netdb.h>
85 # endif
86 # include <resolv.h>
87 # ifdef HAVE_INT32_ONLY_WITH_DNS
88 #  define HAVE_INT32
89 # endif
90 # ifdef HAVE_U_INT32_ONLY_WITH_DNS
91 #  define HAVE_U_INT32
92 # endif
93 #endif
94
95 #include "ntp.h"
96 #include "ntp_debug.h"
97 #include "ntp_malloc.h"
98 #include "ntp_syslog.h"
99 #include "ntp_unixtime.h"
100 #include "ntp_intres.h"
101 #include "intreswork.h"
102
103
104 /*
105  * Following are implementations of getaddrinfo_sometime() and
106  * getnameinfo_sometime().  Each is implemented in three routines:
107  *
108  * getaddrinfo_sometime()               getnameinfo_sometime()
109  * blocking_getaddrinfo()               blocking_getnameinfo()
110  * getaddrinfo_sometime_complete()      getnameinfo_sometime_complete()
111  *
112  * The first runs in the parent and marshalls (or serializes) request
113  * parameters into a request blob which is processed in the child by
114  * the second routine, blocking_*(), which serializes the results into
115  * a response blob unpacked by the third routine, *_complete(), which
116  * calls the callback routine provided with the request and frees
117  * _request_ memory allocated by the first routine.  Response memory
118  * is managed by the code which calls the *_complete routines.
119  */
120
121
122 /* === typedefs === */
123 typedef struct blocking_gai_req_tag {   /* marshalled args */
124         size_t                  octets;
125         u_int                   dns_idx;
126         time_t                  scheduled;
127         time_t                  earliest;
128         int                     retry;
129         struct addrinfo         hints;
130         u_int                   qflags;
131         gai_sometime_callback   callback;
132         void *                  context;
133         size_t                  nodesize;
134         size_t                  servsize;
135 } blocking_gai_req;
136
/*
 * blocking_gai_resp - fixed-size header of a getaddrinfo response.
 *
 * The header is followed by ai_count struct addrinfo, then ai_count
 * sockaddr_u, and finally the canonical name strings.
 */
typedef struct blocking_gai_resp_tag {
        size_t octets;          /* this header plus all trailing data */
        int retcode;            /* getaddrinfo() return value */
        int retry;              /* retry value copied from the request */
        int gai_errno;          /* errno, for the EAI_SYSTEM case */
        int ai_count;           /* number of addrinfo entries that follow */
} blocking_gai_resp;
148
149 typedef struct blocking_gni_req_tag {
150         size_t                  octets;
151         u_int                   dns_idx;
152         time_t                  scheduled;
153         time_t                  earliest;
154         int                     retry;
155         size_t                  hostoctets;
156         size_t                  servoctets;
157         int                     flags;
158         gni_sometime_callback   callback;
159         void *                  context;
160         sockaddr_u              socku;
161 } blocking_gni_req;
162
/*
 * blocking_gni_resp - fixed-size header of a getnameinfo response.
 *
 * The header is followed by hostoctets bytes of null-terminated
 * host, then servoctets bytes of null-terminated service.
 */
typedef struct blocking_gni_resp_tag {
        size_t octets;          /* this header plus both strings */
        int retcode;            /* getnameinfo() return value */
        int gni_errno;          /* errno, for the EAI_SYSTEM case */
        int retry;              /* retry value copied from the request */
        size_t hostoctets;      /* octets of host string, 0 on failure */
        size_t servoctets;      /* octets of service string, 0 on failure */
} blocking_gni_resp;
175
176 /* per-DNS-worker state in parent */
177 typedef struct dnschild_ctx_tag {
178         u_int   index;
179         time_t  next_dns_timeslot;
180 } dnschild_ctx;
181
182 /* per-DNS-worker state in worker */
183 typedef struct dnsworker_ctx_tag {
184         blocking_child *        c;
185         time_t                  ignore_scheduled_before;
186 #ifdef HAVE_RES_INIT
187         time_t  next_res_init;
188 #endif
189 } dnsworker_ctx;
190
191
192 /* === variables === */
193 dnschild_ctx **         dnschild_contexts;              /* parent */
194 u_int                   dnschild_contexts_alloc;
195 dnsworker_ctx **        dnsworker_contexts;             /* child */
196 u_int                   dnsworker_contexts_alloc;
197
198 #ifdef HAVE_RES_INIT
199 static  time_t          next_res_init;
200 #endif
201
202
203 /* === forward declarations === */
/* parent-side and worker-side context lookup */
static u_int reserve_dnschild_ctx(void);
static u_int get_dnschild_ctx(void);
static dnsworker_ctx *get_worker_context(blocking_child *c, u_int idx);

/* scheduling and retry policy */
static void scheduled_sleep(time_t scheduled, time_t earliest,
                            dnsworker_ctx *worker_ctx);
static void manage_dns_retry_interval(time_t *scheduled, time_t *earliest,
                                      int *retry, time_t *next_timeslot,
                                      int/*BOOL*/ noerr);
static int should_retry_dns(int rescode, int res_errno);

/*
 * Without res_init() support reload_resolv_conf() collapses to a
 * statement that merely evaluates its argument.
 */
#ifdef HAVE_RES_INIT
static void reload_resolv_conf(dnsworker_ctx *worker_ctx);
#else
# define reload_resolv_conf(wc) \
        do { (void)(wc); } while (FALSE)
#endif

/* response handlers handed to queue_blocking_request() */
static void getaddrinfo_sometime_complete(blocking_work_req rtype,
                                          void *context,
                                          size_t respsize,
                                          void *resp);
static void getnameinfo_sometime_complete(blocking_work_req rtype,
                                          void *context,
                                          size_t respsize,
                                          void *resp);
227
228
229 /* === functions === */
230 /*
231  * getaddrinfo_sometime - uses blocking child to call getaddrinfo then
232  *                        invokes provided callback completion function.
233  */
234 int
235 getaddrinfo_sometime_ex(
236         const char *            node,
237         const char *            service,
238         const struct addrinfo * hints,
239         int                     retry,
240         gai_sometime_callback   callback,
241         void *                  context,
242         u_int                   qflags
243         )
244 {
245         blocking_gai_req *      gai_req;
246         u_int                   idx;
247         dnschild_ctx *          child_ctx;
248         size_t                  req_size;
249         size_t                  nodesize;
250         size_t                  servsize;
251         time_t                  now;
252         
253         REQUIRE(NULL != node);
254         if (NULL != hints) {
255                 REQUIRE(0 == hints->ai_addrlen);
256                 REQUIRE(NULL == hints->ai_addr);
257                 REQUIRE(NULL == hints->ai_canonname);
258                 REQUIRE(NULL == hints->ai_next);
259         }
260
261         idx = get_dnschild_ctx();
262         child_ctx = dnschild_contexts[idx];
263
264         nodesize = strlen(node) + 1;
265         servsize = strlen(service) + 1;
266         req_size = sizeof(*gai_req) + nodesize + servsize;
267
268         gai_req = emalloc_zero(req_size);
269
270         gai_req->octets = req_size;
271         gai_req->dns_idx = idx;
272         now = time(NULL);
273         gai_req->scheduled = now;
274         gai_req->earliest = max(now, child_ctx->next_dns_timeslot);
275         child_ctx->next_dns_timeslot = gai_req->earliest;
276         if (hints != NULL)
277                 gai_req->hints = *hints;
278         gai_req->retry = retry;
279         gai_req->callback = callback;
280         gai_req->context = context;
281         gai_req->nodesize = nodesize;
282         gai_req->servsize = servsize;
283         gai_req->qflags = qflags;
284
285         memcpy((char *)gai_req + sizeof(*gai_req), node, nodesize);
286         memcpy((char *)gai_req + sizeof(*gai_req) + nodesize, service,
287                servsize);
288
289         if (queue_blocking_request(
290                 BLOCKING_GETADDRINFO,
291                 gai_req,
292                 req_size, 
293                 &getaddrinfo_sometime_complete, 
294                 gai_req)) {
295
296                 msyslog(LOG_ERR, "unable to queue getaddrinfo request");
297                 errno = EFAULT;
298                 return -1;
299         }
300
301         return 0;
302 }
303
304 int
305 blocking_getaddrinfo(
306         blocking_child *        c,
307         blocking_pipe_header *  req
308         )
309 {
310         blocking_gai_req *      gai_req;
311         dnsworker_ctx *         worker_ctx;
312         blocking_pipe_header *  resp;
313         blocking_gai_resp *     gai_resp;
314         char *                  node;
315         char *                  service;
316         struct addrinfo *       ai_res;
317         struct addrinfo *       ai;
318         struct addrinfo *       serialized_ai;
319         size_t                  canons_octets;
320         size_t                  this_octets;
321         size_t                  resp_octets;
322         char *                  cp;
323         time_t                  time_now;
324
325         gai_req = (void *)((char *)req + sizeof(*req));
326         node = (char *)gai_req + sizeof(*gai_req);
327         service = node + gai_req->nodesize;
328
329         worker_ctx = get_worker_context(c, gai_req->dns_idx);
330         scheduled_sleep(gai_req->scheduled, gai_req->earliest,
331                         worker_ctx);
332         reload_resolv_conf(worker_ctx);
333
334         /*
335          * Take a shot at the final size, better to overestimate
336          * at first and then realloc to a smaller size.
337          */
338
339         resp_octets = sizeof(*resp) + sizeof(*gai_resp) +
340                       16 * (sizeof(struct addrinfo) +
341                             sizeof(sockaddr_u)) +
342                       256;
343         resp = emalloc_zero(resp_octets);
344         gai_resp = (void *)(resp + 1);
345
346         TRACE(2, ("blocking_getaddrinfo given node %s serv %s fam %d flags %x\n", 
347                   node, service, gai_req->hints.ai_family,
348                   gai_req->hints.ai_flags));
349 #ifdef DEBUG
350         if (debug >= 2)
351                 fflush(stdout);
352 #endif  
353         ai_res = NULL;
354         gai_resp->retcode = getaddrinfo(node, service, &gai_req->hints,
355                                         &ai_res);
356         gai_resp->retry = gai_req->retry;
357 #ifdef EAI_SYSTEM
358         if (EAI_SYSTEM == gai_resp->retcode)
359                 gai_resp->gai_errno = errno;
360 #endif
361         canons_octets = 0;
362
363         if (0 == gai_resp->retcode) {
364                 ai = ai_res;
365                 while (NULL != ai) {
366                         gai_resp->ai_count++;
367                         if (ai->ai_canonname)
368                                 canons_octets += strlen(ai->ai_canonname) + 1;
369                         ai = ai->ai_next;
370                 }
371                 /*
372                  * If this query succeeded only after retrying, DNS may have
373                  * just become responsive.  Ignore previously-scheduled
374                  * retry sleeps once for each pending request, similar to
375                  * the way scheduled_sleep() does when its worker_sleep()
376                  * is interrupted.
377                  */
378                 if (gai_resp->retry > INITIAL_DNS_RETRY) {
379                         time_now = time(NULL);
380                         worker_ctx->ignore_scheduled_before = time_now;
381                         TRACE(1, ("DNS success after retry, ignoring sleeps scheduled before now (%s)\n",
382                                   humantime(time_now)));
383                 }
384         }
385
386         /*
387          * Our response consists of a header, followed by ai_count 
388          * addrinfo structs followed by ai_count sockaddr_storage 
389          * structs followed by the canonical names.
390          */
391         gai_resp->octets = sizeof(*gai_resp)
392                             + gai_resp->ai_count
393                                 * (sizeof(gai_req->hints)
394                                    + sizeof(sockaddr_u))
395                             + canons_octets;
396
397         resp_octets = sizeof(*resp) + gai_resp->octets;
398         resp = erealloc(resp, resp_octets);
399         gai_resp = (void *)(resp + 1);
400
401         /* cp serves as our current pointer while serializing */
402         cp = (void *)(gai_resp + 1);
403         canons_octets = 0;
404
405         if (0 == gai_resp->retcode) {
406                 ai = ai_res;
407                 while (NULL != ai) {
408                         memcpy(cp, ai, sizeof(*ai));
409                         serialized_ai = (void *)cp;
410                         cp += sizeof(*ai);
411
412                         /* transform ai_canonname into offset */
413                         if (NULL != serialized_ai->ai_canonname) {
414                                 serialized_ai->ai_canonname = (char *)canons_octets;
415                                 canons_octets += strlen(ai->ai_canonname) + 1;
416                         }
417                         
418                         /* leave fixup of ai_addr pointer for receiver */
419
420                         ai = ai->ai_next;
421                 }
422
423                 ai = ai_res;
424                 while (NULL != ai) {
425                         INSIST(ai->ai_addrlen <= sizeof(sockaddr_u));
426                         memcpy(cp, ai->ai_addr, ai->ai_addrlen);
427                         cp += sizeof(sockaddr_u);
428
429                         ai = ai->ai_next;
430                 }
431
432                 ai = ai_res;
433                 while (NULL != ai) {
434                         if (NULL != ai->ai_canonname) {
435                                 this_octets = strlen(ai->ai_canonname) + 1;
436                                 memcpy(cp, ai->ai_canonname, this_octets);
437                                 cp += this_octets;
438                         }
439
440                         ai = ai->ai_next;
441                 }
442                 freeaddrinfo(ai_res);
443         }
444
445         /*
446          * make sure our walk and earlier calc match
447          */
448         DEBUG_INSIST((size_t)(cp - (char *)resp) == resp_octets);
449
450         if (queue_blocking_response(c, resp, resp_octets, req)) {
451                 msyslog(LOG_ERR, "blocking_getaddrinfo can not queue response");
452                 return -1;
453         }
454
455         return 0;
456 }
457
458 int
459 getaddrinfo_sometime(
460         const char *            node,
461         const char *            service,
462         const struct addrinfo * hints,
463         int                     retry,
464         gai_sometime_callback   callback,
465         void *                  context
466         )
467 {
468         return getaddrinfo_sometime_ex(node, service, hints, retry,
469                                        callback, context, 0);
470 }
471
472
473 static void
474 getaddrinfo_sometime_complete(
475         blocking_work_req       rtype,
476         void *                  context,
477         size_t                  respsize,
478         void *                  resp
479         )
480 {
481         blocking_gai_req *      gai_req;
482         blocking_gai_resp *     gai_resp;
483         dnschild_ctx *          child_ctx;
484         struct addrinfo *       ai;
485         struct addrinfo *       next_ai;
486         sockaddr_u *            psau;
487         char *                  node;
488         char *                  service;
489         char *                  canon_start;
490         time_t                  time_now;
491         int                     again, noerr;
492         int                     af;
493         const char *            fam_spec;
494         int                     i;
495
496         gai_req = context;
497         gai_resp = resp;
498
499         DEBUG_REQUIRE(BLOCKING_GETADDRINFO == rtype);
500         DEBUG_REQUIRE(respsize == gai_resp->octets);
501
502         node = (char *)gai_req + sizeof(*gai_req);
503         service = node + gai_req->nodesize;
504
505         child_ctx = dnschild_contexts[gai_req->dns_idx];
506
507         if (0 == gai_resp->retcode) {
508                 /*
509                  * If this query succeeded only after retrying, DNS may have
510                  * just become responsive.
511                  */
512                 if (gai_resp->retry > INITIAL_DNS_RETRY) {
513                         time_now = time(NULL);
514                         child_ctx->next_dns_timeslot = time_now;
515                         TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
516                                   gai_req->dns_idx, humantime(time_now)));
517                 }
518         } else {
519                 noerr = !!(gai_req->qflags & GAIR_F_IGNDNSERR);
520                 again = noerr || should_retry_dns(
521                                         gai_resp->retcode, gai_resp->gai_errno);
522                 /*
523                  * exponential backoff of DNS retries to 64s
524                  */
525                 if (gai_req->retry > 0 && again) {
526                         /* log the first retry only */
527                         if (INITIAL_DNS_RETRY == gai_req->retry)
528                                 NLOG(NLOG_SYSINFO) {
529                                         af = gai_req->hints.ai_family;
530                                         fam_spec = (AF_INET6 == af)
531                                                        ? " (AAAA)"
532                                                        : (AF_INET == af)
533                                                              ? " (A)"
534                                                              : "";
535 #ifdef EAI_SYSTEM
536                                         if (EAI_SYSTEM == gai_resp->retcode) {
537                                                 errno = gai_resp->gai_errno;
538                                                 msyslog(LOG_INFO,
539                                                         "retrying DNS %s%s: EAI_SYSTEM %d: %m",
540                                                         node, fam_spec,
541                                                         gai_resp->gai_errno);
542                                         } else
543 #endif
544                                                 msyslog(LOG_INFO,
545                                                         "retrying DNS %s%s: %s (%d)",
546                                                         node, fam_spec,
547                                                         gai_strerror(gai_resp->retcode),
548                                                         gai_resp->retcode);
549                                 }
550                         manage_dns_retry_interval(
551                                 &gai_req->scheduled, &gai_req->earliest,
552                                 &gai_req->retry, &child_ctx->next_dns_timeslot,
553                                 noerr);
554                         if (!queue_blocking_request(
555                                         BLOCKING_GETADDRINFO,
556                                         gai_req,
557                                         gai_req->octets,
558                                         &getaddrinfo_sometime_complete,
559                                         gai_req))
560                                 return;
561                         else
562                                 msyslog(LOG_ERR,
563                                         "unable to retry hostname %s",
564                                         node);
565                 }
566         }
567
568         /*
569          * fixup pointers in returned addrinfo array
570          */
571         ai = (void *)((char *)gai_resp + sizeof(*gai_resp));
572         next_ai = NULL;
573         for (i = gai_resp->ai_count - 1; i >= 0; i--) {
574                 ai[i].ai_next = next_ai;
575                 next_ai = &ai[i];
576         }
577
578         psau = (void *)((char *)ai + gai_resp->ai_count * sizeof(*ai));
579         canon_start = (char *)psau + gai_resp->ai_count * sizeof(*psau);
580
581         for (i = 0; i < gai_resp->ai_count; i++) {
582                 if (NULL != ai[i].ai_addr)
583                         ai[i].ai_addr = &psau->sa;
584                 psau++;
585                 if (NULL != ai[i].ai_canonname)
586                         ai[i].ai_canonname += (size_t)canon_start;
587         }
588
589         ENSURE((char *)psau == canon_start);
590
591         if (!gai_resp->ai_count)
592                 ai = NULL;
593         
594         (*gai_req->callback)(gai_resp->retcode, gai_resp->gai_errno,
595                              gai_req->context, node, service, 
596                              &gai_req->hints, ai);
597
598         free(gai_req);
599         /* gai_resp is part of block freed by process_blocking_resp() */
600 }
601
602
603 #ifdef TEST_BLOCKING_WORKER
604 void gai_test_callback(int rescode, int gai_errno, void *context, const char *name, const char *service, const struct addrinfo *hints, const struct addrinfo *ai_res)
605 {
606         sockaddr_u addr;
607
608         if (rescode) {
609                 TRACE(1, ("gai_test_callback context %p error rescode %d %s serv %s\n",
610                           context, rescode, name, service));
611                 return;
612         }
613         while (!rescode && NULL != ai_res) {
614                 ZERO_SOCK(&addr);
615                 memcpy(&addr, ai_res->ai_addr, ai_res->ai_addrlen);
616                 TRACE(1, ("ctx %p fam %d addr %s canon '%s' type %s at %p ai_addr %p ai_next %p\n", 
617                           context,
618                           AF(&addr),
619                           stoa(&addr), 
620                           (ai_res->ai_canonname)
621                               ? ai_res->ai_canonname
622                               : "",
623                           (SOCK_DGRAM == ai_res->ai_socktype) 
624                               ? "DGRAM" 
625                               : (SOCK_STREAM == ai_res->ai_socktype) 
626                                     ? "STREAM" 
627                                     : "(other)",
628                           ai_res,
629                           ai_res->ai_addr,
630                           ai_res->ai_next));
631
632                 getnameinfo_sometime((sockaddr_u *)ai_res->ai_addr, 128, 32, 0, gni_test_callback, context);
633
634                 ai_res = ai_res->ai_next;
635         }
636 }
637 #endif  /* TEST_BLOCKING_WORKER */
638
639
640 int
641 getnameinfo_sometime(
642         sockaddr_u *            psau,
643         size_t                  hostoctets,
644         size_t                  servoctets,
645         int                     flags,
646         gni_sometime_callback   callback,
647         void *                  context
648         )
649 {
650         blocking_gni_req *      gni_req;
651         u_int                   idx;
652         dnschild_ctx *          child_ctx;
653         time_t                  time_now;
654         
655         REQUIRE(hostoctets);
656         REQUIRE(hostoctets + servoctets < 1024);
657
658         idx = get_dnschild_ctx();
659         child_ctx = dnschild_contexts[idx];
660
661         gni_req = emalloc_zero(sizeof(*gni_req));
662
663         gni_req->octets = sizeof(*gni_req);
664         gni_req->dns_idx = idx;
665         time_now = time(NULL);
666         gni_req->scheduled = time_now;
667         gni_req->earliest = max(time_now, child_ctx->next_dns_timeslot);
668         child_ctx->next_dns_timeslot = gni_req->earliest;
669         memcpy(&gni_req->socku, psau, SOCKLEN(psau));
670         gni_req->hostoctets = hostoctets;
671         gni_req->servoctets = servoctets;
672         gni_req->flags = flags;
673         gni_req->retry = INITIAL_DNS_RETRY;
674         gni_req->callback = callback;
675         gni_req->context = context;
676
677         if (queue_blocking_request(
678                 BLOCKING_GETNAMEINFO,
679                 gni_req,
680                 sizeof(*gni_req), 
681                 &getnameinfo_sometime_complete, 
682                 gni_req)) {
683
684                 msyslog(LOG_ERR, "unable to queue getnameinfo request");
685                 errno = EFAULT;
686                 return -1;
687         }
688
689         return 0;
690 }
691
692
693 int
694 blocking_getnameinfo(
695         blocking_child *        c,
696         blocking_pipe_header *  req
697         )
698 {
699         blocking_gni_req *      gni_req;
700         dnsworker_ctx *         worker_ctx;
701         blocking_pipe_header *  resp;
702         blocking_gni_resp *     gni_resp;
703         size_t                  octets;
704         size_t                  resp_octets;
705         char *                  service;
706         char *                  cp;
707         int                     rc;
708         time_t                  time_now;
709         char                    host[1024];
710
711         gni_req = (void *)((char *)req + sizeof(*req));
712
713         octets = gni_req->hostoctets + gni_req->servoctets;
714
715         /*
716          * Some alloca() implementations are fragile regarding
717          * large allocations.  We only need room for the host
718          * and service names.
719          */
720         REQUIRE(octets < sizeof(host));
721         service = host + gni_req->hostoctets;
722
723         worker_ctx = get_worker_context(c, gni_req->dns_idx);
724         scheduled_sleep(gni_req->scheduled, gni_req->earliest,
725                         worker_ctx);
726         reload_resolv_conf(worker_ctx);
727
728         /*
729          * Take a shot at the final size, better to overestimate
730          * then realloc to a smaller size.
731          */
732
733         resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
734         resp = emalloc_zero(resp_octets);
735         gni_resp = (void *)((char *)resp + sizeof(*resp));
736
737         TRACE(2, ("blocking_getnameinfo given addr %s flags 0x%x hostlen %lu servlen %lu\n",
738                   stoa(&gni_req->socku), gni_req->flags,
739                   (u_long)gni_req->hostoctets, (u_long)gni_req->servoctets));
740         
741         gni_resp->retcode = getnameinfo(&gni_req->socku.sa,
742                                         SOCKLEN(&gni_req->socku),
743                                         host,
744                                         gni_req->hostoctets,
745                                         service,
746                                         gni_req->servoctets,
747                                         gni_req->flags);
748         gni_resp->retry = gni_req->retry;
749 #ifdef EAI_SYSTEM
750         if (EAI_SYSTEM == gni_resp->retcode)
751                 gni_resp->gni_errno = errno;
752 #endif
753
754         if (0 != gni_resp->retcode) {
755                 gni_resp->hostoctets = 0;
756                 gni_resp->servoctets = 0;
757         } else {
758                 gni_resp->hostoctets = strlen(host) + 1;
759                 gni_resp->servoctets = strlen(service) + 1;
760                 /*
761                  * If this query succeeded only after retrying, DNS may have
762                  * just become responsive.  Ignore previously-scheduled
763                  * retry sleeps once for each pending request, similar to
764                  * the way scheduled_sleep() does when its worker_sleep()
765                  * is interrupted.
766                  */
767                 if (gni_req->retry > INITIAL_DNS_RETRY) {
768                         time_now = time(NULL);
769                         worker_ctx->ignore_scheduled_before = time_now;
770                         TRACE(1, ("DNS success after retrying, ignoring sleeps scheduled before now (%s)\n",
771                                 humantime(time_now)));
772                 }
773         }
774         octets = gni_resp->hostoctets + gni_resp->servoctets;
775         /*
776          * Our response consists of a header, followed by the host and
777          * service strings, each null-terminated.
778          */
779         resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
780
781         resp = erealloc(resp, resp_octets);
782         gni_resp = (void *)(resp + 1);
783
784         gni_resp->octets = sizeof(*gni_resp) + octets;
785
786         /* cp serves as our current pointer while serializing */
787         cp = (void *)(gni_resp + 1);
788
789         if (0 == gni_resp->retcode) {
790                 memcpy(cp, host, gni_resp->hostoctets);
791                 cp += gni_resp->hostoctets;
792                 memcpy(cp, service, gni_resp->servoctets);
793                 cp += gni_resp->servoctets;
794         }
795
796         INSIST((size_t)(cp - (char *)resp) == resp_octets);
797         INSIST(resp_octets - sizeof(*resp) == gni_resp->octets);
798
799         rc = queue_blocking_response(c, resp, resp_octets, req);
800         if (rc)
801                 msyslog(LOG_ERR, "blocking_getnameinfo unable to queue response");
802         return rc;
803 }
804
805
806 static void
807 getnameinfo_sometime_complete(
808         blocking_work_req       rtype,
809         void *                  context,
810         size_t                  respsize,
811         void *                  resp
812         )
813 {
814         blocking_gni_req *      gni_req;
815         blocking_gni_resp *     gni_resp;
816         dnschild_ctx *          child_ctx;
817         char *                  host;
818         char *                  service;
819         time_t                  time_now;
820         int                     again;
821
822         gni_req = context;
823         gni_resp = resp;
824
825         DEBUG_REQUIRE(BLOCKING_GETNAMEINFO == rtype);
826         DEBUG_REQUIRE(respsize == gni_resp->octets);
827
828         child_ctx = dnschild_contexts[gni_req->dns_idx];
829
830         if (0 == gni_resp->retcode) {
831                 /*
832                  * If this query succeeded only after retrying, DNS may have
833                  * just become responsive.
834                  */
835                 if (gni_resp->retry > INITIAL_DNS_RETRY) {
836                         time_now = time(NULL);
837                         child_ctx->next_dns_timeslot = time_now;
838                         TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
839                                   gni_req->dns_idx, humantime(time_now)));
840                 }
841         } else {
842                 again = should_retry_dns(gni_resp->retcode, gni_resp->gni_errno);
843                 /*
844                  * exponential backoff of DNS retries to 64s
845                  */
846                 if (gni_req->retry > 0)
847                         manage_dns_retry_interval(&gni_req->scheduled,
848                             &gni_req->earliest, &gni_req->retry,
849                                                   &child_ctx->next_dns_timeslot, FALSE);
850
851                 if (gni_req->retry > 0 && again) {
852                         if (!queue_blocking_request(
853                                 BLOCKING_GETNAMEINFO,
854                                 gni_req,
855                                 gni_req->octets, 
856                                 &getnameinfo_sometime_complete, 
857                                 gni_req))
858                                 return;
859
860                         msyslog(LOG_ERR, "unable to retry reverse lookup of %s", stoa(&gni_req->socku));
861                 }
862         }
863
864         if (!gni_resp->hostoctets) {
865                 host = NULL;
866                 service = NULL;
867         } else {
868                 host = (char *)gni_resp + sizeof(*gni_resp);
869                 service = (gni_resp->servoctets) 
870                               ? host + gni_resp->hostoctets
871                               : NULL;
872         }
873
874         (*gni_req->callback)(gni_resp->retcode, gni_resp->gni_errno,
875                              &gni_req->socku, gni_req->flags, host,
876                              service, gni_req->context);
877
878         free(gni_req);
879         /* gni_resp is part of block freed by process_blocking_resp() */
880 }
881
882
883 #ifdef TEST_BLOCKING_WORKER
884 void gni_test_callback(int rescode, int gni_errno, sockaddr_u *psau, int flags, const char *host, const char *service, void *context)
885 {
886         if (!rescode)
887                 TRACE(1, ("gni_test_callback got host '%s' serv '%s' for addr %s context %p\n", 
888                           host, service, stoa(psau), context));
889         else
890                 TRACE(1, ("gni_test_callback context %p rescode %d gni_errno %d flags 0x%x addr %s\n",
891                           context, rescode, gni_errno, flags, stoa(psau)));
892 }
893 #endif  /* TEST_BLOCKING_WORKER */
894
895
896 #ifdef HAVE_RES_INIT
897 static void
898 reload_resolv_conf(
899         dnsworker_ctx * worker_ctx
900         )
901 {
902         time_t  time_now;
903
904         /*
905          * This is ad-hoc.  Reload /etc/resolv.conf once per minute
906          * to pick up on changes from the DHCP client.  [Bug 1226]
907          * When using threads for the workers, this needs to happen
908          * only once per minute process-wide.
909          */
910         time_now = time(NULL);
911 # ifdef WORK_THREAD
912         worker_ctx->next_res_init = next_res_init;
913 # endif
914         if (worker_ctx->next_res_init <= time_now) {
915                 if (worker_ctx->next_res_init != 0)
916                         res_init();
917                 worker_ctx->next_res_init = time_now + 60;
918 # ifdef WORK_THREAD
919                 next_res_init = worker_ctx->next_res_init;
920 # endif
921         }
922 }
923 #endif  /* HAVE_RES_INIT */
924
925
926 static u_int
927 reserve_dnschild_ctx(void)
928 {
929         const size_t    ps = sizeof(dnschild_contexts[0]);
930         const size_t    cs = sizeof(*dnschild_contexts[0]);
931         u_int           c;
932         u_int           new_alloc;
933         size_t          octets;
934         size_t          new_octets;
935
936         c = 0;
937         while (TRUE) {
938                 for ( ; c < dnschild_contexts_alloc; c++) {
939                         if (NULL == dnschild_contexts[c]) {
940                                 dnschild_contexts[c] = emalloc_zero(cs);
941
942                                 return c;
943                         }
944                 }
945                 new_alloc = dnschild_contexts_alloc + 20;
946                 new_octets = new_alloc * ps;
947                 octets = dnschild_contexts_alloc * ps;
948                 dnschild_contexts = erealloc_zero(dnschild_contexts,
949                                                   new_octets, octets);
950                 dnschild_contexts_alloc = new_alloc;
951         }
952 }
953
954
955 static u_int
956 get_dnschild_ctx(void)
957 {
958         static u_int    shared_ctx = UINT_MAX;
959
960         if (worker_per_query)
961                 return reserve_dnschild_ctx();
962
963         if (UINT_MAX == shared_ctx)
964                 shared_ctx = reserve_dnschild_ctx();
965
966         return shared_ctx;
967 }
968
969
970 static dnsworker_ctx *
971 get_worker_context(
972         blocking_child *        c,
973         u_int                   idx
974         )
975 {
976         u_int           min_new_alloc;
977         u_int           new_alloc;
978         size_t          octets;
979         size_t          new_octets;
980         dnsworker_ctx * retv;
981
982         worker_global_lock(TRUE);
983         
984         if (dnsworker_contexts_alloc <= idx) {
985                 min_new_alloc = 1 + idx;
986                 /* round new_alloc up to nearest multiple of 4 */
987                 new_alloc = (min_new_alloc + 4) & ~(4 - 1);
988                 new_octets = new_alloc * sizeof(dnsworker_ctx*);
989                 octets = dnsworker_contexts_alloc * sizeof(dnsworker_ctx*);
990                 dnsworker_contexts = erealloc_zero(dnsworker_contexts,
991                                                    new_octets, octets);
992                 dnsworker_contexts_alloc = new_alloc;
993                 retv = emalloc_zero(sizeof(dnsworker_ctx));
994                 dnsworker_contexts[idx] = retv;
995         } else if (NULL == (retv = dnsworker_contexts[idx])) {
996                 retv = emalloc_zero(sizeof(dnsworker_ctx));
997                 dnsworker_contexts[idx] = retv;
998         }
999         
1000         worker_global_lock(FALSE);
1001         
1002         ZERO(*retv);
1003         retv->c = c;
1004         return retv;
1005 }
1006
1007
1008 static void
1009 scheduled_sleep(
1010         time_t          scheduled,
1011         time_t          earliest,
1012         dnsworker_ctx * worker_ctx
1013         )
1014 {
1015         time_t now;
1016
1017         if (scheduled < worker_ctx->ignore_scheduled_before) {
1018                 TRACE(1, ("ignoring sleep until %s scheduled at %s (before %s)\n",
1019                           humantime(earliest), humantime(scheduled),
1020                           humantime(worker_ctx->ignore_scheduled_before)));
1021                 return;
1022         }
1023
1024         now = time(NULL);
1025
1026         if (now < earliest) {
1027                 TRACE(1, ("sleep until %s scheduled at %s (>= %s)\n",
1028                           humantime(earliest), humantime(scheduled),
1029                           humantime(worker_ctx->ignore_scheduled_before)));
1030                 if (-1 == worker_sleep(worker_ctx->c, earliest - now)) {
1031                         /* our sleep was interrupted */
1032                         now = time(NULL);
1033                         worker_ctx->ignore_scheduled_before = now;
1034 #ifdef HAVE_RES_INIT
1035                         worker_ctx->next_res_init = now + 60;
1036                         next_res_init = worker_ctx->next_res_init;
1037                         res_init();
1038 #endif
1039                         TRACE(1, ("sleep interrupted by daemon, ignoring sleeps scheduled before now (%s)\n",
1040                                   humantime(worker_ctx->ignore_scheduled_before)));
1041                 }
1042         }
1043 }
1044
1045
/*
 * manage_dns_retry_interval is a helper used by
 * getaddrinfo_sometime_complete and getnameinfo_sometime_complete
 * to calculate the new retry interval and schedule the next query.
 *
 * In/out:
 *   *pretry          current retry interval in seconds on entry,
 *                    next (backed-off) interval on return
 *   *pnext_timeslot  earliest time the next query may run; advanced
 *                    to the time chosen for this retry
 * Out:
 *   *pscheduled      the time this retry was scheduled (now)
 *   *pwhen           the time the retry should run
 *
 * forever selects the cap for the interval: 1024 seconds when
 * nonzero, 64 seconds otherwise.
 */
static void
manage_dns_retry_interval(
        time_t *        pscheduled,
        time_t *        pwhen,
        int *           pretry,
        time_t *        pnext_timeslot,
        int             forever
        )
{
        const int       ceiling = forever ? 1024 : 64;
        time_t          now;
        time_t          due;
        int             interval;

        now = time(NULL);
        interval = *pretry;

        /* run no sooner than the already-reserved timeslot */
        due = now + interval;
        if (due < *pnext_timeslot)
                due = *pnext_timeslot;
        *pnext_timeslot = due;

        /*
         * This exponential backoff is slower than doubling up: the
         * sequence goes 2-3-4-6-8-12-16-24-32... up to the ceiling.
         * After doubling, a value with more than one bit set drops
         * its lowest set bit; a power of two loses a quarter.
         */
        interval <<= 1;
        if (interval & (interval - 1))
                interval &= interval - 1;
        else
                interval -= interval >> 2;
        if (interval > ceiling)
                interval = ceiling;

        *pscheduled = now;
        *pwhen = due;
        *pretry = interval;
}
1087
/*
 * should_retry_dns is a helper used by getaddrinfo_sometime_complete
 * and getnameinfo_sometime_complete which implements ntpd's DNS retry
 * policy.
 *
 * rescode is the resolver's EAI_* result and res_errno the errno
 * saved alongside it (only reported for EAI_SYSTEM).  Returns nonzero
 * when the failure is considered temporary and the lookup should be
 * retried, zero otherwise.
 */
static int
should_retry_dns(
        int     rescode,
        int     res_errno
        )
{
        static int      saw_eai_again;
        int             retry_ok = 0;
#if defined (EAI_SYSTEM) && defined(DEBUG)
        char            errtext[256];
#endif

        switch (rescode) {

        case EAI_AGAIN:
                saw_eai_again = 1;              /* [Bug 1178] */
                /* FALLTHROUGH */
        case EAI_FAIL:
                retry_ok = 1;
                break;

        case EAI_NONAME:
#if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
        case EAI_NODATA:
#endif
                /* retry only until an EAI_AGAIN has been seen */
                retry_ok = !saw_eai_again;      /* [Bug 1178] */
                break;

#ifdef EAI_SYSTEM
        case EAI_SYSTEM:
                /*
                 * EAI_SYSTEM means the real error is in errno.  We should be more
                 * discriminating about which errno values require retrying, but
                 * this matches existing behavior.
                 */
                retry_ok = 1;
# ifdef DEBUG
                errno_to_str(res_errno, errtext, sizeof(errtext));
                TRACE(1, ("intres: EAI_SYSTEM errno %d (%s) means try again, right?\n",
                          res_errno, errtext));
# endif
                break;
#endif

        default:
                /* any other result is not retried */
                break;
        }

        TRACE(2, ("intres: resolver returned: %s (%d), %sretrying\n",
                  gai_strerror(rescode), rescode, retry_ok ? "" : "not "));

        return retry_ok;
}
1151
1152 #else   /* !WORKER follows */
1153 int ntp_intres_nonempty_compilation_unit;
1154 #endif