]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/ntp/ntpd/ntp_io.c
Fix multiple denial of service in ntpd.
[FreeBSD/FreeBSD.git] / contrib / ntp / ntpd / ntp_io.c
1 /*
2  * ntp_io.c - input/output routines for ntpd.   The socket-opening code
3  *                 was shamelessly stolen from ntpd.
4  */
5
6 #ifdef HAVE_CONFIG_H
7 # include <config.h>
8 #endif
9
10 #include <stdio.h>
11 #include <signal.h>
12 #ifdef HAVE_FNMATCH_H
13 # include <fnmatch.h>
14 # if !defined(FNM_CASEFOLD) && defined(FNM_IGNORECASE)
15 #  define FNM_CASEFOLD FNM_IGNORECASE
16 # endif
17 #endif
18 #ifdef HAVE_SYS_PARAM_H
19 # include <sys/param.h>
20 #endif
21 #ifdef HAVE_SYS_IOCTL_H
22 # include <sys/ioctl.h>
23 #endif
24 #ifdef HAVE_SYS_SOCKIO_H        /* UXPV: SIOC* #defines (Frank Vance <fvance@waii.com>) */
25 # include <sys/sockio.h>
26 #endif
27 #ifdef HAVE_SYS_UIO_H
28 # include <sys/uio.h>
29 #endif
30
31 #include "ntp_machine.h"
32 #include "ntpd.h"
33 #include "ntp_io.h"
34 #include "iosignal.h"
35 #include "ntp_lists.h"
36 #include "ntp_refclock.h"
37 #include "ntp_stdlib.h"
38 #include "ntp_worker.h"
39 #include "ntp_request.h"
40 #include "ntp_assert.h"
41 #include "timevalops.h"
42 #include "timespecops.h"
43 #include "ntpd-opts.h"
44 #include "safecast.h"
45
46 /* Don't include ISC's version of IPv6 variables and structures */
47 #define ISC_IPV6_H 1
48 #include <isc/mem.h>
49 #include <isc/interfaceiter.h>
50 #include <isc/netaddr.h>
51 #include <isc/result.h>
52 #include <isc/sockaddr.h>
53
54 #ifdef SIM
55 #include "ntpsim.h"
56 #endif
57
58 #ifdef HAS_ROUTING_SOCKET
59 # include <net/route.h>
60 # ifdef HAVE_RTNETLINK
61 #  include <linux/rtnetlink.h>
62 # endif
63 #endif
64
65 /*
66  * setsockopt does not always have the same arg declaration
67  * across all platforms. If it's not defined we make it empty
68  */
69
70 #ifndef SETSOCKOPT_ARG_CAST
71 #define SETSOCKOPT_ARG_CAST
72 #endif
73
74 extern int listen_to_virtual_ips;
75
76 #ifndef IPTOS_DSCP_EF
77 #define IPTOS_DSCP_EF 0xb8
78 #endif
79 int qos = IPTOS_DSCP_EF;        /* QoS RFC3246 */
80
81 #ifdef LEAP_SMEAR
82 /* TODO burnicki: This should be moved to ntp_timer.c, but if we do so
83  * we get a linker error. Since we're running out of time before the leap
84  * second occurs, we leave it here where it just works.
85  */
86 int leap_smear_intv;
87 #endif
88
89 /*
90  * NIC rule entry
91  */
92 typedef struct nic_rule_tag nic_rule;
93
94 struct nic_rule_tag {
95         nic_rule *      next;           /* next rule; presumably chained from nic_rule_list */
96         nic_rule_action action;         /* action selected when this rule matches */
97         nic_rule_match  match_type;     /* which kind of match this rule performs */
98         char *          if_name;        /* interface name -- NOTE(review): presumably used only for name-based matches; confirm */
99         sockaddr_u      addr;           /* address -- NOTE(review): presumably used only for address/prefix matches; confirm */
100         int             prefixlen;      /* prefix length that goes with addr -- TODO confirm units (bits) */
101 };
102
103 /*
104  * NIC rule listhead.  Entries are added at the head so that the first
105  * match in the list is the last matching rule specified.
106  */
107 nic_rule *nic_rule_list;
108
109
110 #if defined(SO_BINTIME) && defined(SCM_BINTIME) && defined(CMSG_FIRSTHDR)
111 #  define HAVE_PACKET_TIMESTAMP
112 #  define HAVE_BINTIME
113 #  ifdef BINTIME_CTLMSGBUF_SIZE
114 #   define CMSG_BUFSIZE BINTIME_CTLMSGBUF_SIZE
115 #  else
116 #   define CMSG_BUFSIZE  1536 /* moderate default */
117 #  endif
118 #elif defined(SO_TIMESTAMPNS) && defined(SCM_TIMESTAMPNS) && defined(CMSG_FIRSTHDR)
119 #  define HAVE_PACKET_TIMESTAMP
120 #  define HAVE_TIMESTAMPNS
121 #  ifdef TIMESTAMPNS_CTLMSGBUF_SIZE
122 #   define CMSG_BUFSIZE TIMESTAMPNS_CTLMSGBUF_SIZE
123 #  else
124 #   define CMSG_BUFSIZE  1536 /* moderate default */
125 #  endif
126 #elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP) && defined(CMSG_FIRSTHDR)
127 #  define HAVE_PACKET_TIMESTAMP
128 #  define HAVE_TIMESTAMP
129 #  ifdef TIMESTAMP_CTLMSGBUF_SIZE
130 #   define CMSG_BUFSIZE TIMESTAMP_CTLMSGBUF_SIZE
131 #  else
132 #   define CMSG_BUFSIZE  1536 /* moderate default */
133 #  endif
134 #else
135 /* fill in for old/other timestamp interfaces */
136 #endif
137
138 #if defined(SYS_WINNT)
139 #include "win32_io.h"
140 #include <isc/win32os.h>
141 #endif
142
143 /*
144  * We do asynchronous input using the SIGIO facility.  A number of
145  * recvbuf buffers are preallocated for input.  In the signal
146  * handler we poll to see which sockets are ready and read the
147  * packets from them into the recvbuf's along with a time stamp and
148  * an indication of the source host and the interface it was received
149  * through.  This allows us to get as accurate receive time stamps
150  * as possible independent of other processing going on.
151  *
152  * We watch the number of recvbufs available to the signal handler
153  * and allocate more when this number drops below the low water
154  * mark.  If the signal handler should run out of buffers in the
155  * interim it will drop incoming frames, the idea being that it is
156  * better to drop a packet than to be inaccurate.
157  */
158
159
160 /*
161  * Other statistics of possible interest
162  */
163 volatile u_long packets_dropped;        /* total number of packets dropped on reception */
164 volatile u_long packets_ignored;        /* packets received on wild card interface */
165 volatile u_long packets_received;       /* total number of packets received */
166          u_long packets_sent;           /* total number of packets sent */
167          u_long packets_notsent;        /* total number of packets which couldn't be sent */
168
169 volatile u_long handler_calls;  /* number of calls to interrupt handler */
170 volatile u_long handler_pkts;   /* number of pkts received by handler */
171 u_long io_timereset;            /* time counters were reset */
172
173 /*
174  * Interface stuff
175  */
176 endpt * any_interface;          /* wildcard ipv4 interface */
177 endpt * any6_interface;         /* wildcard ipv6 interface */
178 endpt * loopback_interface;     /* loopback ipv4 interface */
179
180 static isc_boolean_t broadcast_client_enabled;  /* is broadcast client enabled */
181 u_int sys_ifnum;                        /* next .ifnum to assign */
182 int ninterfaces;                        /* Total number of interfaces */
183
184 int disable_dynamic_updates;            /* scan interfaces once only */
185
186 #ifdef REFCLOCK
187 /*
188  * Refclock stuff.      We keep a chain of structures with data concerning
189  * the guys we are doing I/O for.
190  */
191 static  struct refclockio *refio;
192 #endif /* REFCLOCK */
193
194 /*
195  * File descriptor masks etc. for call to select
196  * Not needed for I/O Completion Ports or anything outside this file
197  */
198 static fd_set activefds;
199 static int maxactivefd;
200
201 /*
202  * bit alternating value to detect verified interfaces during an update cycle
203  */
204 static  u_short         sys_interphase = 0;
205
206 static endpt *  new_interface(endpt *);
207 static void     add_interface(endpt *);
208 static int      update_interfaces(u_short, interface_receiver_t,
209                                   void *);
210 static void     remove_interface(endpt *);
211 static endpt *  create_interface(u_short, endpt *);
212
213 static int      is_wildcard_addr        (const sockaddr_u *);
214
215 /*
216  * Multicast functions
217  */
218 static  isc_boolean_t   addr_ismulticast        (sockaddr_u *);
219 static  isc_boolean_t   is_anycast              (sockaddr_u *,
220                                                  const char *);
221
222 /*
223  * Not all platforms support multicast
224  */
225 #ifdef MCAST
226 static  isc_boolean_t   socket_multicast_enable (endpt *, sockaddr_u *);
227 static  isc_boolean_t   socket_multicast_disable(endpt *, sockaddr_u *);
228 #endif
229
230 #ifdef DEBUG
231 static void interface_dump      (const endpt *);
232 static void sockaddr_dump       (const sockaddr_u *);
233 static void print_interface     (const endpt *, const char *, const char *);
234 #define DPRINT_INTERFACE(level, args) do { if (debug >= (level)) { print_interface args; } } while (0)
235 #else
236 #define DPRINT_INTERFACE(level, args) do {} while (0)
237 #endif
238
239 typedef struct vsock vsock_t;
240 enum desc_type { FD_TYPE_SOCKET, FD_TYPE_FILE };
241
242 struct vsock {
243         vsock_t *       link;           /* next entry; presumably chained from fd_list */
244         SOCKET          fd;             /* the tracked descriptor */
245         enum desc_type  type;           /* FD_TYPE_SOCKET or FD_TYPE_FILE */
246 };
247
248 vsock_t *fd_list;
249
250 #if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
251 /*
252  * async notification processing (e. g. routing sockets)
253  */
254 /*
255  * support for receiving data on fd that is not a refclock or a socket
256  * like e. g. routing sockets
257  */
258 struct asyncio_reader {
259         struct asyncio_reader *link;                /* next reader on asyncio_reader_list */
260         SOCKET fd;                                  /* fd to be read; INVALID_SOCKET while unset */
261         void  *data;                                /* possibly local data, owned by whoever set it */
262         void (*receiver)(struct asyncio_reader *);  /* input handler, takes the reader as its argument */
263 };
264
265 struct asyncio_reader *asyncio_reader_list;
266
267 static void delete_asyncio_reader (struct asyncio_reader *);
268 static struct asyncio_reader *new_asyncio_reader (void);
269 static void add_asyncio_reader (struct asyncio_reader *, enum desc_type);
270 static void remove_asyncio_reader (struct asyncio_reader *);
271
272 #endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
273
274 static void init_async_notifications (void);
275
276 static  int     addr_eqprefix   (const sockaddr_u *, const sockaddr_u *,
277                                  int);
278 static int      addr_samesubnet (const sockaddr_u *, const sockaddr_u *,
279                                  const sockaddr_u *, const sockaddr_u *);
280 static  int     create_sockets  (u_short);
281 static  SOCKET  open_socket     (sockaddr_u *, int, int, endpt *);
282 static  void    set_reuseaddr   (int);
283 static  isc_boolean_t   socket_broadcast_enable  (struct interface *, SOCKET, sockaddr_u *);
284
285 #if !defined(HAVE_IO_COMPLETION_PORT) && !defined(HAVE_SIGNALED_IO)
286 static  char *  fdbits          (int, const fd_set *);
287 #endif
288 #ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
289 static  isc_boolean_t   socket_broadcast_disable (struct interface *, sockaddr_u *);
290 #endif
291
292 typedef struct remaddr remaddr_t;
293
294 struct remaddr {
295         remaddr_t *             link;   /* next entry; presumably chained from remoteaddr_list */
296         sockaddr_u              addr;   /* address this entry records */
297         endpt *                 ep;     /* endpoint paired with addr */
298 };
299
300 remaddr_t *     remoteaddr_list;
301 endpt *         ep_list;        /* complete endpt list */
302 endpt *         mc4_list;       /* IPv4 mcast-capable unicast endpts */
303 endpt *         mc6_list;       /* IPv6 mcast-capable unicast endpts */
304
305 static endpt *  wildipv4;
306 static endpt *  wildipv6;
307
308 #ifdef SYS_WINNT
309 int accept_wildcard_if_for_winnt;
310 #else
311 const int accept_wildcard_if_for_winnt = FALSE;
312 #endif
313
314 static void     add_fd_to_list          (SOCKET, enum desc_type);
315 static endpt *  find_addr_in_list       (sockaddr_u *);
316 static endpt *  find_flagged_addr_in_list(sockaddr_u *, u_int32);
317 static void     delete_addr_from_list   (sockaddr_u *);
318 static void     delete_interface_from_list(endpt *);
319 static void     close_and_delete_fd_from_list(SOCKET);
320 static void     add_addr_to_list        (sockaddr_u *, endpt *);
321 static void     create_wildcards        (u_short);
322 static endpt *  findlocalinterface      (sockaddr_u *, int, int);
323 static endpt *  findclosestinterface    (sockaddr_u *, int);
324 #ifdef DEBUG
325 static const char *     action_text     (nic_rule_action);
326 #endif
327 static nic_rule_action  interface_action(char *, sockaddr_u *, u_int32);
328 static void             convert_isc_if  (isc_interface_t *,
329                                          endpt *, u_short);
330 static void             calc_addr_distance(sockaddr_u *,
331                                            const sockaddr_u *,
332                                            const sockaddr_u *);
333 static int              cmp_addr_distance(const sockaddr_u *,
334                                           const sockaddr_u *);
335
336 /*
337  * Routines to read the ntp packets
338  */
339 #if !defined(HAVE_IO_COMPLETION_PORT)
340 static inline int       read_network_packet     (SOCKET, struct interface *, l_fp);
341 static void             ntpd_addremove_io_fd    (int, int, int);
342 static void             input_handler_scan      (const l_fp*, const fd_set*);
343 static int/*BOOL*/      sanitize_fdset          (int errc);
344 #ifdef REFCLOCK
345 static inline int       read_refclock_packet    (SOCKET, struct refclockio *, l_fp);
346 #endif
347 #ifdef HAVE_SIGNALED_IO
348 static void             input_handler           (l_fp*);
349 #endif
350 #endif
351
352
353 #ifndef HAVE_IO_COMPLETION_PORT
354 void
355 maintain_activefds(
356         int fd,
357         int closing
358         )
359 {
360         int i;
361
362         if (fd < 0 || fd >= FD_SETSIZE) {
363                 msyslog(LOG_ERR,
364                         "Too many sockets in use, FD_SETSIZE %d exceeded by fd %d",
365                         FD_SETSIZE, fd);
366                 exit(1);
367         }
368
369         if (!closing) {
370                 FD_SET(fd, &activefds);
371                 maxactivefd = max(fd, maxactivefd);
372         } else {
373                 FD_CLR(fd, &activefds);
374                 if (maxactivefd && fd == maxactivefd) {
375                         for (i = maxactivefd - 1; i >= 0; i--)
376                                 if (FD_ISSET(i, &activefds)) {
377                                         maxactivefd = i;
378                                         break;
379                                 }
380                         INSIST(fd != maxactivefd);
381                 }
382         }
383 }
384 #endif  /* !HAVE_IO_COMPLETION_PORT */
385
386
#ifdef DEBUG_TIMING
/*
 * collect_timing - record timing information for the various
 * processing paths.
 *
 * Currently each sample is only formatted as text and passed on to
 * record_timing_stats() for later processing.  This could also do
 * histogram-based analysis in order to reduce the load (and skew)
 * due to the file output.
 *
 *	rb	receive buffer the sample belongs to, or NULL
 *	tag	free-form label appended to the record
 *	count	integer written after the source field
 *	dts	l_fp value, formatted with 9 fraction digits
 *
 * The source field is "-" without a buffer, "-REFCLOCK-" for a
 * buffer whose dstadr is NULL, and the buffer's source address
 * otherwise.
 */
void
collect_timing(struct recvbuf *rb, const char *tag, int count, l_fp *dts)
{
	char		line[256];
	const char *	origin;

	if (NULL == rb)
		origin = "-";
	else if (NULL == rb->dstadr)
		origin = "-REFCLOCK-";
	else
		origin = stoa(&rb->recv_srcadr);

	snprintf(line, sizeof(line), "%s %d %s %s",
		 origin, count, lfptoa(dts, 9), tag);
	record_timing_stats(line);
}
#endif
410
411 /*
412  * About dynamic interfaces, sockets, reception and more...
413  *
414  * the code solves following tasks:
415  *
416  *   - keep a current list of active interfaces in order
417  *     to bind to the interface address on NTP_PORT so that
418  *     all wild and specific bindings for NTP_PORT are taken by ntpd
419  *     to avoid other daemons messing with the time or sockets.
420  *   - all interfaces keep a list of peers that are referencing
421  *     the interface in order to quickly re-assign the peers to
422  *     new interface in case an interface is deleted (=> gone from system or
423  *     down)
424  *   - have a preconfigured socket ready with the right local address
425  *     for transmission and reception
426  *   - have an address list for all destination addresses used within ntpd
427  *     to find the "right" preconfigured socket.
428  *   - facilitate updating the internal interface list with respect to
429  *     the current kernel state
430  *
431  * special issues:
432  *
433  *   - mapping of multicast addresses to the interface affected is not always
434  *     one to one - especially on hosts with multiple interfaces
435  *     the code here currently allocates a separate interface entry for those
436  *     multicast addresses
437  *     iff it is able to bind to a *new* socket with the multicast address (flags |= MCASTIF)
438  *     in case of failure the multicast address is bound to an existing interface.
439  *   - on some systems it is perfectly legal to assign the same address to
440  *     multiple interfaces. Therefore this code does not keep a list of interfaces
441  *     but a list of interfaces that represent a unique address as determined by the kernel
442  *     by the procedure in findlocalinterface. Thus it is perfectly legal to see only
443  *     one representative of a group of real interfaces if they share the same address.
444  *
445  * Frank Kardel 20050910
446  */
447
448 /*
449  * init_io - initialize I/O module.
450  */
451 void
452 init_io(void)
453 {
454         /* Init buffer free list and stat counters */
455         init_recvbuff(RECV_INIT);
456         /* update interface every 5 minutes as default */
457         interface_interval = 300;
458
459 #ifdef WORK_PIPE
460         addremove_io_fd = &ntpd_addremove_io_fd;
461 #endif
462
463 #if defined(SYS_WINNT)
464         init_io_completion_port();
465 #elif defined(HAVE_SIGNALED_IO)
466         (void) set_signal(input_handler);
467 #endif
468 }
469
470
/*
 * ntpd_addremove_io_fd - start or stop tracking a descriptor; this is
 * the function init_io() stores in addremove_io_fd when WORK_PIPE is
 * defined.
 *
 *	fd		descriptor to add or remove
 *	is_pipe		ignored here
 *	remove_it	zero to add fd, nonzero to remove it
 */
static void
ntpd_addremove_io_fd(
	int	fd,
	int	is_pipe,
	int	remove_it
	)
{
	UNUSED_ARG(is_pipe);

#ifdef HAVE_SIGNALED_IO
	/* a descriptor being added needs signal delivery set up first */
	if (0 == remove_it)
		init_socket_sig(fd);
#endif /* HAVE_SIGNALED_IO */

	maintain_activefds(fd, remove_it);
}
487
488
489 /*
490  * io_open_sockets - call socket creation routine
491  */
492 void
493 io_open_sockets(void)
494 {
495         static int already_opened;
496
497         if (already_opened || HAVE_OPT( SAVECONFIGQUIT ))
498                 return;
499
500         already_opened = 1;
501
502         /*
503          * Create the sockets
504          */
505         BLOCKIO();
506         create_sockets(NTP_PORT);
507         UNBLOCKIO();
508
509         init_async_notifications();
510
511         DPRINTF(3, ("io_open_sockets: maxactivefd %d\n", maxactivefd));
512 }
513
514
515 #ifdef DEBUG
516 /*
517  * function to dump the contents of the interface structure
518  * for debugging use only.
519  * We face a dilemma here -- sockets are FDs under POSIX and
520  * actually HANDLES under Windows. So we use '%lld' as format
521  * and cast the value to 'long long'; this should not hurt
522  * with UNIX-like systems and does not truncate values on Win64.
523  */
524 void
525 interface_dump(const endpt *itf)
526 {
527         printf("Dumping interface: %p\n", itf);
528         printf("fd = %lld\n", (long long)itf->fd);
529         printf("bfd = %lld\n", (long long)itf->bfd);
530         printf("sin = %s,\n", stoa(&itf->sin));
531         sockaddr_dump(&itf->sin);
532         printf("bcast = %s,\n", stoa(&itf->bcast));
533         sockaddr_dump(&itf->bcast);
534         printf("mask = %s,\n", stoa(&itf->mask));
535         sockaddr_dump(&itf->mask);
536         printf("name = %s\n", itf->name);
537         printf("flags = 0x%08x\n", itf->flags);
538         printf("last_ttl = %d\n", itf->last_ttl);
539         printf("addr_refid = %08x\n", itf->addr_refid);
540         printf("num_mcast = %d\n", itf->num_mcast);
541         printf("received = %ld\n", itf->received);
542         printf("sent = %ld\n", itf->sent);
543         printf("notsent = %ld\n", itf->notsent);
544         printf("ifindex = %u\n", itf->ifindex);
545         printf("peercnt = %u\n", itf->peercnt);
546         printf("phase = %u\n", itf->phase);
547 }
548
549 /*
550  * sockaddr_dump - hex dump the start of a sockaddr_u
551  */
552 static void
553 sockaddr_dump(const sockaddr_u *psau)
554 {
555         /* Limit the size of the sockaddr_in6 hex dump */
556         const int maxsize = min(32, sizeof(psau->sa6));
557         const u_char *  cp;
558         int             i;
559
560         /* XXX: Should we limit maxsize based on psau->saX.sin_family? */
561         cp = (const void *)&psau->sa6;
562
563         for(i = 0; i < maxsize; i++) {
564                 printf("%02x", *cp++);
565                 if (!((i + 1) % 4))
566                         printf(" ");
567         }
568         printf("\n");
569 }
570
571 /*
572  * print_interface - helper to output debug information
573  */
574 static void
575 print_interface(const endpt *iface, const char *pfx, const char *sfx)
576 {
577         printf("%sinterface #%d: fd=%lld, bfd=%lld, name=%s, flags=0x%x, ifindex=%u, sin=%s",
578                pfx,
579                iface->ifnum,
580                (long long)iface->fd,
581                (long long)iface->bfd,
582                iface->name,
583                iface->flags,
584                iface->ifindex,
585                stoa(&iface->sin));
586         if (AF_INET == iface->family) {
587                 if (iface->flags & INT_BROADCAST)
588                         printf(", bcast=%s", stoa(&iface->bcast));
589                 printf(", mask=%s", stoa(&iface->mask));
590         }
591         printf(", %s:%s",
592                (iface->ignore_packets)
593                    ? "Disabled"
594                    : "Enabled",
595                sfx);
596         if (debug > 4)  /* in-depth debugging only */
597                 interface_dump(iface);
598 }
599 #endif
600
601 #if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
602 /*
603  * create an asyncio_reader structure
604  */
605 static struct asyncio_reader *
606 new_asyncio_reader(void)
607 {
608         struct asyncio_reader *reader;
609
610         reader = emalloc_zero(sizeof(*reader));
611         reader->fd = INVALID_SOCKET;
612
613         return reader;
614 }
615
/*
 * delete_asyncio_reader - free the storage of a reader.
 *
 * Only the structure itself is freed; the reader is neither unlinked
 * from any list nor is its fd closed here.
 */
static void
delete_asyncio_reader(struct asyncio_reader *reader)
{
	free(reader);
}
626
627 /*
628  * add asynchio_reader
629  */
630 static void
631 add_asyncio_reader(
632         struct asyncio_reader * reader,
633         enum desc_type          type)
634 {
635         LINK_SLIST(asyncio_reader_list, reader, link);
636         add_fd_to_list(reader->fd, type);
637 }
638
639 /*
640  * remove asynchio_reader
641  */
642 static void
643 remove_asyncio_reader(
644         struct asyncio_reader *reader
645         )
646 {
647         struct asyncio_reader *unlinked;
648
649         UNLINK_SLIST(unlinked, asyncio_reader_list, reader, link,
650             struct asyncio_reader);
651
652         if (reader->fd != INVALID_SOCKET)
653                 close_and_delete_fd_from_list(reader->fd);
654
655         reader->fd = INVALID_SOCKET;
656 }
657 #endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
658
659
660 /* compare two sockaddr prefixes */
661 static int
662 addr_eqprefix(
663         const sockaddr_u *      a,
664         const sockaddr_u *      b,
665         int                     prefixlen
666         )
667 {
668         isc_netaddr_t           isc_a;
669         isc_netaddr_t           isc_b;
670         isc_sockaddr_t          isc_sa;
671
672         ZERO(isc_sa);
673         memcpy(&isc_sa.type, a, min(sizeof(isc_sa.type), sizeof(*a)));
674         isc_netaddr_fromsockaddr(&isc_a, &isc_sa);
675
676         ZERO(isc_sa);
677         memcpy(&isc_sa.type, b, min(sizeof(isc_sa.type), sizeof(*b)));
678         isc_netaddr_fromsockaddr(&isc_b, &isc_sa);
679
680         return (int)isc_netaddr_eqprefix(&isc_a, &isc_b,
681                                          (u_int)prefixlen);
682 }
683
684
685 static int
686 addr_samesubnet(
687         const sockaddr_u *      a,
688         const sockaddr_u *      a_mask,
689         const sockaddr_u *      b,
690         const sockaddr_u *      b_mask
691         )
692 {
693         const u_int32 * pa;
694         const u_int32 * pa_limit;
695         const u_int32 * pb;
696         const u_int32 * pm;
697         size_t          loops;
698
699         REQUIRE(AF(a) == AF(a_mask));
700         REQUIRE(AF(b) == AF(b_mask));
701         /*
702          * With address and mask families verified to match, comparing
703          * the masks also validates the address's families match.
704          */
705         if (!SOCK_EQ(a_mask, b_mask))
706                 return FALSE;
707
708         if (IS_IPV6(a)) {
709                 loops = sizeof(NSRCADR6(a)) / sizeof(*pa);
710                 pa = (const void *)&NSRCADR6(a);
711                 pb = (const void *)&NSRCADR6(b);
712                 pm = (const void *)&NSRCADR6(a_mask);
713         } else {
714                 loops = sizeof(NSRCADR(a)) / sizeof(*pa);
715                 pa = (const void *)&NSRCADR(a);
716                 pb = (const void *)&NSRCADR(b);
717                 pm = (const void *)&NSRCADR(a_mask);
718         }
719         for (pa_limit = pa + loops; pa < pa_limit; pa++, pb++, pm++)
720                 if ((*pa & *pm) != (*pb & *pm))
721                         return FALSE;
722
723         return TRUE;
724 }
725
726
727 /*
728  * interface list enumerator - visitor pattern
729  */
730 void
731 interface_enumerate(
732         interface_receiver_t    receiver,
733         void *                  data
734         )
735 {
736         interface_info_t ifi;
737
738         ifi.action = IFS_EXISTS;
739         for (ifi.ep = ep_list; ifi.ep != NULL; ifi.ep = ifi.ep->elink)
740                 (*receiver)(data, &ifi);
741 }
742
743 /*
744  * do standard initialization of interface structure
745  */
746 static void
747 init_interface(
748         endpt *ep
749         )
750 {
751         ZERO(*ep);
752         ep->fd = INVALID_SOCKET;
753         ep->bfd = INVALID_SOCKET;
754         ep->phase = sys_interphase;
755 }
756
757
758 /*
759  * create new interface structure initialize from
760  * template structure or via standard initialization
761  * function
762  */
763 static struct interface *
764 new_interface(
765         struct interface *interface
766         )
767 {
768         struct interface *      iface;
769
770         iface = emalloc(sizeof(*iface));
771
772         if (NULL == interface)
773                 init_interface(iface);
774         else                            /* use the template */
775                 memcpy(iface, interface, sizeof(*iface));
776
777         /* count every new instance of an interface in the system */
778         iface->ifnum = sys_ifnum++;
779         iface->starttime = current_time;
780
781 #   ifdef HAVE_IO_COMPLETION_PORT
782         if (!io_completion_port_add_interface(iface)) {
783                 msyslog(LOG_EMERG, "cannot register interface with IO engine -- will exit now");
784                 exit(1);
785         }
786 #   endif
787         return iface;
788 }
789
790
791 /*
792  * return interface storage into free memory pool
793  */
794 static void
795 delete_interface(
796         endpt *ep
797         )
798 {
799 #    ifdef HAVE_IO_COMPLETION_PORT
800         io_completion_port_remove_interface(ep);
801 #    endif
802         free(ep);
803 }
804
805
806 /*
807  * link interface into list of known interfaces
808  */
809 static void
810 add_interface(
811         endpt * ep
812         )
813 {
814         endpt **        pmclisthead;
815         endpt *         scan;
816         endpt *         scan_next;
817         endpt *         unlinked;
818         sockaddr_u *    addr;
819         int             ep_local;
820         int             scan_local;
821         int             same_subnet;
822         int             ep_univ_iid;    /* iface ID from MAC address */
823         int             scan_univ_iid;  /* see RFC 4291 */
824         int             ep_privacy;     /* random local iface ID */
825         int             scan_privacy;   /* see RFC 4941 */
826         int             rc;
827
828         /* Calculate the refid */
829         ep->addr_refid = addr2refid(&ep->sin);
830         /* link at tail so ntpdc -c ifstats index increases each row */
831         LINK_TAIL_SLIST(ep_list, ep, elink, endpt);
832         ninterfaces++;
833 #ifdef MCAST
834         /* the rest is for enabled multicast-capable addresses only */
835         if (ep->ignore_packets || !(INT_MULTICAST & ep->flags) ||
836             INT_LOOPBACK & ep->flags)
837                 return;
838 # ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
839         if (AF_INET6 == ep->family)
840                 return;
841 # endif
842         pmclisthead = (AF_INET == ep->family)
843                          ? &mc4_list
844                          : &mc6_list;
845
846         if (AF_INET6 == ep->family) {
847                 ep_local =
848                     IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(&ep->sin)) ||
849                     IN6_IS_ADDR_SITELOCAL(PSOCK_ADDR6(&ep->sin));
850                 ep_univ_iid = IS_IID_UNIV(&ep->sin);
851                 ep_privacy = !!(INT_PRIVACY & ep->flags);
852         } else {
853                 ep_local = FALSE;
854                 ep_univ_iid = FALSE;
855                 ep_privacy = FALSE;
856         }
857         DPRINTF(4, ("add_interface mcast-capable %s%s%s%s\n",
858                     stoa(&ep->sin),
859                     (ep_local) ? " link/scope-local" : "",
860                     (ep_univ_iid) ? " univ-IID" : "",
861                     (ep_privacy) ? " privacy" : ""));
862         /*
863          * If we have multiple local addresses on the same network
864          * interface, and some are link- or site-local, do not multicast
865          * out from the link-/site-local addresses by default, to avoid
866          * duplicate manycastclient associations between v6 peers using
867          * link-local and global addresses.  link-local can still be
868          * chosen using "nic ignore myv6globalprefix::/64".
869          * Similarly, if we have multiple global addresses from the same
870          * prefix on the same network interface, multicast from one,
871          * preferring EUI-64, then static, then least RFC 4941 privacy
872          * addresses.
873          */
874         for (scan = *pmclisthead; scan != NULL; scan = scan_next) {
875                 scan_next = scan->mclink;
876                 if (ep->family != scan->family)
877                         continue;
878                 if (strcmp(ep->name, scan->name))
879                         continue;
880                 same_subnet = addr_samesubnet(&ep->sin, &ep->mask,
881                                               &scan->sin, &scan->mask);
882                 if (AF_INET6 == ep->family) {
883                         addr = &scan->sin;
884                         scan_local =
885                             IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(addr)) ||
886                             IN6_IS_ADDR_SITELOCAL(PSOCK_ADDR6(addr));
887                         scan_univ_iid = IS_IID_UNIV(addr);
888                         scan_privacy = !!(INT_PRIVACY & scan->flags);
889                 } else {
890                         scan_local = FALSE;
891                         scan_univ_iid = FALSE;
892                         scan_privacy = FALSE;
893                 }
894                 DPRINTF(4, ("add_interface mcast-capable scan %s%s%s%s\n",
895                             stoa(&scan->sin),
896                             (scan_local) ? " link/scope-local" : "",
897                             (scan_univ_iid) ? " univ-IID" : "",
898                             (scan_privacy) ? " privacy" : ""));
899                 if ((ep_local && !scan_local) || (same_subnet &&
900                     ((ep_privacy && !scan_privacy) ||
901                      (!ep_univ_iid && scan_univ_iid)))) {
902                         DPRINTF(4, ("did not add %s to %s of IPv6 multicast-capable list which already has %s\n",
903                                 stoa(&ep->sin),
904                                 (ep_local)
905                                     ? "tail"
906                                     : "head",
907                                 stoa(&scan->sin)));
908                         return;
909                 }
910                 if ((scan_local && !ep_local) || (same_subnet &&
911                     ((scan_privacy && !ep_privacy) ||
912                      (!scan_univ_iid && ep_univ_iid)))) {
913                         UNLINK_SLIST(unlinked, *pmclisthead,
914                                      scan, mclink, endpt);
915                         DPRINTF(4, ("%s %s from IPv6 multicast-capable list to add %s\n",
916                                 (unlinked != scan)
917                                     ? "Failed to remove"
918                                     : "removed",
919                                 stoa(&scan->sin), stoa(&ep->sin)));
920                 }
921         }
922         /*
923          * Add link/site local at the tail of the multicast-
924          * capable unicast interfaces list, so that ntpd will
925          * send from global addresses before link-/site-local
926          * ones.
927          */
928         if (ep_local)
929                 LINK_TAIL_SLIST(*pmclisthead, ep, mclink, endpt);
930         else
931                 LINK_SLIST(*pmclisthead, ep, mclink);
932         DPRINTF(4, ("added %s to %s of IPv%s multicast-capable unicast local address list\n",
933                 stoa(&ep->sin),
934                 (ep_local)
935                     ? "tail"
936                     : "head",
937                 (AF_INET == ep->family)
938                     ? "4"
939                     : "6"));
940
941         if (INVALID_SOCKET == ep->fd)
942                 return;
943
944         /*
945          * select the local address from which to send to multicast.
946          */
947         switch (AF(&ep->sin)) {
948
949         case AF_INET :
950                 rc = setsockopt(ep->fd, IPPROTO_IP,
951                                 IP_MULTICAST_IF,
952                                 (void *)&NSRCADR(&ep->sin),
953                                 sizeof(NSRCADR(&ep->sin)));
954                 if (rc)
955                         msyslog(LOG_ERR,
956                                 "setsockopt IP_MULTICAST_IF %s fails: %m",
957                                 stoa(&ep->sin));
958                 break;
959
960 # ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
961         case AF_INET6 :
962                 rc = setsockopt(ep->fd, IPPROTO_IPV6,
963                                  IPV6_MULTICAST_IF,
964                                  (void *)&ep->ifindex,
965                                  sizeof(ep->ifindex));
966                 /* do not complain if bound addr scope is ifindex */
967                 if (rc && ep->ifindex != SCOPE(&ep->sin))
968                         msyslog(LOG_ERR,
969                                 "setsockopt IPV6_MULTICAST_IF %u for %s fails: %m",
970                                 ep->ifindex, stoa(&ep->sin));
971                 break;
972 # endif
973         }
974 #endif  /* MCAST */
975 }
976
977
978 /*
979  * remove interface from known interface list and clean up
980  * associated resources
981  */
982 static void
983 remove_interface(
984         endpt * ep
985         )
986 {
987         endpt *         unlinked;
988         endpt **        pmclisthead;
989         sockaddr_u      resmask;
990
991         UNLINK_SLIST(unlinked, ep_list, ep, elink, endpt);
992         if (!ep->ignore_packets && INT_MULTICAST & ep->flags) {
993                 pmclisthead = (AF_INET == ep->family)
994                                  ? &mc4_list
995                                  : &mc6_list;
996                 UNLINK_SLIST(unlinked, *pmclisthead, ep, mclink, endpt);
997                 DPRINTF(4, ("%s %s IPv%s multicast-capable unicast local address list\n",
998                         stoa(&ep->sin),
999                         (unlinked != NULL)
1000                             ? "removed from"
1001                             : "not found on",
1002                         (AF_INET == ep->family)
1003                             ? "4"
1004                             : "6"));
1005         }
1006         delete_interface_from_list(ep);
1007
1008         if (ep->fd != INVALID_SOCKET) {
1009                 msyslog(LOG_INFO,
1010                         "Deleting interface #%d %s, %s#%d, interface stats: received=%ld, sent=%ld, dropped=%ld, active_time=%ld secs",
1011                         ep->ifnum,
1012                         ep->name,
1013                         stoa(&ep->sin),
1014                         SRCPORT(&ep->sin),
1015                         ep->received,
1016                         ep->sent,
1017                         ep->notsent,
1018                         current_time - ep->starttime);
1019 #           ifdef HAVE_IO_COMPLETION_PORT
1020                 io_completion_port_remove_socket(ep->fd, ep);
1021 #           endif
1022                 close_and_delete_fd_from_list(ep->fd);
1023                 ep->fd = INVALID_SOCKET;
1024         }
1025
1026         if (ep->bfd != INVALID_SOCKET) {
1027                 msyslog(LOG_INFO,
1028                         "stop listening for broadcasts to %s on interface #%d %s",
1029                         stoa(&ep->bcast), ep->ifnum, ep->name);
1030 #           ifdef HAVE_IO_COMPLETION_PORT
1031                 io_completion_port_remove_socket(ep->bfd, ep);
1032 #           endif
1033                 close_and_delete_fd_from_list(ep->bfd);
1034                 ep->bfd = INVALID_SOCKET;
1035         }
1036 #   ifdef HAVE_IO_COMPLETION_PORT
1037         io_completion_port_remove_interface(ep);
1038 #   endif
1039
1040         ninterfaces--;
1041         mon_clearinterface(ep);
1042
1043         /* remove restrict interface entry */
1044         SET_HOSTMASK(&resmask, AF(&ep->sin));
1045         hack_restrict(RESTRICT_REMOVEIF, &ep->sin, &resmask,
1046                       -3, RESM_NTPONLY | RESM_INTERFACE, RES_IGNORE, 0);
1047 }
1048
1049
1050 static void
1051 log_listen_address(
1052         endpt * ep
1053         )
1054 {
1055         msyslog(LOG_INFO, "%s on %d %s %s",
1056                 (ep->ignore_packets)
1057                     ? "Listen and drop"
1058                     : "Listen normally",
1059                 ep->ifnum,
1060                 ep->name,
1061                 sptoa(&ep->sin));
1062 }
1063
1064
1065 static void
1066 create_wildcards(
1067         u_short port
1068         )
1069 {
1070         int                     v4wild;
1071 #ifdef INCLUDE_IPV6_SUPPORT
1072         int                     v6wild;
1073 #endif
1074         sockaddr_u              wildaddr;
1075         nic_rule_action         action;
1076         struct interface *      wildif;
1077
1078         /*
1079          * silence "potentially uninitialized" warnings from VC9
1080          * failing to follow the logic.  Ideally action could remain
1081          * uninitialized, and the memset be the first statement under
1082          * the first if (v4wild).
1083          */
1084         action = ACTION_LISTEN;
1085         ZERO(wildaddr);
1086
1087 #ifdef INCLUDE_IPV6_SUPPORT
1088         /*
1089          * create pseudo-interface with wildcard IPv6 address
1090          */
1091         v6wild = ipv6_works;
1092         if (v6wild) {
1093                 /* set wildaddr to the v6 wildcard address :: */
1094                 ZERO(wildaddr);
1095                 AF(&wildaddr) = AF_INET6;
1096                 SET_ADDR6N(&wildaddr, in6addr_any);
1097                 SET_PORT(&wildaddr, port);
1098                 SET_SCOPE(&wildaddr, 0);
1099
1100                 /* check for interface/nic rules affecting the wildcard */
1101                 action = interface_action(NULL, &wildaddr, 0);
1102                 v6wild = (ACTION_IGNORE != action);
1103         }
1104         if (v6wild) {
1105                 wildif = new_interface(NULL);
1106
1107                 strlcpy(wildif->name, "v6wildcard", sizeof(wildif->name));
1108                 memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1109                 wildif->family = AF_INET6;
1110                 AF(&wildif->mask) = AF_INET6;
1111                 SET_ONESMASK(&wildif->mask);
1112
1113                 wildif->flags = INT_UP | INT_WILDCARD;
1114                 wildif->ignore_packets = (ACTION_DROP == action);
1115
1116                 wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1117
1118                 if (wildif->fd != INVALID_SOCKET) {
1119                         wildipv6 = wildif;
1120                         any6_interface = wildif;
1121                         add_addr_to_list(&wildif->sin, wildif);
1122                         add_interface(wildif);
1123                         log_listen_address(wildif);
1124                 } else {
1125                         msyslog(LOG_ERR,
1126                                 "unable to bind to wildcard address %s - another process may be running - EXITING",
1127                                 stoa(&wildif->sin));
1128                         exit(1);
1129                 }
1130                 DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1131         }
1132 #endif
1133
1134         /*
1135          * create pseudo-interface with wildcard IPv4 address
1136          */
1137         v4wild = ipv4_works;
1138         if (v4wild) {
1139                 /* set wildaddr to the v4 wildcard address 0.0.0.0 */
1140                 AF(&wildaddr) = AF_INET;
1141                 SET_ADDR4N(&wildaddr, INADDR_ANY);
1142                 SET_PORT(&wildaddr, port);
1143
1144                 /* check for interface/nic rules affecting the wildcard */
1145                 action = interface_action(NULL, &wildaddr, 0);
1146                 v4wild = (ACTION_IGNORE != action);
1147         }
1148         if (v4wild) {
1149                 wildif = new_interface(NULL);
1150
1151                 strlcpy(wildif->name, "v4wildcard", sizeof(wildif->name));
1152                 memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1153                 wildif->family = AF_INET;
1154                 AF(&wildif->mask) = AF_INET;
1155                 SET_ONESMASK(&wildif->mask);
1156
1157                 wildif->flags = INT_BROADCAST | INT_UP | INT_WILDCARD;
1158                 wildif->ignore_packets = (ACTION_DROP == action);
1159 #if defined(MCAST)
1160                 /*
1161                  * enable multicast reception on the broadcast socket
1162                  */
1163                 AF(&wildif->bcast) = AF_INET;
1164                 SET_ADDR4N(&wildif->bcast, INADDR_ANY);
1165                 SET_PORT(&wildif->bcast, port);
1166 #endif /* MCAST */
1167                 wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1168
1169                 if (wildif->fd != INVALID_SOCKET) {
1170                         wildipv4 = wildif;
1171                         any_interface = wildif;
1172
1173                         add_addr_to_list(&wildif->sin, wildif);
1174                         add_interface(wildif);
1175                         log_listen_address(wildif);
1176                 } else {
1177                         msyslog(LOG_ERR,
1178                                 "unable to bind to wildcard address %s - another process may be running - EXITING",
1179                                 stoa(&wildif->sin));
1180                         exit(1);
1181                 }
1182                 DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1183         }
1184 }
1185
1186
1187 /*
1188  * add_nic_rule() -- insert a rule entry at the head of nic_rule_list.
1189  */
1190 void
1191 add_nic_rule(
1192         nic_rule_match  match_type,
1193         const char *    if_name,        /* interface name or numeric address */
1194         int             prefixlen,
1195         nic_rule_action action
1196         )
1197 {
1198         nic_rule *      rule;
1199         isc_boolean_t   is_ip;
1200
1201         rule = emalloc_zero(sizeof(*rule));
1202         rule->match_type = match_type;
1203         rule->prefixlen = prefixlen;
1204         rule->action = action;
1205
1206         if (MATCH_IFNAME == match_type) {
1207                 REQUIRE(NULL != if_name);
1208                 rule->if_name = estrdup(if_name);
1209         } else if (MATCH_IFADDR == match_type) {
1210                 REQUIRE(NULL != if_name);
1211                 /* set rule->addr */
1212                 is_ip = is_ip_address(if_name, AF_UNSPEC, &rule->addr);
1213                 REQUIRE(is_ip);
1214         } else
1215                 REQUIRE(NULL == if_name);
1216
1217         LINK_SLIST(nic_rule_list, rule, next);
1218 }
1219
1220
#ifdef DEBUG
/*
 * action_text() - name of a nic rule action for debug output.
 * An unrecognized value is reported and trips ENSURE(0).
 */
static const char *
action_text(
	nic_rule_action	action
	)
{
	switch (action) {

	case ACTION_LISTEN:
		return "listen";

	case ACTION_IGNORE:
		return "ignore";

	case ACTION_DROP:
		return "drop";

	default:
		break;
	}

	DPRINTF(1, ("fatal: unknown nic_rule_action %d\n",
		    action));
	ENSURE(0);

	return "ERROR";
}
#endif	/* DEBUG */
1254
1255
1256 static nic_rule_action
1257 interface_action(
1258         char *          if_name,
1259         sockaddr_u *    if_addr,
1260         u_int32         if_flags
1261         )
1262 {
1263         nic_rule *      rule;
1264         int             isloopback;
1265         int             iswildcard;
1266
1267         DPRINTF(4, ("interface_action: interface %s ",
1268                     (if_name != NULL) ? if_name : "wildcard"));
1269
1270         iswildcard = is_wildcard_addr(if_addr);
1271         isloopback = !!(INT_LOOPBACK & if_flags);
1272
1273         /*
1274          * Find any matching NIC rule from --interface / -I or ntp.conf
1275          * interface/nic rules.
1276          */
1277         for (rule = nic_rule_list; rule != NULL; rule = rule->next) {
1278
1279                 switch (rule->match_type) {
1280
1281                 case MATCH_ALL:
1282                         /* loopback and wildcard excluded from "all" */
1283                         if (isloopback || iswildcard)
1284                                 break;
1285                         DPRINTF(4, ("nic all %s\n",
1286                             action_text(rule->action)));
1287                         return rule->action;
1288
1289                 case MATCH_IPV4:
1290                         if (IS_IPV4(if_addr)) {
1291                                 DPRINTF(4, ("nic ipv4 %s\n",
1292                                     action_text(rule->action)));
1293                                 return rule->action;
1294                         }
1295                         break;
1296
1297                 case MATCH_IPV6:
1298                         if (IS_IPV6(if_addr)) {
1299                                 DPRINTF(4, ("nic ipv6 %s\n",
1300                                     action_text(rule->action)));
1301                                 return rule->action;
1302                         }
1303                         break;
1304
1305                 case MATCH_WILDCARD:
1306                         if (iswildcard) {
1307                                 DPRINTF(4, ("nic wildcard %s\n",
1308                                     action_text(rule->action)));
1309                                 return rule->action;
1310                         }
1311                         break;
1312
1313                 case MATCH_IFADDR:
1314                         if (rule->prefixlen != -1) {
1315                                 if (addr_eqprefix(if_addr, &rule->addr,
1316                                                   rule->prefixlen)) {
1317
1318                                         DPRINTF(4, ("subnet address match - %s\n",
1319                                             action_text(rule->action)));
1320                                         return rule->action;
1321                                 }
1322                         } else
1323                                 if (SOCK_EQ(if_addr, &rule->addr)) {
1324
1325                                         DPRINTF(4, ("address match - %s\n",
1326                                             action_text(rule->action)));
1327                                         return rule->action;
1328                                 }
1329                         break;
1330
1331                 case MATCH_IFNAME:
1332                         if (if_name != NULL
1333 #if defined(HAVE_FNMATCH) && defined(FNM_CASEFOLD)
1334                             && !fnmatch(rule->if_name, if_name, FNM_CASEFOLD)
1335 #else
1336                             && !strcasecmp(if_name, rule->if_name)
1337 #endif
1338                             ) {
1339
1340                                 DPRINTF(4, ("interface name match - %s\n",
1341                                     action_text(rule->action)));
1342                                 return rule->action;
1343                         }
1344                         break;
1345                 }
1346         }
1347
1348         /*
1349          * Unless explicitly disabled such as with "nic ignore ::1"
1350          * listen on loopback addresses.  Since ntpq and ntpdc query
1351          * "localhost" by default, which typically resolves to ::1 and
1352          * 127.0.0.1, it's useful to default to listening on both.
1353          */
1354         if (isloopback) {
1355                 DPRINTF(4, ("default loopback listen\n"));
1356                 return ACTION_LISTEN;
1357         }
1358
1359         /*
1360          * Treat wildcard addresses specially.  If there is no explicit
1361          * "nic ... wildcard" or "nic ... 0.0.0.0" or "nic ... ::" rule
1362          * default to drop.
1363          */
1364         if (iswildcard) {
1365                 DPRINTF(4, ("default wildcard drop\n"));
1366                 return ACTION_DROP;
1367         }
1368
1369         /*
1370          * Check for "virtual IP" (colon in the interface name) after
1371          * the rules so that "ntpd --interface eth0:1 -novirtualips"
1372          * does indeed listen on eth0:1's addresses.
1373          */
1374         if (!listen_to_virtual_ips && if_name != NULL
1375             && (strchr(if_name, ':') != NULL)) {
1376
1377                 DPRINTF(4, ("virtual ip - ignore\n"));
1378                 return ACTION_IGNORE;
1379         }
1380
1381         /*
1382          * If there are no --interface/-I command-line options and no
1383          * interface/nic rules in ntp.conf, the default action is to
1384          * listen.  In the presence of rules from either, the default
1385          * is to ignore.  This implements ntpd's traditional listen-
1386          * every default with no interface listen configuration, and
1387          * ensures a single -I eth0 or "nic listen eth0" means do not
1388          * listen on any other addresses.
1389          */
1390         if (NULL == nic_rule_list) {
1391                 DPRINTF(4, ("default listen\n"));
1392                 return ACTION_LISTEN;
1393         }
1394
1395         DPRINTF(4, ("implicit ignore\n"));
1396         return ACTION_IGNORE;
1397 }
1398
1399
1400 static void
1401 convert_isc_if(
1402         isc_interface_t *isc_if,
1403         endpt *itf,
1404         u_short port
1405         )
1406 {
1407         const u_char v6loop[16] = {0, 0, 0, 0, 0, 0, 0, 0,
1408                                    0, 0, 0, 0, 0, 0, 0, 1};
1409
1410         strlcpy(itf->name, isc_if->name, sizeof(itf->name));
1411         itf->ifindex = isc_if->ifindex;
1412         itf->family = (u_short)isc_if->af;
1413         AF(&itf->sin) = itf->family;
1414         AF(&itf->mask) = itf->family;
1415         AF(&itf->bcast) = itf->family;
1416         SET_PORT(&itf->sin, port);
1417         SET_PORT(&itf->mask, port);
1418         SET_PORT(&itf->bcast, port);
1419
1420         if (IS_IPV4(&itf->sin)) {
1421                 NSRCADR(&itf->sin) = isc_if->address.type.in.s_addr;
1422                 NSRCADR(&itf->mask) = isc_if->netmask.type.in.s_addr;
1423
1424                 if (isc_if->flags & INTERFACE_F_BROADCAST) {
1425                         itf->flags |= INT_BROADCAST;
1426                         NSRCADR(&itf->bcast) =
1427                             isc_if->broadcast.type.in.s_addr;
1428                 }
1429         }
1430 #ifdef INCLUDE_IPV6_SUPPORT
1431         else if (IS_IPV6(&itf->sin)) {
1432                 SET_ADDR6N(&itf->sin, isc_if->address.type.in6);
1433                 SET_ADDR6N(&itf->mask, isc_if->netmask.type.in6);
1434
1435                 SET_SCOPE(&itf->sin, isc_if->address.zone);
1436         }
1437 #endif /* INCLUDE_IPV6_SUPPORT */
1438
1439
1440         /* Process the rest of the flags */
1441
1442         itf->flags |=
1443                   ((INTERFACE_F_UP & isc_if->flags)
1444                         ? INT_UP : 0)
1445                 | ((INTERFACE_F_LOOPBACK & isc_if->flags)
1446                         ? INT_LOOPBACK : 0)
1447                 | ((INTERFACE_F_POINTTOPOINT & isc_if->flags)
1448                         ? INT_PPP : 0)
1449                 | ((INTERFACE_F_MULTICAST & isc_if->flags)
1450                         ? INT_MULTICAST : 0)
1451                 | ((INTERFACE_F_PRIVACY & isc_if->flags)
1452                         ? INT_PRIVACY : 0)
1453                 ;
1454
1455         /*
1456          * Clear the loopback flag if the address is not localhost.
1457          * http://bugs.ntp.org/1683
1458          */
1459         if (INT_LOOPBACK & itf->flags) {
1460                 if (AF_INET == itf->family) {
1461                         if (127 != (SRCADR(&itf->sin) >> 24))
1462                                 itf->flags &= ~INT_LOOPBACK;
1463                 } else {
1464                         if (memcmp(v6loop, NSRCADR6(&itf->sin),
1465                                    sizeof(NSRCADR6(&itf->sin))))
1466                                 itf->flags &= ~INT_LOOPBACK;
1467                 }
1468         }
1469 }
1470
1471
1472 /*
1473  * refresh_interface
1474  *
1475  * some OSes have been observed to keep
1476  * cached routes even when more specific routes
1477  * become available.
1478  * this can be mitigated by re-binding
1479  * the socket.
1480  */
1481 static int
1482 refresh_interface(
1483         struct interface * interface
1484         )
1485 {
1486 #ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
1487         if (interface->fd != INVALID_SOCKET) {
1488                 int bcast = (interface->flags & INT_BCASTXMIT) != 0;
1489                 /* as we forcibly close() the socket remove the
1490                    broadcast permission indication */
1491                 if (bcast)
1492                         socket_broadcast_disable(interface, &interface->sin);
1493
1494                 close_and_delete_fd_from_list(interface->fd);
1495
1496                 /* create new socket picking up a new first hop binding
1497                    at connect() time */
1498                 interface->fd = open_socket(&interface->sin,
1499                                             bcast, 0, interface);
1500                  /*
1501                   * reset TTL indication so TTL is is set again
1502                   * next time around
1503                   */
1504                 interface->last_ttl = 0;
1505                 return (interface->fd != INVALID_SOCKET);
1506         } else
1507                 return 0;       /* invalid sockets are not refreshable */
1508 #else /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1509         return (interface->fd != INVALID_SOCKET);
1510 #endif /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1511 }
1512
1513 /*
1514  * interface_update - externally callable update function
1515  */
1516 void
1517 interface_update(
1518         interface_receiver_t    receiver,
1519         void *                  data)
1520 {
1521         int new_interface_found;
1522
1523         if (disable_dynamic_updates)
1524                 return;
1525
1526         BLOCKIO();
1527         new_interface_found = update_interfaces(NTP_PORT, receiver, data);
1528         UNBLOCKIO();
1529
1530         if (!new_interface_found)
1531                 return;
1532
1533 #ifdef DEBUG
1534         msyslog(LOG_DEBUG, "new interface(s) found: waking up resolver");
1535 #endif
1536         interrupt_worker_sleep();
1537 }
1538
1539
1540 /*
1541  * sau_from_netaddr() - convert network address on-wire formats.
1542  * Convert from libisc's isc_netaddr_t to NTP's sockaddr_u
1543  */
1544 void
1545 sau_from_netaddr(
1546         sockaddr_u *psau,
1547         const isc_netaddr_t *pna
1548         )
1549 {
1550         ZERO_SOCK(psau);
1551         AF(psau) = (u_short)pna->family;
1552         switch (pna->family) {
1553
1554         case AF_INET:
1555                 memcpy(&psau->sa4.sin_addr, &pna->type.in,
1556                        sizeof(psau->sa4.sin_addr));
1557                 break;
1558
1559         case AF_INET6:
1560                 memcpy(&psau->sa6.sin6_addr, &pna->type.in6,
1561                        sizeof(psau->sa6.sin6_addr));
1562                 break;
1563         }
1564 }
1565
1566
1567 static int
1568 is_wildcard_addr(
1569         const sockaddr_u *psau
1570         )
1571 {
1572         if (IS_IPV4(psau) && !NSRCADR(psau))
1573                 return 1;
1574
1575 #ifdef INCLUDE_IPV6_SUPPORT
1576         if (IS_IPV6(psau) && S_ADDR6_EQ(psau, &in6addr_any))
1577                 return 1;
1578 #endif
1579
1580         return 0;
1581 }
1582
1583
#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
/*
 * set_wildcard_reuse() - switch SO_REUSEADDR on or off on the wildcard
 * socket of the given address family.  Nothing happens when that
 * family has no wildcard interface or its socket is not open.  A
 * setsockopt() failure is logged, not returned.
 */
static void
set_wildcard_reuse(
	u_short	family,
	int	on
	)
{
	struct interface *wild;
	SOCKET sock;

	wild = ANY_INTERFACE_BYFAM(family);
	if (NULL == wild)
		return;

	sock = wild->fd;
	if (INVALID_SOCKET == sock)
		return;

	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
		       (void *)&on, sizeof(on)))
		msyslog(LOG_ERR,
			"set_wildcard_reuse: setsockopt(SO_REUSEADDR, %s) failed: %m",
			on ? "on" : "off");

	DPRINTF(4, ("set SO_REUSEADDR to %s on %s\n",
		    on ? "on" : "off",
		    stoa(&wild->sin)));
}
#endif /* OS_NEEDS_REUSEADDR_FOR_IFADDRBIND */
1614
1615 static isc_boolean_t
1616 check_flags(
1617         sockaddr_u *psau,
1618         const char *name,
1619         u_int32 flags
1620         )
1621 {
1622 #if defined(SIOCGIFAFLAG_IN)
1623         struct ifreq ifr;
1624         int fd;
1625
1626         if (psau->sa.sa_family != AF_INET)
1627                 return ISC_FALSE;
1628         if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1629                 return ISC_FALSE;
1630         ZERO(ifr);
1631         memcpy(&ifr.ifr_addr, &psau->sa, sizeof(ifr.ifr_addr));
1632         strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
1633         if (ioctl(fd, SIOCGIFAFLAG_IN, &ifr) < 0) {
1634                 close(fd);
1635                 return ISC_FALSE;
1636         }
1637         close(fd);
1638         if ((ifr.ifr_addrflags & flags) != 0)
1639                 return ISC_TRUE;
1640 #endif  /* SIOCGIFAFLAG_IN */
1641         return ISC_FALSE;
1642 }
1643
1644 static isc_boolean_t
1645 check_flags6(
1646         sockaddr_u *psau,
1647         const char *name,
1648         u_int32 flags6
1649         )
1650 {
1651 #if defined(INCLUDE_IPV6_SUPPORT) && defined(SIOCGIFAFLAG_IN6)
1652         struct in6_ifreq ifr6;
1653         int fd;
1654
1655         if (psau->sa.sa_family != AF_INET6)
1656                 return ISC_FALSE;
1657         if ((fd = socket(AF_INET6, SOCK_DGRAM, 0)) < 0)
1658                 return ISC_FALSE;
1659         ZERO(ifr6);
1660         memcpy(&ifr6.ifr_addr, &psau->sa6, sizeof(ifr6.ifr_addr));
1661         strlcpy(ifr6.ifr_name, name, sizeof(ifr6.ifr_name));
1662         if (ioctl(fd, SIOCGIFAFLAG_IN6, &ifr6) < 0) {
1663                 close(fd);
1664                 return ISC_FALSE;
1665         }
1666         close(fd);
1667         if ((ifr6.ifr_ifru.ifru_flags6 & flags6) != 0)
1668                 return ISC_TRUE;
1669 #endif  /* INCLUDE_IPV6_SUPPORT && SIOCGIFAFLAG_IN6 */
1670         return ISC_FALSE;
1671 }
1672
1673 static isc_boolean_t
1674 is_anycast(
1675         sockaddr_u *psau,
1676         const char *name
1677         )
1678 {
1679 #ifdef IN6_IFF_ANYCAST
1680         return check_flags6(psau, name, IN6_IFF_ANYCAST);
1681 #else
1682         return ISC_FALSE;
1683 #endif
1684 }
1685
1686 static isc_boolean_t
1687 is_valid(
1688         sockaddr_u *psau,
1689         const char *name
1690         )
1691 {
1692         u_int32 flags;
1693
1694         flags = 0;
1695         switch (psau->sa.sa_family) {
1696         case AF_INET:
1697 #ifdef IN_IFF_DETACHED
1698                 flags |= IN_IFF_DETACHED;
1699 #endif
1700 #ifdef IN_IFF_TENTATIVE
1701                 flags |= IN_IFF_TENTATIVE;
1702 #endif
1703                 return check_flags(psau, name, flags) ? ISC_FALSE : ISC_TRUE;
1704         case AF_INET6:
1705 #ifdef IN6_IFF_DEPARTED
1706                 flags |= IN6_IFF_DEPARTED;
1707 #endif
1708 #ifdef IN6_IFF_DETACHED
1709                 flags |= IN6_IFF_DETACHED;
1710 #endif
1711 #ifdef IN6_IFF_TENTATIVE
1712                 flags |= IN6_IFF_TENTATIVE;
1713 #endif
1714                 return check_flags6(psau, name, flags) ? ISC_FALSE : ISC_TRUE;
1715         default:
1716                 return ISC_FALSE;
1717         }
1718 }
1719
1720 /*
1721  * update_interface strategy
1722  *
1723  * toggle configuration phase
1724  *
1725  * Phase 1:
1726  * forall currently existing interfaces
1727  *   if address is known:
1728  *      drop socket - rebind again
1729  *
1730  *   if address is NOT known:
1731  *      attempt to create a new interface entry
1732  *
1733  * Phase 2:
1734  * forall currently known non MCAST and WILDCARD interfaces
1735  *   if interface does not match configuration phase (not seen in phase 1):
1736  *      remove interface from known interface list
1737  *      forall peers associated with this interface
1738  *         disconnect peer from this interface
1739  *
1740  * Phase 3:
1741  *   attempt to re-assign interfaces to peers
1742  *
1743  */
1744
1745 static int
1746 update_interfaces(
1747         u_short                 port,
1748         interface_receiver_t    receiver,
1749         void *                  data
1750         )
1751 {
1752         isc_mem_t *             mctx = (void *)-1;
1753         interface_info_t        ifi;
1754         isc_interfaceiter_t *   iter;
1755         isc_result_t            result;
1756         isc_interface_t         isc_if;
1757         int                     new_interface_found;
1758         unsigned int            family;
1759         endpt                   enumep;
1760         endpt *                 ep;
1761         endpt *                 next_ep;
1762
1763         DPRINTF(3, ("update_interfaces(%d)\n", port));
1764
1765         /*
1766          * phase one - scan interfaces
1767          * - create those that are not found
1768          * - update those that are found
1769          */
1770
1771         new_interface_found = FALSE;
1772         iter = NULL;
1773         result = isc_interfaceiter_create(mctx, &iter);
1774
1775         if (result != ISC_R_SUCCESS)
1776                 return 0;
1777
1778         /*
1779          * Toggle system interface scan phase to find untouched
1780          * interfaces to be deleted.
1781          */
1782         sys_interphase ^= 0x1;
1783
1784         for (result = isc_interfaceiter_first(iter);
1785              ISC_R_SUCCESS == result;
1786              result = isc_interfaceiter_next(iter)) {
1787
1788                 result = isc_interfaceiter_current(iter, &isc_if);
1789
1790                 if (result != ISC_R_SUCCESS)
1791                         break;
1792
1793                 /* See if we have a valid family to use */
1794                 family = isc_if.address.family;
1795                 if (AF_INET != family && AF_INET6 != family)
1796                         continue;
1797                 if (AF_INET == family && !ipv4_works)
1798                         continue;
1799                 if (AF_INET6 == family && !ipv6_works)
1800                         continue;
1801
1802                 /* create prototype */
1803                 init_interface(&enumep);
1804
1805                 convert_isc_if(&isc_if, &enumep, port);
1806
1807                 DPRINT_INTERFACE(4, (&enumep, "examining ", "\n"));
1808
1809                 /*
1810                  * Check if and how we are going to use the interface.
1811                  */
1812                 switch (interface_action(enumep.name, &enumep.sin,
1813                                          enumep.flags)) {
1814
1815                 case ACTION_IGNORE:
1816                         DPRINTF(4, ("ignoring interface %s (%s) - by nic rules\n",
1817                                     enumep.name, stoa(&enumep.sin)));
1818                         continue;
1819
1820                 case ACTION_LISTEN:
1821                         DPRINTF(4, ("listen interface %s (%s) - by nic rules\n",
1822                                     enumep.name, stoa(&enumep.sin)));
1823                         enumep.ignore_packets = ISC_FALSE;
1824                         break;
1825
1826                 case ACTION_DROP:
1827                         DPRINTF(4, ("drop on interface %s (%s) - by nic rules\n",
1828                                     enumep.name, stoa(&enumep.sin)));
1829                         enumep.ignore_packets = ISC_TRUE;
1830                         break;
1831                 }
1832
1833                  /* interfaces must be UP to be usable */
1834                 if (!(enumep.flags & INT_UP)) {
1835                         DPRINTF(4, ("skipping interface %s (%s) - DOWN\n",
1836                                     enumep.name, stoa(&enumep.sin)));
1837                         continue;
1838                 }
1839
1840                 /*
1841                  * skip any interfaces UP and bound to a wildcard
1842                  * address - some dhcp clients produce that in the
1843                  * wild
1844                  */
1845                 if (is_wildcard_addr(&enumep.sin))
1846                         continue;
1847
1848                 if (is_anycast(&enumep.sin, isc_if.name))
1849                         continue;
1850
1851                 /*
1852                  * skip any address that is an invalid state to be used
1853                  */
1854                 if (!is_valid(&enumep.sin, isc_if.name))
1855                         continue;
1856
1857                 /*
1858                  * map to local *address* in order to map all duplicate
1859                  * interfaces to an endpt structure with the appropriate
1860                  * socket.  Our name space is (ip-address), NOT
1861                  * (interface name, ip-address).
1862                  */
1863                 ep = getinterface(&enumep.sin, INT_WILDCARD);
1864
1865                 if (ep != NULL && refresh_interface(ep)) {
1866                         /*
1867                          * found existing and up to date interface -
1868                          * mark present.
1869                          */
1870                         if (ep->phase != sys_interphase) {
1871                                 /*
1872                                  * On a new round we reset the name so
1873                                  * the interface name shows up again if
1874                                  * this address is no longer shared.
1875                                  * We reset ignore_packets from the
1876                                  * new prototype to respect any runtime
1877                                  * changes to the nic rules.
1878                                  */
1879                                 strlcpy(ep->name, enumep.name,
1880                                         sizeof(ep->name));
1881                                 ep->ignore_packets =
1882                                             enumep.ignore_packets;
1883                         } else {
1884                                 /* name collision - rename interface */
1885                                 strlcpy(ep->name, "*multiple*",
1886                                         sizeof(ep->name));
1887                         }
1888
1889                         DPRINT_INTERFACE(4, (ep, "updating ",
1890                                              " present\n"));
1891
1892                         if (ep->ignore_packets !=
1893                             enumep.ignore_packets) {
1894                                 /*
1895                                  * We have conflicting configurations
1896                                  * for the interface address. This is
1897                                  * caused by using -I <interfacename>
1898                                  * for an interface that shares its
1899                                  * address with other interfaces. We
1900                                  * can not disambiguate incoming
1901                                  * packets delivered to this socket
1902                                  * without extra syscalls/features.
1903                                  * These are not (commonly) available.
1904                                  * Note this is a more unusual
1905                                  * configuration where several
1906                                  * interfaces share an address but
1907                                  * filtering via interface name is
1908                                  * attempted.  We resolve the
1909                                  * configuration conflict by disabling
1910                                  * the processing of received packets.
1911                                  * This leads to no service on the
1912                                  * interface address where the conflict
1913                                  * occurs.
1914                                  */
1915                                 msyslog(LOG_ERR,
1916                                         "WARNING: conflicting enable configuration for interfaces %s and %s for address %s - unsupported configuration - address DISABLED",
1917                                         enumep.name, ep->name,
1918                                         stoa(&enumep.sin));
1919
1920                                 ep->ignore_packets = ISC_TRUE;
1921                         }
1922
1923                         ep->phase = sys_interphase;
1924
1925                         ifi.action = IFS_EXISTS;
1926                         ifi.ep = ep;
1927                         if (receiver != NULL)
1928                                 (*receiver)(data, &ifi);
1929                 } else {
1930                         /*
1931                          * This is new or refreshing failed - add to
1932                          * our interface list.  If refreshing failed we
1933                          * will delete the interface structure in phase
1934                          * 2 as the interface was not marked current.
1935                          * We can bind to the address as the refresh
1936                          * code already closed the offending socket
1937                          */
1938                         ep = create_interface(port, &enumep);
1939
1940                         if (ep != NULL) {
1941                                 ifi.action = IFS_CREATED;
1942                                 ifi.ep = ep;
1943                                 if (receiver != NULL)
1944                                         (*receiver)(data, &ifi);
1945
1946                                 new_interface_found = TRUE;
1947                                 DPRINT_INTERFACE(3,
1948                                         (ep, "updating ",
1949                                          " new - created\n"));
1950                         } else {
1951                                 DPRINT_INTERFACE(3,
1952                                         (&enumep, "updating ",
1953                                          " new - creation FAILED"));
1954
1955                                 msyslog(LOG_INFO,
1956                                         "failed to init interface for address %s",
1957                                         stoa(&enumep.sin));
1958                                 continue;
1959                         }
1960                 }
1961         }
1962
1963         isc_interfaceiter_destroy(&iter);
1964
1965         /*
1966          * phase 2 - delete gone interfaces - reassigning peers to
1967          * other interfaces
1968          */
1969         for (ep = ep_list; ep != NULL; ep = next_ep) {
1970                 next_ep = ep->elink;
1971
1972                 /*
1973                  * if phase does not match sys_phase this interface was
1974                  * not enumerated during the last interface scan - so it
1975                  * is gone and will be deleted here unless it did not
1976                  * originate from interface enumeration (INT_WILDCARD,
1977                  * INT_MCASTIF).
1978                  */
1979                 if (((INT_WILDCARD | INT_MCASTIF) & ep->flags) ||
1980                     ep->phase == sys_interphase)
1981                         continue;
1982
1983                 DPRINT_INTERFACE(3, (ep, "updating ",
1984                                      "GONE - deleting\n"));
1985                 remove_interface(ep);
1986
1987                 ifi.action = IFS_DELETED;
1988                 ifi.ep = ep;
1989                 if (receiver != NULL)
1990                         (*receiver)(data, &ifi);
1991
1992                 /* disconnect peers from deleted endpt. */
1993                 while (ep->peers != NULL)
1994                         set_peerdstadr(ep->peers, NULL);
1995
1996                 /*
1997                  * update globals in case we lose
1998                  * a loopback interface
1999                  */
2000                 if (ep == loopback_interface)
2001                         loopback_interface = NULL;
2002
2003                 delete_interface(ep);
2004         }
2005
2006         /*
2007          * phase 3 - re-configure as the world has possibly changed
2008          *
2009          * never ever make this conditional again - it is needed to track
2010          * routing updates. see bug #2506
2011          */
2012         refresh_all_peerinterfaces();
2013
2014         if (broadcast_client_enabled || sys_bclient)
2015                 io_setbclient();
2016
2017 #ifdef MCAST
2018         /*
2019          * Check multicast interfaces and try to join multicast groups if
2020          * not joined yet.
2021          */
2022         for (ep = ep_list; ep != NULL; ep = ep->elink) {
2023                 remaddr_t *entry;
2024
2025                 if (!(INT_MCASTIF & ep->flags) || (INT_MCASTOPEN & ep->flags))
2026                         continue;
2027
2028                 /* Find remote address that was linked to this interface */
2029                 for (entry = remoteaddr_list;
2030                      entry != NULL;
2031                      entry = entry->link) {
2032                         if (entry->ep == ep) {
2033                                 if (socket_multicast_enable(ep, &entry->addr)) {
2034                                         msyslog(LOG_INFO,
2035                                                 "Joined %s socket to multicast group %s",
2036                                                 stoa(&ep->sin),
2037                                                 stoa(&entry->addr));
2038                                 }
2039                                 break;
2040                         }
2041                 }
2042         }
2043 #endif /* MCAST */
2044
2045         return new_interface_found;
2046 }
2047
2048
2049 /*
2050  * create_sockets - create a socket for each interface plus a default
2051  *                      socket for when we don't know where to send
2052  */
2053 static int
2054 create_sockets(
2055         u_short port
2056         )
2057 {
2058 #ifndef HAVE_IO_COMPLETION_PORT
2059         /*
2060          * I/O Completion Ports don't care about the select and FD_SET
2061          */
2062         maxactivefd = 0;
2063         FD_ZERO(&activefds);
2064 #endif
2065
2066         DPRINTF(2, ("create_sockets(%d)\n", port));
2067
2068         create_wildcards(port);
2069
2070         update_interfaces(port, NULL, NULL);
2071
2072         /*
2073          * Now that we have opened all the sockets, turn off the reuse
2074          * flag for security.
2075          */
2076         set_reuseaddr(0);
2077
2078         DPRINTF(2, ("create_sockets: Total interfaces = %d\n", ninterfaces));
2079
2080         return ninterfaces;
2081 }
2082
2083 /*
2084  * create_interface - create a new interface for a given prototype
2085  *                    binding the socket.
2086  */
2087 static struct interface *
2088 create_interface(
2089         u_short                 port,
2090         struct interface *      protot
2091         )
2092 {
2093         sockaddr_u      resmask;
2094         endpt *         iface;
2095 #if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2096         remaddr_t *     entry;
2097         remaddr_t *     next_entry;
2098 #endif
2099         DPRINTF(2, ("create_interface(%s#%d)\n", stoa(&protot->sin),
2100                     port));
2101
2102         /* build an interface */
2103         iface = new_interface(protot);
2104
2105         /*
2106          * create socket
2107          */
2108         iface->fd = open_socket(&iface->sin, 0, 0, iface);
2109
2110         if (iface->fd != INVALID_SOCKET)
2111                 log_listen_address(iface);
2112
2113         if ((INT_BROADCAST & iface->flags)
2114             && iface->bfd != INVALID_SOCKET)
2115                 msyslog(LOG_INFO, "Listening on broadcast address %s#%d",
2116                         stoa((&iface->bcast)), port);
2117
2118         if (INVALID_SOCKET == iface->fd
2119             && INVALID_SOCKET == iface->bfd) {
2120                 msyslog(LOG_ERR, "unable to create socket on %s (%d) for %s#%d",
2121                         iface->name,
2122                         iface->ifnum,
2123                         stoa((&iface->sin)),
2124                         port);
2125                 delete_interface(iface);
2126                 return NULL;
2127         }
2128
2129         /*
2130          * Blacklist our own addresses, no use talking to ourself
2131          */
2132         SET_HOSTMASK(&resmask, AF(&iface->sin));
2133         hack_restrict(RESTRICT_FLAGS, &iface->sin, &resmask,
2134                       -4, RESM_NTPONLY | RESM_INTERFACE, RES_IGNORE, 0);
2135
2136         /*
2137          * set globals with the first found
2138          * loopback interface of the appropriate class
2139          */
2140         if (NULL == loopback_interface && AF_INET == iface->family
2141             && (INT_LOOPBACK & iface->flags))
2142                 loopback_interface = iface;
2143
2144         /*
2145          * put into our interface list
2146          */
2147         add_addr_to_list(&iface->sin, iface);
2148         add_interface(iface);
2149
2150 #if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2151         /*
2152          * Join any previously-configured compatible multicast groups.
2153          */
2154         if (INT_MULTICAST & iface->flags &&
2155             !((INT_LOOPBACK | INT_WILDCARD) & iface->flags) &&
2156             !iface->ignore_packets) {
2157                 for (entry = remoteaddr_list;
2158                      entry != NULL;
2159                      entry = next_entry) {
2160                         next_entry = entry->link;
2161                         if (AF(&iface->sin) != AF(&entry->addr) ||
2162                             !IS_MCAST(&entry->addr))
2163                                 continue;
2164                         if (socket_multicast_enable(iface,
2165                                                     &entry->addr))
2166                                 msyslog(LOG_INFO,
2167                                         "Joined %s socket to multicast group %s",
2168                                         stoa(&iface->sin),
2169                                         stoa(&entry->addr));
2170                         else
2171                                 msyslog(LOG_ERR,
2172                                         "Failed to join %s socket to multicast group %s",
2173                                         stoa(&iface->sin),
2174                                         stoa(&entry->addr));
2175                 }
2176         }
2177 #endif  /* MCAST && MCAST_NONEWSOCKET */
2178
2179         DPRINT_INTERFACE(2, (iface, "created ", "\n"));
2180         return iface;
2181 }
2182
2183
#ifdef SO_EXCLUSIVEADDRUSE
/*
 * set_excladdruse - turn on SO_EXCLUSIVEADDRUSE for socket fd and log
 *		     an error if that fails.
 */
static void
set_excladdruse(
	SOCKET fd
	)
{
	int enable = 1;
#ifdef SYS_WINNT
	DWORD lasterr;
#endif

	if (0 == setsockopt(fd, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
			    (void *)&enable, sizeof(enable)))
		return;

#ifdef SYS_WINNT
	/*
	 * Before Windows XP (5.1), setting SO_EXCLUSIVEADDRUSE may be
	 * rejected with WSAEINVAL depending on the service pack level
	 * and on whether the account belongs to the Administrators
	 * group.  Stay quiet about that particular failure on those
	 * versions.
	 */
	lasterr = GetLastError();

	if (isc_win32os_versioncheck(5, 1, 0, 0) < 0	/* < 5.1/XP */
	    && WSAEINVAL == lasterr)
		return;

	/* put the saved error code back for the %m below */
	SetLastError(lasterr);
#endif
	msyslog(LOG_ERR,
		"setsockopt(%d, SO_EXCLUSIVEADDRUSE, on): %m",
		(int)fd);
}
#endif  /* SO_EXCLUSIVEADDRUSE */
2222
2223
2224 /*
2225  * set_reuseaddr() - set/clear REUSEADDR on all sockets
2226  *                      NB possible hole - should we be doing this on broadcast
2227  *                      fd's also?
2228  */
2229 static void
2230 set_reuseaddr(
2231         int flag
2232         )
2233 {
2234 #ifndef SO_EXCLUSIVEADDRUSE
2235         endpt *ep;
2236
2237         for (ep = ep_list; ep != NULL; ep = ep->elink) {
2238                 if (ep->flags & INT_WILDCARD)
2239                         continue;
2240
2241                 /*
2242                  * if ep->fd  is INVALID_SOCKET, we might have a adapter
2243                  * configured but not present
2244                  */
2245                 DPRINTF(4, ("setting SO_REUSEADDR on %.16s@%s to %s\n",
2246                             ep->name, stoa(&ep->sin),
2247                             flag ? "on" : "off"));
2248
2249                 if (ep->fd != INVALID_SOCKET) {
2250                         if (setsockopt(ep->fd, SOL_SOCKET, SO_REUSEADDR,
2251                                        (void *)&flag, sizeof(flag))) {
2252                                 msyslog(LOG_ERR, "set_reuseaddr: setsockopt(%s, SO_REUSEADDR, %s) failed: %m",
2253                                         stoa(&ep->sin), flag ? "on" : "off");
2254                         }
2255                 }
2256         }
2257 #endif /* ! SO_EXCLUSIVEADDRUSE */
2258 }
2259
2260 /*
2261  * This is just a wrapper around an internal function so we can
2262  * make other changes as necessary later on
2263  */
2264 void
2265 enable_broadcast(
2266         struct interface *      iface,
2267         sockaddr_u *            baddr
2268         )
2269 {
2270 #ifdef OPEN_BCAST_SOCKET
2271         socket_broadcast_enable(iface, iface->fd, baddr);
2272 #endif
2273 }
2274
2275 #ifdef OPEN_BCAST_SOCKET
2276 /*
2277  * Enable a broadcast address to a given socket
2278  * The socket is in the ep_list all we need to do is enable
2279  * broadcasting. It is not this function's job to select the socket
2280  */
2281 static isc_boolean_t
2282 socket_broadcast_enable(
2283         struct interface *      iface,
2284         SOCKET                  fd,
2285         sockaddr_u *            baddr
2286         )
2287 {
2288 #ifdef SO_BROADCAST
2289         int on = 1;
2290
2291         if (IS_IPV4(baddr)) {
2292                 /* if this interface can support broadcast, set SO_BROADCAST */
2293                 if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
2294                                (void *)&on, sizeof(on)))
2295                         msyslog(LOG_ERR,
2296                                 "setsockopt(SO_BROADCAST) enable failure on address %s: %m",
2297                                 stoa(baddr));
2298                 else
2299                         DPRINTF(2, ("Broadcast enabled on socket %d for address %s\n",
2300                                     fd, stoa(baddr)));
2301         }
2302         iface->flags |= INT_BCASTXMIT;
2303         return ISC_TRUE;
2304 #else
2305         return ISC_FALSE;
2306 #endif /* SO_BROADCAST */
2307 }
2308
#ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
/*
 * socket_broadcast_disable - switch off broadcasting on iface->fd.
 *
 * The caller has already chosen the socket; selecting it is not this
 * function's job.  For an IPv4 baddr SO_BROADCAST is cleared and a
 * failure to do so is logged.  INT_BCASTXMIT is cleared in
 * iface->flags either way.
 *
 * Returns ISC_TRUE when SO_BROADCAST is available at compile time,
 * ISC_FALSE otherwise.
 */
static isc_boolean_t
socket_broadcast_disable(
	struct interface *	iface,
	sockaddr_u *		baddr
	)
{
#ifndef SO_BROADCAST
	return ISC_FALSE;
#else
	int disable = 0;	/* This seems to be OK as an int */

	if (IS_IPV4(baddr)) {
		if (0 != setsockopt(iface->fd, SOL_SOCKET, SO_BROADCAST,
				    (void *)&disable, sizeof(disable))) {
			msyslog(LOG_ERR,
				"setsockopt(SO_BROADCAST) disable failure on address %s: %m",
				stoa(baddr));
		}
	}

	iface->flags &= ~INT_BCASTXMIT;

	return ISC_TRUE;
#endif /* SO_BROADCAST */
}
#endif /* OS_MISSES_SPECIFIC_ROUTE_UPDATES */
2337
2338 #endif /* OPEN_BCAST_SOCKET */
2339
2340 /*
2341  * return the broadcast client flag value
2342  */
2343 /*isc_boolean_t
2344 get_broadcastclient_flag(void)
2345 {
2346         return (broadcast_client_enabled);
2347 }
2348 */
2349
2350 /*
2351  * Check to see if the address is a multicast address
2352  */
2353 static isc_boolean_t
2354 addr_ismulticast(
2355         sockaddr_u *maddr
2356         )
2357 {
2358         isc_boolean_t result;
2359
2360 #ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
2361         /*
2362          * If we don't have IPV6 support any IPV6 addr is not multicast
2363          */
2364         if (IS_IPV6(maddr))
2365                 result = ISC_FALSE;
2366         else
2367 #endif
2368                 result = IS_MCAST(maddr);
2369
2370         if (!result)
2371                 DPRINTF(4, ("address %s is not multicast\n",
2372                             stoa(maddr)));
2373
2374         return result;
2375 }
2376
2377 /*
2378  * Multicast servers need to set the appropriate Multicast interface
2379  * socket option in order for it to know which interface to use for
2380  * send the multicast packet.
2381  */
2382 void
2383 enable_multicast_if(
2384         struct interface *      iface,
2385         sockaddr_u *            maddr
2386         )
2387 {
2388 #ifdef MCAST
2389 #ifdef IP_MULTICAST_LOOP
2390         TYPEOF_IP_MULTICAST_LOOP off = 0;
2391 #endif
2392 #if defined(INCLUDE_IPV6_MULTICAST_SUPPORT) && defined(IPV6_MULTICAST_LOOP)
2393         u_int off6 = 0;
2394 #endif
2395
2396         REQUIRE(AF(maddr) == AF(&iface->sin));
2397
2398         switch (AF(&iface->sin)) {
2399
2400         case AF_INET:
2401 #ifdef IP_MULTICAST_LOOP
2402                 /*
2403                  * Don't send back to itself, but allow failure to set
2404                  */
2405                 if (setsockopt(iface->fd, IPPROTO_IP,
2406                                IP_MULTICAST_LOOP,
2407                                (void *)&off,
2408                                sizeof(off))) {
2409
2410                         msyslog(LOG_ERR,
2411                                 "setsockopt IP_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2412                                 iface->fd, stoa(&iface->sin),
2413                                 stoa(maddr));
2414                 }
2415 #endif
2416                 break;
2417
2418         case AF_INET6:
2419 #ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2420 #ifdef IPV6_MULTICAST_LOOP
2421                 /*
2422                  * Don't send back to itself, but allow failure to set
2423                  */
2424                 if (setsockopt(iface->fd, IPPROTO_IPV6,
2425                                IPV6_MULTICAST_LOOP,
2426                                (void *) &off6, sizeof(off6))) {
2427
2428                         msyslog(LOG_ERR,
2429                                 "setsockopt IPV6_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2430                                 iface->fd, stoa(&iface->sin),
2431                                 stoa(maddr));
2432                 }
2433 #endif
2434                 break;
2435 #else
2436                 return;
2437 #endif  /* INCLUDE_IPV6_MULTICAST_SUPPORT */
2438         }
2439         return;
2440 #endif
2441 }
2442
/*
 * socket_multicast_enable - join multicast group maddr on iface->fd.
 *
 * The socket is in the ep_list, all we need to do is enable
 * multicasting.  It is not this function's job to select the socket.
 *
 * On success INT_MCASTOPEN is set in iface->flags, iface->num_mcast
 * is incremented and ISC_TRUE is returned.  ISC_FALSE is returned,
 * with iface left untouched, when the setsockopt() call fails, when
 * maddr is IPv6 but INCLUDE_IPV6_MULTICAST_SUPPORT is not defined, or
 * when maddr is of any other address family.
 */
#if defined(MCAST)
static isc_boolean_t
socket_multicast_enable(
	endpt *		iface,
	sockaddr_u *	maddr
	)
{
	struct ip_mreq		mreq;
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
	struct ipv6_mreq	mreq6;
# endif

	switch (AF(maddr)) {

	case AF_INET:
		ZERO(mreq);
		mreq.imr_multiaddr = SOCK_ADDR4(maddr);
		/* INADDR_ANY leaves the interface choice to the kernel */
		mreq.imr_interface.s_addr = htonl(INADDR_ANY);
		if (setsockopt(iface->fd,
			       IPPROTO_IP,
			       IP_ADD_MEMBERSHIP,
			       (void *)&mreq,
			       sizeof(mreq))) {
			DPRINTF(2, (
				"setsockopt IP_ADD_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)\n",
				iface->fd, stoa(&iface->sin),
				mreq.imr_multiaddr.s_addr,
				mreq.imr_interface.s_addr,
				stoa(maddr)));
			return ISC_FALSE;
		}
		DPRINTF(4, ("Added IPv4 multicast membership on socket %d, addr %s for %x / %x (%s)\n",
			    iface->fd, stoa(&iface->sin),
			    mreq.imr_multiaddr.s_addr,
			    mreq.imr_interface.s_addr, stoa(maddr)));
		break;

	case AF_INET6:
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
		/*
		 * Enable reception of multicast packets.
		 * If the address is link-local we can get the
		 * interface index from the scope id. Don't do this
		 * for other types of multicast addresses. For now let
		 * the kernel figure it out.
		 */
		ZERO(mreq6);
		mreq6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
		mreq6.ipv6mr_interface = iface->ifindex;

		if (setsockopt(iface->fd, IPPROTO_IPV6,
			       IPV6_JOIN_GROUP, (void *)&mreq6,
			       sizeof(mreq6))) {
			DPRINTF(2, (
				"setsockopt IPV6_JOIN_GROUP failed: %m on socket %d, addr %s for interface %u (%s)\n",
				iface->fd, stoa(&iface->sin),
				mreq6.ipv6mr_interface, stoa(maddr)));
			return ISC_FALSE;
		}
		DPRINTF(4, ("Added IPv6 multicast group on socket %d, addr %s for interface %u (%s)\n",
			    iface->fd, stoa(&iface->sin),
			    mreq6.ipv6mr_interface, stoa(maddr)));
		break;
# else
		return ISC_FALSE;
# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */

	default:
		/*
		 * No group was joined for an unsupported address
		 * family, so do not claim an open membership.
		 */
		return ISC_FALSE;
	}

	iface->flags |= INT_MCASTOPEN;
	iface->num_mcast++;

	return ISC_TRUE;
}
#endif	/* MCAST */
2519
2520
/*
 * socket_multicast_disable - leave multicast group maddr on iface->fd.
 *
 * The socket is in the ep_list, all we need to do is disable
 * multicasting.  It is not this function's job to select the socket.
 *
 * If find_addr_in_list() does not know maddr there is nothing to do
 * and ISC_TRUE is returned.  After a successful setsockopt() call
 * iface->num_mcast is decremented, INT_MCASTOPEN is cleared once the
 * count reaches zero, and ISC_TRUE is returned.  ISC_FALSE is
 * returned, with iface left untouched, when the setsockopt() call
 * fails, when maddr is IPv6 but INCLUDE_IPV6_MULTICAST_SUPPORT is not
 * defined, or when maddr is of any other address family.
 */
#ifdef MCAST
static isc_boolean_t
socket_multicast_disable(
	struct interface *	iface,
	sockaddr_u *		maddr
	)
{
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
	struct ipv6_mreq mreq6;
# endif
	struct ip_mreq mreq;

	ZERO(mreq);

	if (find_addr_in_list(maddr) == NULL) {
		DPRINTF(4, ("socket_multicast_disable(%s): not found\n",
			    stoa(maddr)));
		return ISC_TRUE;
	}

	switch (AF(maddr)) {

	case AF_INET:
		mreq.imr_multiaddr = SOCK_ADDR4(maddr);
		mreq.imr_interface = SOCK_ADDR4(&iface->sin);
		if (setsockopt(iface->fd, IPPROTO_IP,
			       IP_DROP_MEMBERSHIP, (void *)&mreq,
			       sizeof(mreq))) {

			msyslog(LOG_ERR,
				"setsockopt IP_DROP_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)",
				iface->fd, stoa(&iface->sin),
				SRCADR(maddr), SRCADR(&iface->sin),
				stoa(maddr));
			return ISC_FALSE;
		}
		break;

	case AF_INET6:
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
		/*
		 * Disable reception of multicast packets
		 * If the address is link-local we can get the
		 * interface index from the scope id.  Don't do this
		 * for other types of multicast addresses. For now let
		 * the kernel figure it out.
		 */
		/* clear the request first, as socket_multicast_enable does */
		ZERO(mreq6);
		mreq6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
		mreq6.ipv6mr_interface = iface->ifindex;

		if (setsockopt(iface->fd, IPPROTO_IPV6,
			       IPV6_LEAVE_GROUP, (void *)&mreq6,
			       sizeof(mreq6))) {

			msyslog(LOG_ERR,
				"setsockopt IPV6_LEAVE_GROUP failure: %m on socket %d, addr %s for %d (%s)",
				iface->fd, stoa(&iface->sin),
				iface->ifindex, stoa(maddr));
			return ISC_FALSE;
		}
		break;
# else
		return ISC_FALSE;
# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */

	default:
		/*
		 * No membership was dropped for an unsupported address
		 * family, so leave the membership count alone.
		 */
		return ISC_FALSE;
	}

	iface->num_mcast--;
	if (!iface->num_mcast)
		iface->flags &= ~INT_MCASTOPEN;

	return ISC_TRUE;
}
#endif	/* MCAST */
2598
2599 /*
2600  * io_setbclient - open the broadcast client sockets
2601  */
2602 void
2603 io_setbclient(void)
2604 {
2605 #ifdef OPEN_BCAST_SOCKET
2606         endpt *         ep;
2607         unsigned int    nif, ni4, ni6;
2608
2609         nif = ni4 = ni6 = 0;
2610         set_reuseaddr(1);
2611
2612         for (ep = ep_list; ep != NULL; ep = ep->elink) {
2613                 /* count IPv6 vs IPv4 interfaces. Needed later to decide
2614                  * if we should log an error or not.
2615                  */
2616                 switch (ep->family) {
2617                 case AF_INET : ++ni4; break;
2618                 case AF_INET6: ++ni6; break;
2619                 default      :        break;
2620                 }
2621                 
2622                 if (ep->flags & (INT_WILDCARD | INT_LOOPBACK))
2623                         continue;
2624
2625                 /* use only allowed addresses */
2626                 if (ep->ignore_packets)
2627                         continue;
2628
2629                 /* Need a broadcast-capable interface */
2630                 if (!(ep->flags & INT_BROADCAST))
2631                         continue;
2632
2633                 /* Only IPv4 addresses are valid for broadcast */
2634                 REQUIRE(IS_IPV4(&ep->bcast));
2635
2636                 /* Do we already have the broadcast address open? */
2637                 if (ep->flags & INT_BCASTOPEN) {
2638                         /*
2639                          * account for already open interfaces to avoid
2640                          * misleading warning below
2641                          */
2642                         nif++;
2643                         continue;
2644                 }
2645
2646                 /*
2647                  * Try to open the broadcast address
2648                  */
2649                 ep->family = AF_INET;
2650                 ep->bfd = open_socket(&ep->bcast, 1, 0, ep);
2651
2652                 /*
2653                  * If we succeeded then we use it otherwise enable
2654                  * broadcast on the interface address
2655                  */
2656                 if (ep->bfd != INVALID_SOCKET) {
2657                         nif++;
2658                         ep->flags |= INT_BCASTOPEN;
2659                         msyslog(LOG_INFO,
2660                                 "Listen for broadcasts to %s on interface #%d %s",
2661                                 stoa(&ep->bcast), ep->ifnum, ep->name);
2662                 } else switch (errno) {
2663                         /* Silently ignore EADDRINUSE as we probably
2664                          * opened the socket already for an address in
2665                          * the same network */
2666                 case EADDRINUSE:
2667                         /* Some systems cannot bind a socket to a broadcast
2668                          * address, as that is not a valid host address. */
2669                 case EADDRNOTAVAIL:
2670 #                   ifdef SYS_WINNT     /*TODO: use for other systems, too? */
2671                         /* avoid recurrence here -- if we already have a
2672                          * regular socket, it's quite useless to try this
2673                          * again.
2674                          */
2675                         if (ep->fd != INVALID_SOCKET) {
2676                                 ep->flags |= INT_BCASTOPEN;
2677                                 nif++;
2678                         }
2679 #                   endif
2680                         break;
2681
2682                 default:
2683                         msyslog(LOG_INFO,
2684                                 "failed to listen for broadcasts to %s on interface #%d %s",
2685                                 stoa(&ep->bcast), ep->ifnum, ep->name);
2686                         break;
2687                 }
2688         }
2689         set_reuseaddr(0);
2690         if (nif != 0) {
2691                 broadcast_client_enabled = ISC_TRUE;
2692                 DPRINTF(1, ("io_setbclient: listening to %d broadcast addresses\n", nif));
2693         } else {
2694                 broadcast_client_enabled = ISC_FALSE;
2695                 /* This is expected when having only IPv6 interfaces
2696                  * and no IPv4 interfaces at all. We suppress the error
2697                  * log in that case... everything else should work!
2698                  */
2699                 if (ni4 && !ni6) {
2700                         msyslog(LOG_ERR,
2701                                 "Unable to listen for broadcasts, no broadcast interfaces available");
2702                 }
2703         }
2704 #else
2705         msyslog(LOG_ERR,
2706                 "io_setbclient: Broadcast Client disabled by build");
2707 #endif  /* OPEN_BCAST_SOCKET */
2708 }
2709
2710 /*
2711  * io_unsetbclient - close the broadcast client sockets
2712  */
2713 void
2714 io_unsetbclient(void)
2715 {
2716         endpt *ep;
2717
2718         for (ep = ep_list; ep != NULL; ep = ep->elink) {
2719                 if (INT_WILDCARD & ep->flags)
2720                         continue;
2721                 if (!(INT_BCASTOPEN & ep->flags))
2722                         continue;
2723
2724                 if (ep->bfd != INVALID_SOCKET) {
2725                         /* destroy broadcast listening socket */
2726                         msyslog(LOG_INFO,
2727                                 "stop listening for broadcasts to %s on interface #%d %s",
2728                                 stoa(&ep->bcast), ep->ifnum, ep->name);
2729 #                   ifdef HAVE_IO_COMPLETION_PORT
2730                         io_completion_port_remove_socket(ep->bfd, ep);
2731 #                   endif
2732                         close_and_delete_fd_from_list(ep->bfd);
2733                         ep->bfd = INVALID_SOCKET;
2734                 }
2735                 ep->flags &= ~INT_BCASTOPEN;
2736         }
2737         broadcast_client_enabled = ISC_FALSE;
2738 }
2739
2740 /*
2741  * io_multicast_add() - add multicast group address
      *
      * addr is the group address to join.  Addresses that are not
      * multicast are silently ignored; a group that is already open
      * (INT_MCASTOPEN) only produces an informational log entry.
      *
      * Without MULTICAST_NONEWSOCKET a new endpoint with its own socket
      * bound to the group address (port NTP_PORT) is created; if that
      * socket cannot be opened the matching wildcard endpoint is used
      * instead.  With MULTICAST_NONEWSOCKET every eligible existing
      * endpoint socket is joined to the group.  In both cases the
      * address is finally recorded with add_addr_to_list().
      *
      * When built without MCAST only an error is logged.
2742  */
2743 void
2744 io_multicast_add(
2745         sockaddr_u *addr
2746         )
2747 {
2748 #ifdef MCAST
2749         endpt * ep;
2750         endpt * one_ep;
2751
2752         /*
2753          * Check to see if this is a multicast address
2754          */
2755         if (!addr_ismulticast(addr))
2756                 return;
2757
2758         /* If we already have it we can just return */
2759         if (NULL != find_flagged_addr_in_list(addr, INT_MCASTOPEN)) {
2760                 msyslog(LOG_INFO,
2761                         "Duplicate request found for multicast address %s",
2762                         stoa(addr));
2763                 return;
2764         }
2765
2766 # ifndef MULTICAST_NONEWSOCKET
2767         ep = new_interface(NULL);
2768
2769         /*
2770          * Open a new socket for the multicast address
2771          */
2772         ep->sin = *addr;
2773         SET_PORT(&ep->sin, NTP_PORT);
2774         ep->family = AF(&ep->sin);
2775         AF(&ep->mask) = ep->family;
2776         SET_ONESMASK(&ep->mask);
2777
             /* NOTE(review): set_reuseaddr(1) is not paired with a
              * set_reuseaddr(0) inside this function -- confirm intended.
              */
2778         set_reuseaddr(1);
2779         ep->bfd = INVALID_SOCKET;
2780         ep->fd = open_socket(&ep->sin, 0, 0, ep);
2781         if (ep->fd != INVALID_SOCKET) {
2782                 ep->ignore_packets = ISC_FALSE;
2783                 ep->flags |= INT_MCASTIF;
2784                 ep->ifindex = SCOPE(addr);
2785
2786                 strlcpy(ep->name, "multicast", sizeof(ep->name));
2787                 DPRINT_INTERFACE(2, (ep, "multicast add ", "\n"));
2788                 add_interface(ep);
2789                 log_listen_address(ep);
2790         } else {
2791                 /* bind failed, re-use wildcard interface */
2792                 delete_interface(ep);
2793
2794                 if (IS_IPV4(addr))
2795                         ep = wildipv4;
2796                 else if (IS_IPV6(addr))
2797                         ep = wildipv6;
2798                 else
2799                         ep = NULL;
2800
2801                 if (ep != NULL) {
2802                         /* HACK ! -- stuff in an address */
2803                         /* because we don't bind addr? DH */
2804                         ep->bcast = *addr;
2805                         msyslog(LOG_ERR,
2806                                 "multicast address %s using wildcard interface #%d %s",
2807                                 stoa(addr), ep->ifnum, ep->name);
2808                 } else {
2809                         msyslog(LOG_ERR,
2810                                 "No multicast socket available to use for address %s",
2811                                 stoa(addr));
2812                         return;
2813                 }
2814         }
             /*
              * The brace opened below is closed after the #endif: it
              * stands in for the body of the for loop used in the
              * MULTICAST_NONEWSOCKET branch, so the join code that
              * follows the #endif is shared by both variants.
              */
2815         {       /* in place of the { following for in #else clause */
2816                 one_ep = ep;
2817 # else  /* MULTICAST_NONEWSOCKET follows */
2818         /*
2819          * For the case where we can't use a separate socket (Windows)
2820          * join each applicable endpoint socket to the group address.
2821          */
             /* one_ep defaults to the wildcard endpoint of the address
              * family and is replaced by the last endpoint that passes
              * the filter in the loop below.
              */
2822         if (IS_IPV4(addr))
2823                 one_ep = wildipv4;
2824         else
2825                 one_ep = wildipv6;
2826         for (ep = ep_list; ep != NULL; ep = ep->elink) {
                     /* skip ignored, wrong-family, non-multicast,
                      * loopback and wildcard endpoints
                      */
2827                 if (ep->ignore_packets || AF(&ep->sin) != AF(addr) ||
2828                     !(INT_MULTICAST & ep->flags) ||
2829                     (INT_LOOPBACK | INT_WILDCARD) & ep->flags)
2830                         continue;
2831                 one_ep = ep;
2832 # endif /* MULTICAST_NONEWSOCKET */
                     /* the result of the join only decides whether the
                      * success message is logged here
                      */
2833                 if (socket_multicast_enable(ep, addr))
2834                         msyslog(LOG_INFO,
2835                                 "Joined %s socket to multicast group %s",
2836                                 stoa(&ep->sin),
2837                                 stoa(addr));
2838         }
2839
             /* recorded regardless of whether a join succeeded */
2840         add_addr_to_list(addr, one_ep);
2841 #else   /* !MCAST  follows*/
2842         msyslog(LOG_ERR,
2843                 "Can not add multicast address %s: no multicast support",
2844                 stoa(addr));
2845 #endif
2846         return;
2847 }
2848
2849
2850 /*
2851  * io_multicast_del() - delete multicast group address
2852  */
2853 void
2854 io_multicast_del(
2855         sockaddr_u *    addr
2856         )
2857 {
2858 #ifdef MCAST
2859         endpt *iface;
2860
2861         /*
2862          * Check to see if this is a multicast address
2863          */
2864         if (!addr_ismulticast(addr)) {
2865                 msyslog(LOG_ERR, "invalid multicast address %s",
2866                         stoa(addr));
2867                 return;
2868         }
2869
2870         /*
2871          * Disable reception of multicast packets
2872          */
2873         while ((iface = find_flagged_addr_in_list(addr, INT_MCASTOPEN))
2874                != NULL)
2875                 socket_multicast_disable(iface, addr);
2876
2877         delete_addr_from_list(addr);
2878
2879 #else /* not MCAST */
2880         msyslog(LOG_ERR,
2881                 "Can not delete multicast address %s: no multicast support",
2882                 stoa(addr));
2883 #endif /* not MCAST */
2884 }
2885
2886
2887 /*
2888  * open_socket - open a socket, returning the file descriptor
2889  */
2890
2891 static SOCKET
2892 open_socket(
2893         sockaddr_u *    addr,
2894         int             bcast,
2895         int             turn_off_reuse,
2896         endpt *         interf
2897         )
2898 {
2899         SOCKET  fd;
2900         int     errval;
2901         /*
2902          * int is OK for REUSEADR per
2903          * http://www.kohala.com/start/mcast.api.txt
2904          */
2905         int     on = 1;
2906         int     off = 0;
2907
2908         if (IS_IPV6(addr) && !ipv6_works)
2909                 return INVALID_SOCKET;
2910
2911         /* create a datagram (UDP) socket */
2912         fd = socket(AF(addr), SOCK_DGRAM, 0);
2913         if (INVALID_SOCKET == fd) {
2914                 errval = socket_errno();
2915                 msyslog(LOG_ERR,
2916                         "socket(AF_INET%s, SOCK_DGRAM, 0) failed on address %s: %m",
2917                         IS_IPV6(addr) ? "6" : "", stoa(addr));
2918
2919                 if (errval == EPROTONOSUPPORT ||
2920                     errval == EAFNOSUPPORT ||
2921                     errval == EPFNOSUPPORT)
2922                         return (INVALID_SOCKET);
2923
2924                 errno = errval;
2925                 msyslog(LOG_ERR,
2926                         "unexpected socket() error %m code %d (not EPROTONOSUPPORT nor EAFNOSUPPORT nor EPFNOSUPPORT) - exiting",
2927                         errno);
2928                 exit(1);
2929         }
2930
2931 #ifdef SYS_WINNT
2932         connection_reset_fix(fd, addr);
2933 #endif
2934         /*
2935          * Fixup the file descriptor for some systems
2936          * See bug #530 for details of the issue.
2937          */
2938         fd = move_fd(fd);
2939
2940         /*
2941          * set SO_REUSEADDR since we will be binding the same port
2942          * number on each interface according to turn_off_reuse.
2943          * This is undesirable on Windows versions starting with
2944          * Windows XP (numeric version 5.1).
2945          */
2946 #ifdef SYS_WINNT
2947         if (isc_win32os_versioncheck(5, 1, 0, 0) < 0)  /* before 5.1 */
2948 #endif
2949                 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
2950                                (void *)((turn_off_reuse)
2951                                             ? &off
2952                                             : &on),
2953                                sizeof(on))) {
2954
2955                         msyslog(LOG_ERR,
2956                                 "setsockopt SO_REUSEADDR %s fails for address %s: %m",
2957                                 (turn_off_reuse)
2958                                     ? "off"
2959                                     : "on",
2960                                 stoa(addr));
2961                         closesocket(fd);
2962                         return INVALID_SOCKET;
2963                 }
2964 #ifdef SO_EXCLUSIVEADDRUSE
2965         /*
2966          * setting SO_EXCLUSIVEADDRUSE on the wildcard we open
2967          * first will cause more specific binds to fail.
2968          */
2969         if (!(interf->flags & INT_WILDCARD))
2970                 set_excladdruse(fd);
2971 #endif
2972
2973         /*
2974          * IPv4 specific options go here
2975          */
2976         if (IS_IPV4(addr)) {
2977 #if defined(IPPROTO_IP) && defined(IP_TOS)
2978                 if (setsockopt(fd, IPPROTO_IP, IP_TOS, (void *)&qos,
2979                                sizeof(qos)))
2980                         msyslog(LOG_ERR,
2981                                 "setsockopt IP_TOS (%02x) fails on address %s: %m",
2982                                 qos, stoa(addr));
2983 #endif /* IPPROTO_IP && IP_TOS */
2984                 if (bcast)
2985                         socket_broadcast_enable(interf, fd, addr);
2986         }
2987
2988         /*
2989          * IPv6 specific options go here
2990          */
2991         if (IS_IPV6(addr)) {
2992 #if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
2993                 if (setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, (void *)&qos,
2994                                sizeof(qos)))
2995                         msyslog(LOG_ERR,
2996                                 "setsockopt IPV6_TCLASS (%02x) fails on address %s: %m",
2997                                 qos, stoa(addr));
2998 #endif /* IPPROTO_IPV6 && IPV6_TCLASS */
2999 #ifdef IPV6_V6ONLY
3000                 if (isc_net_probe_ipv6only() == ISC_R_SUCCESS
3001                     && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
3002                     (void *)&on, sizeof(on)))
3003                         msyslog(LOG_ERR,
3004                                 "setsockopt IPV6_V6ONLY on fails on address %s: %m",
3005                                 stoa(addr));
3006 #endif
3007 #ifdef IPV6_BINDV6ONLY
3008                 if (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDV6ONLY,
3009                     (void *)&on, sizeof(on)))
3010                         msyslog(LOG_ERR,
3011                                 "setsockopt IPV6_BINDV6ONLY on fails on address %s: %m",
3012                                 stoa(addr));
3013 #endif
3014         }
3015
3016 #ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
3017         /*
3018          * some OSes don't allow binding to more specific
3019          * addresses if a wildcard address already bound
3020          * to the port and SO_REUSEADDR is not set
3021          */
3022         if (!is_wildcard_addr(addr))
3023                 set_wildcard_reuse(AF(addr), 1);
3024 #endif
3025
3026         /*
3027          * bind the local address.
3028          */
3029         errval = bind(fd, &addr->sa, SOCKLEN(addr));
3030
3031 #ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
3032         if (!is_wildcard_addr(addr))
3033                 set_wildcard_reuse(AF(addr), 0);
3034 #endif
3035
3036         if (errval < 0) {
3037                 /*
3038                  * Don't log this under all conditions
3039                  */
3040                 if (turn_off_reuse == 0
3041 #ifdef DEBUG
3042                     || debug > 1
3043 #endif
3044                     ) {
3045                         msyslog(LOG_ERR,
3046                                 "bind(%d) AF_INET%s %s#%d%s flags 0x%x failed: %m",
3047                                 fd, IS_IPV6(addr) ? "6" : "",
3048                                 stoa(addr), SRCPORT(addr),
3049                                 IS_MCAST(addr) ? " (multicast)" : "",
3050                                 interf->flags);
3051                 }
3052
3053                 closesocket(fd);
3054
3055                 return INVALID_SOCKET;
3056         }
3057
3058 #ifdef HAVE_TIMESTAMP
3059         {
3060                 if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP,
3061                                (void *)&on, sizeof(on)))
3062                         msyslog(LOG_DEBUG,
3063                                 "setsockopt SO_TIMESTAMP on fails on address %s: %m",
3064                                 stoa(addr));
3065                 else
3066                         DPRINTF(4, ("setsockopt SO_TIMESTAMP enabled on fd %d address %s\n",
3067                                     fd, stoa(addr)));
3068         }
3069 #endif
3070 #ifdef HAVE_TIMESTAMPNS
3071         {
3072                 if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS,
3073                                (void *)&on, sizeof(on)))
3074                         msyslog(LOG_DEBUG,
3075                                 "setsockopt SO_TIMESTAMPNS on fails on address %s: %m",
3076                                 stoa(addr));
3077                 else
3078                         DPRINTF(4, ("setsockopt SO_TIMESTAMPNS enabled on fd %d address %s\n",
3079                                     fd, stoa(addr)));
3080         }
3081 #endif
3082 #ifdef HAVE_BINTIME
3083         {
3084                 if (setsockopt(fd, SOL_SOCKET, SO_BINTIME,
3085                                (void *)&on, sizeof(on)))
3086                         msyslog(LOG_DEBUG,
3087                                 "setsockopt SO_BINTIME on fails on address %s: %m",
3088                                 stoa(addr));
3089                 else
3090                         DPRINTF(4, ("setsockopt SO_BINTIME enabled on fd %d address %s\n",
3091                                     fd, stoa(addr)));
3092         }
3093 #endif
3094
3095         DPRINTF(4, ("bind(%d) AF_INET%s, addr %s%%%d#%d, flags 0x%x\n",
3096                    fd, IS_IPV6(addr) ? "6" : "", stoa(addr),
3097                    SCOPE(addr), SRCPORT(addr), interf->flags));
3098
3099         make_socket_nonblocking(fd);
3100
3101 #ifdef HAVE_SIGNALED_IO
3102         init_socket_sig(fd);
3103 #endif /* not HAVE_SIGNALED_IO */
3104
3105         add_fd_to_list(fd, FD_TYPE_SOCKET);
3106
3107 #if !defined(SYS_WINNT) && !defined(VMS)
3108         DPRINTF(4, ("flags for fd %d: 0x%x\n", fd,
3109                     fcntl(fd, F_GETFL, 0)));
3110 #endif /* SYS_WINNT || VMS */
3111
3112 #if defined(HAVE_IO_COMPLETION_PORT)
3113 /*
3114  * Add the socket to the completion port
3115  */
3116         if (!io_completion_port_add_socket(fd, interf, bcast)) {
3117                 msyslog(LOG_ERR, "unable to set up io completion port - EXITING");
3118                 exit(1);
3119         }
3120 #endif
3121         return fd;
3122 }
3123
3124
3125
3126 /* XXX ELIMINATE sendpkt similar in ntpq.c, ntpdc.c, ntp_io.c, ntptrace.c */
3127 /*
3128  * sendpkt - send a packet to the specified destination. Maintain a
3129  * send error cache so that only the first consecutive error for a
3130  * destination is logged.
3131  */
3132 void
3133 sendpkt(
3134         sockaddr_u *            dest,
3135         struct interface *      ep,
3136         int                     ttl,
3137         struct pkt *            pkt,
3138         int                     len
3139         )
3140 {
3141         endpt * src;
3142         int     ismcast;
3143         int     cc;
3144         int     rc;
3145         u_char  cttl;
3146         l_fp    fp_zero = { { 0 }, 0 };
3147         l_fp    org, rec, xmt;
3148         
3149         ismcast = IS_MCAST(dest);
3150         if (!ismcast)
3151                 src = ep;
3152         else
3153                 src = (IS_IPV4(dest))
3154                           ? mc4_list
3155                           : mc6_list;
3156
3157         if (NULL == src) {
3158                 /*
3159                  * unbound peer - drop request and wait for better
3160                  * network conditions
3161                  */
3162                 DPRINTF(2, ("%ssendpkt(dst=%s, ttl=%d, len=%d): no interface - IGNORED\n",
3163                             ismcast ? "\tMCAST\t***** " : "",
3164                             stoa(dest), ttl, len));
3165                 return;
3166         }
3167
3168         do {
3169                 DPRINTF(2, ("%ssendpkt(%d, dst=%s, src=%s, ttl=%d, len=%d)\n",
3170                             ismcast ? "\tMCAST\t***** " : "", src->fd,
3171                             stoa(dest), stoa(&src->sin), ttl, len));
3172 #ifdef MCAST
3173                 /*
3174                  * for the moment we use the bcast option to set multicast ttl
3175                  */
3176                 if (ismcast && ttl > 0 && ttl != src->last_ttl) {
3177                         /*
3178                          * set the multicast ttl for outgoing packets
3179                          */
3180                         switch (AF(&src->sin)) {
3181
3182                         case AF_INET :
3183                                 cttl = (u_char)ttl;
3184                                 rc = setsockopt(src->fd, IPPROTO_IP,
3185                                                 IP_MULTICAST_TTL,
3186                                                 (void *)&cttl,
3187                                                 sizeof(cttl));
3188                                 break;
3189
3190 # ifdef INCLUDE_IPV6_SUPPORT
3191                         case AF_INET6 :
3192                                 rc = setsockopt(src->fd, IPPROTO_IPV6,
3193                                                  IPV6_MULTICAST_HOPS,
3194                                                  (void *)&ttl,
3195                                                  sizeof(ttl));
3196                                 break;
3197 # endif /* INCLUDE_IPV6_SUPPORT */
3198
3199                         default:
3200                                 rc = 0;
3201                         }
3202
3203                         if (!rc)
3204                                 src->last_ttl = ttl;
3205                         else
3206                                 msyslog(LOG_ERR,
3207                                         "setsockopt IP_MULTICAST_TTL/IPV6_MULTICAST_HOPS fails on address %s: %m",
3208                                         stoa(&src->sin));
3209                 }
3210 #endif  /* MCAST */
3211
3212 #ifdef SIM
3213                 cc = simulate_server(dest, src, pkt);
3214 #elif defined(HAVE_IO_COMPLETION_PORT)
3215                 cc = io_completion_port_sendto(src, src->fd, pkt,
3216                         (size_t)len, (sockaddr_u *)&dest->sa);
3217 #else
3218                 cc = sendto(src->fd, (char *)pkt, (u_int)len, 0,
3219                             &dest->sa, SOCKLEN(dest));
3220 #endif
3221                 if (cc == -1) {
3222                         src->notsent++;
3223                         packets_notsent++;
3224                 } else  {
3225                         src->sent++;
3226                         packets_sent++;
3227                 }
3228                 if (ismcast)
3229                         src = src->mclink;
3230         } while (ismcast && src != NULL);
3231
3232         /* HMS: pkt->rootdisp is usually random here */
3233         NTOHL_FP(&pkt->org, &org);
3234         NTOHL_FP(&pkt->rec, &rec);
3235         NTOHL_FP(&pkt->xmt, &xmt);
3236         record_raw_stats(src ? &src->sin : NULL, dest,
3237                         &org, &rec, &xmt, &fp_zero,
3238                         PKT_LEAP(pkt->li_vn_mode),
3239                         PKT_VERSION(pkt->li_vn_mode),
3240                         PKT_MODE(pkt->li_vn_mode),
3241                         pkt->stratum,
3242                         pkt->ppoll, pkt->precision,
3243                         pkt->rootdelay, pkt->rootdisp, pkt->refid,
3244                         len - MIN_V4_PKT_LEN, (u_char *)&pkt->exten);
3245
3246         return;
3247 }
3248
3249
3250 #if !defined(HAVE_IO_COMPLETION_PORT)
3251 #if !defined(HAVE_SIGNALED_IO)
/*
 * fdbits - generate ascii representation of fd_set (FAU debug support)
 * HFDF format - highest fd first.
 *
 * count is the highest descriptor to show; it is clamped to 255.  One
 * character is produced per descriptor from count down to 0: '#' if
 * the descriptor is a member of set, '-' otherwise.  A negative count
 * yields an empty string.
 *
 * Returns a pointer to a static buffer that is overwritten by the
 * next call.
 */
static char *
fdbits(
	int		count,
	const fd_set*	set
	)
{
	/*
	 * Descriptors 0..255 take 256 characters plus the terminating
	 * NUL, so the buffer must hold 257 bytes.
	 */
	static char	buffer[256 + 1];
	char *		buf = buffer;

	if (count > 255)
		count = 255;

	for (; count >= 0; count--)
		*buf++ = FD_ISSET(count, set) ? '#' : '-';
	*buf = '\0';

	return buffer;
}
3275 #endif
3276
3277 #ifdef REFCLOCK
3278 /*
3279  * Routine to read the refclock packets for a specific interface
3280  * Return the number of bytes read. That way we know if we should
3281  * read it again or go on to the next one if no bytes returned
3282  */
3283 static inline int
3284 read_refclock_packet(
3285         SOCKET                  fd,
3286         struct refclockio *     rp,
3287         l_fp                    ts
3288         )
3289 {
3290         u_int                   read_count;
3291         int                     buflen;
3292         int                     saved_errno;
3293         int                     consumed;
3294         struct recvbuf *        rb;
3295
3296         rb = get_free_recv_buffer();
3297
3298         if (NULL == rb) {
3299                 /*
3300                  * No buffer space available - just drop the packet
3301                  */
3302                 char buf[RX_BUFF_SIZE];
3303
3304                 buflen = read(fd, buf, sizeof buf);
3305                 packets_dropped++;
3306                 return (buflen);
3307         }
3308
3309         /* TALOS-CAN-0064: avoid signed/unsigned clashes that can lead
3310          * to buffer overrun and memory corruption
3311          */
3312         if (rp->datalen <= 0 || (size_t)rp->datalen > sizeof(rb->recv_space))
3313                 read_count = sizeof(rb->recv_space);
3314         else
3315                 read_count = (u_int)rp->datalen;
3316         do {
3317                 buflen = read(fd, (char *)&rb->recv_space, read_count);
3318         } while (buflen < 0 && EINTR == errno);
3319
3320         if (buflen <= 0) {
3321                 saved_errno = errno;
3322                 freerecvbuf(rb);
3323                 errno = saved_errno;
3324                 return buflen;
3325         }
3326
3327         /*
3328          * Got one. Mark how and when it got here,
3329          * put it on the full list and do bookkeeping.
3330          */
3331         rb->recv_length = buflen;
3332         rb->recv_peer = rp->srcclock;
3333         rb->dstadr = 0;
3334         rb->fd = fd;
3335         rb->recv_time = ts;
3336         rb->receiver = rp->clock_recv;
3337
3338         consumed = indicate_refclock_packet(rp, rb);
3339         if (!consumed) {
3340                 rp->recvcount++;
3341                 packets_received++;
3342         }
3343
3344         return buflen;
3345 }
3346 #endif  /* REFCLOCK */
3347
3348
3349 #ifdef HAVE_PACKET_TIMESTAMP
3350 /*
3351  * extract timestamps from control message buffer
3352  */
3353 static l_fp
3354 fetch_timestamp(
3355         struct recvbuf *        rb,
3356         struct msghdr *         msghdr,
3357         l_fp                    ts
3358         )
3359 {
3360         struct cmsghdr *        cmsghdr;
3361         unsigned long           ticks;
3362         double                  fuzz;
3363         l_fp                    lfpfuzz;
3364         l_fp                    nts;
3365 #ifdef DEBUG_TIMING
3366         l_fp                    dts;
3367 #endif
3368
3369         cmsghdr = CMSG_FIRSTHDR(msghdr);
3370         while (cmsghdr != NULL) {
3371                 switch (cmsghdr->cmsg_type)
3372                 {
3373 #ifdef HAVE_BINTIME
3374                 case SCM_BINTIME:
3375 #endif  /* HAVE_BINTIME */
3376 #ifdef HAVE_TIMESTAMPNS
3377                 case SCM_TIMESTAMPNS:
3378 #endif  /* HAVE_TIMESTAMPNS */
3379 #ifdef HAVE_TIMESTAMP
3380                 case SCM_TIMESTAMP:
3381 #endif  /* HAVE_TIMESTAMP */
3382 #if defined(HAVE_BINTIME) || defined (HAVE_TIMESTAMPNS) || defined(HAVE_TIMESTAMP)
3383                         switch (cmsghdr->cmsg_type)
3384                         {
3385 #ifdef HAVE_BINTIME
3386                         case SCM_BINTIME:
3387                                 {
3388                                         struct bintime  pbt;
3389                                         memcpy(&pbt, CMSG_DATA(cmsghdr), sizeof(pbt));
3390                                         /*
3391                                          * bintime documentation is at http://phk.freebsd.dk/pubs/timecounter.pdf
3392                                          */
3393                                         nts.l_i = pbt.sec + JAN_1970;
3394                                         nts.l_uf = (u_int32)(pbt.frac >> 32);
3395                                         if (sys_tick > measured_tick &&
3396                                             sys_tick > 1e-9) {
3397                                                 ticks = (unsigned long)(nts.l_uf / (unsigned long)(sys_tick * FRAC));
3398                                                 nts.l_uf = (unsigned long)(ticks * (unsigned long)(sys_tick * FRAC));
3399                                         }
3400                                         DPRINTF(4, ("fetch_timestamp: system bintime network time stamp: %ld.%09lu\n",
3401                                                     pbt.sec, (unsigned long)((nts.l_uf / FRAC) * 1e9)));
3402                                 }
3403                                 break;
3404 #endif  /* HAVE_BINTIME */
3405 #ifdef HAVE_TIMESTAMPNS
3406                         case SCM_TIMESTAMPNS:
3407                                 {
3408                                         struct timespec pts;
3409                                         memcpy(&pts, CMSG_DATA(cmsghdr), sizeof(pts));
3410                                         if (sys_tick > measured_tick &&
3411                                             sys_tick > 1e-9) {
3412                                                 ticks = (unsigned long)((pts.tv_nsec * 1e-9) /
3413                                                                         sys_tick);
3414                                                 pts.tv_nsec = (long)(ticks * 1e9 *
3415                                                                      sys_tick);
3416                                         }
3417                                         DPRINTF(4, ("fetch_timestamp: system nsec network time stamp: %ld.%09ld\n",
3418                                                     pts.tv_sec, pts.tv_nsec));
3419                                         nts = tspec_stamp_to_lfp(pts);
3420                                 }
3421                                 break;
3422 #endif  /* HAVE_TIMESTAMPNS */
3423 #ifdef HAVE_TIMESTAMP
3424                         case SCM_TIMESTAMP:
3425                                 {
3426                                         struct timeval  ptv;
3427                                         memcpy(&ptv, CMSG_DATA(cmsghdr), sizeof(ptv));
3428                                         if (sys_tick > measured_tick &&
3429                                             sys_tick > 1e-6) {
3430                                                 ticks = (unsigned long)((ptv.tv_usec * 1e-6) /
3431                                                                         sys_tick);
3432                                                 ptv.tv_usec = (long)(ticks * 1e6 *
3433                                                                     sys_tick);
3434                                         }
3435                                         DPRINTF(4, ("fetch_timestamp: system usec network time stamp: %jd.%06ld\n",
3436                                                     (intmax_t)ptv.tv_sec, (long)ptv.tv_usec));
3437                                         nts = tval_stamp_to_lfp(ptv);
3438                                 }
3439                                 break;
3440 #endif  /* HAVE_TIMESTAMP */
3441                         }
3442                         fuzz = ntp_random() * 2. / FRAC * sys_fuzz;
3443                         DTOLFP(fuzz, &lfpfuzz);
3444                         L_ADD(&nts, &lfpfuzz);
3445 #ifdef DEBUG_TIMING
3446                         dts = ts;
3447                         L_SUB(&dts, &nts);
3448                         collect_timing(rb, "input processing delay", 1,
3449                                        &dts);
3450                         DPRINTF(4, ("fetch_timestamp: timestamp delta: %s (incl. fuzz)\n",
3451                                     lfptoa(&dts, 9)));
3452 #endif  /* DEBUG_TIMING */
3453                         ts = nts;  /* network time stamp */
3454                         break;
3455 #endif  /* HAVE_BINTIME || HAVE_TIMESTAMPNS || HAVE_TIMESTAMP */
3456
3457                 default:
3458                         DPRINTF(4, ("fetch_timestamp: skipping control message 0x%x\n",
3459                                     cmsghdr->cmsg_type));
3460                 }
3461                 cmsghdr = CMSG_NXTHDR(msghdr, cmsghdr);
3462         }
3463         return ts;
3464 }
3465 #endif  /* HAVE_PACKET_TIMESTAMP */
3466
3467
3468 /*
3469  * Routine to read the network NTP packets for a specific interface
3470  * Return the number of bytes read. That way we know if we should
3471  * read it again or go on to the next one if no bytes returned
3472  */
3473 static inline int
3474 read_network_packet(
3475         SOCKET                  fd,
3476         struct interface *      itf,
3477         l_fp                    ts
3478         )
3479 {
3480         GETSOCKNAME_SOCKLEN_TYPE fromlen;
3481         int buflen;
3482         register struct recvbuf *rb;
3483 #ifdef HAVE_PACKET_TIMESTAMP
3484         struct msghdr msghdr;
3485         struct iovec iovec;
3486         char control[CMSG_BUFSIZE];
3487 #endif
3488
3489         /*
3490          * Get a buffer and read the frame.  If we
3491          * haven't got a buffer, or this is received
3492          * on a disallowed socket, just dump the
3493          * packet.
3494          */
3495
3496         rb = get_free_recv_buffer();
3497         if (NULL == rb || itf->ignore_packets) {
3498                 char buf[RX_BUFF_SIZE];
3499                 sockaddr_u from;
3500
3501                 if (rb != NULL)
3502                         freerecvbuf(rb);
3503
3504                 fromlen = sizeof(from);
3505                 buflen = recvfrom(fd, buf, sizeof(buf), 0,
3506                                   &from.sa, &fromlen);
3507                 DPRINTF(4, ("%s on (%lu) fd=%d from %s\n",
3508                         (itf->ignore_packets)
3509                             ? "ignore"
3510                             : "drop",
3511                         free_recvbuffs(), fd, stoa(&from)));
3512                 if (itf->ignore_packets)
3513                         packets_ignored++;
3514                 else
3515                         packets_dropped++;
3516                 return (buflen);
3517         }
3518
3519         fromlen = sizeof(rb->recv_srcadr);
3520
3521 #ifndef HAVE_PACKET_TIMESTAMP
3522         rb->recv_length = recvfrom(fd, (char *)&rb->recv_space,
3523                                    sizeof(rb->recv_space), 0,
3524                                    &rb->recv_srcadr.sa, &fromlen);
3525 #else
3526         iovec.iov_base        = &rb->recv_space;
3527         iovec.iov_len         = sizeof(rb->recv_space);
3528         msghdr.msg_name       = &rb->recv_srcadr;
3529         msghdr.msg_namelen    = fromlen;
3530         msghdr.msg_iov        = &iovec;
3531         msghdr.msg_iovlen     = 1;
3532         msghdr.msg_control    = (void *)&control;
3533         msghdr.msg_controllen = sizeof(control);
3534         msghdr.msg_flags      = 0;
3535         rb->recv_length       = recvmsg(fd, &msghdr, 0);
3536 #endif
3537
3538         buflen = rb->recv_length;
3539
3540         if (buflen == 0 || (buflen == -1 &&
3541             (EWOULDBLOCK == errno
3542 #ifdef EAGAIN
3543              || EAGAIN == errno
3544 #endif
3545              ))) {
3546                 freerecvbuf(rb);
3547                 return (buflen);
3548         } else if (buflen < 0) {
3549                 msyslog(LOG_ERR, "recvfrom(%s) fd=%d: %m",
3550                         stoa(&rb->recv_srcadr), fd);
3551                 DPRINTF(5, ("read_network_packet: fd=%d dropped (bad recvfrom)\n",
3552                             fd));
3553                 freerecvbuf(rb);
3554                 return (buflen);
3555         }
3556
3557         DPRINTF(3, ("read_network_packet: fd=%d length %d from %s\n",
3558                     fd, buflen, stoa(&rb->recv_srcadr)));
3559
3560 #ifdef ENABLE_BUG3020_FIX
3561         if (ISREFCLOCKADR(&rb->recv_srcadr)) {
3562                 msyslog(LOG_ERR, "recvfrom(%s) fd=%d: refclock srcadr on a network interface!",
3563                         stoa(&rb->recv_srcadr), fd);
3564                 DPRINTF(1, ("read_network_packet: fd=%d dropped (refclock srcadr))\n",
3565                             fd));
3566                 packets_dropped++;
3567                 freerecvbuf(rb);
3568                 return (buflen);
3569         }
3570 #endif
3571
3572         /*
3573         ** Bug 2672: Some OSes (MacOSX and Linux) don't block spoofed ::1
3574         */
3575
3576         if (AF_INET6 == itf->family) {
3577                 DPRINTF(2, ("Got an IPv6 packet, from <%s> (%d) to <%s> (%d)\n",
3578                         stoa(&rb->recv_srcadr),
3579                         IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&rb->recv_srcadr)),
3580                         stoa(&itf->sin),
3581                         !IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&itf->sin))
3582                         ));
3583
3584                 if (   IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&rb->recv_srcadr))
3585                     && !IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&itf->sin))
3586                    ) {
3587                         packets_dropped++;
3588                         DPRINTF(2, ("DROPPING that packet\n"));
3589                         freerecvbuf(rb);
3590                         return buflen;
3591                 }
3592                 DPRINTF(2, ("processing that packet\n"));
3593         }
3594
3595         /*
3596          * Got one.  Mark how and when it got here,
3597          * put it on the full list and do bookkeeping.
3598          */
3599         rb->dstadr = itf;
3600         rb->fd = fd;
3601 #ifdef HAVE_PACKET_TIMESTAMP
3602         /* pick up a network time stamp if possible */
3603         ts = fetch_timestamp(rb, &msghdr, ts);
3604 #endif
3605         rb->recv_time = ts;
3606         rb->receiver = receive;
3607
3608         add_full_recv_buffer(rb);
3609
3610         itf->received++;
3611         packets_received++;
3612         return (buflen);
3613 }
3614
3615 /*
3616  * attempt to handle io (select()/signaled IO)
3617  */
3618 void
3619 io_handler(void)
3620 {
3621 #  ifndef HAVE_SIGNALED_IO
3622         fd_set rdfdes;
3623         int nfound;
3624
3625         /*
3626          * Use select() on all on all input fd's for unlimited
3627          * time.  select() will terminate on SIGALARM or on the
3628          * reception of input.  Using select() means we can't do
3629          * robust signal handling and we get a potential race
3630          * between checking for alarms and doing the select().
3631          * Mostly harmless, I think.
3632          */
3633         /*
3634          * On VMS, I suspect that select() can't be interrupted
3635          * by a "signal" either, so I take the easy way out and
3636          * have select() time out after one second.
3637          * System clock updates really aren't time-critical,
3638          * and - lacking a hardware reference clock - I have
3639          * yet to learn about anything else that is.
3640          */
3641         ++handler_calls;
3642         rdfdes = activefds;
3643 #   if !defined(VMS) && !defined(SYS_VXWORKS)
3644         nfound = select(maxactivefd + 1, &rdfdes, NULL,
3645                         NULL, NULL);
3646 #   else        /* VMS, VxWorks */
3647         /* make select() wake up after one second */
3648         {
3649                 struct timeval t1;
3650                 t1.tv_sec  = 1;
3651                 t1.tv_usec = 0;
3652                 nfound = select(maxactivefd + 1,
3653                                 &rdfdes, NULL, NULL,
3654                                 &t1);
3655         }
3656 #   endif       /* VMS, VxWorks */
3657         if (nfound < 0 && sanitize_fdset(errno)) {
3658                 struct timeval t1;
3659                 t1.tv_sec  = 0;
3660                 t1.tv_usec = 0;
3661                 rdfdes = activefds;
3662                 nfound = select(maxactivefd + 1,
3663                                 &rdfdes, NULL, NULL,
3664                                 &t1);
3665         }
3666
3667         if (nfound > 0) {
3668                 l_fp ts;
3669
3670                 get_systime(&ts);
3671
3672                 input_handler_scan(&ts, &rdfdes);
3673         } else if (nfound == -1 && errno != EINTR) {
3674                 msyslog(LOG_ERR, "select() error: %m");
3675         }
3676 #   ifdef DEBUG
3677         else if (debug > 4) {
3678                 msyslog(LOG_DEBUG, "select(): nfound=%d, error: %m", nfound);
3679         } else {
3680                 DPRINTF(3, ("select() returned %d: %m\n", nfound));
3681         }
3682 #   endif /* DEBUG */
3683 #  else /* HAVE_SIGNALED_IO */
3684         wait_for_signal();
3685 #  endif /* HAVE_SIGNALED_IO */
3686 }
3687
3688 #ifdef HAVE_SIGNALED_IO
3689 /*
3690  * input_handler - receive packets asynchronously
3691  *
3692  * ALWAYS IN SIGNAL HANDLER CONTEXT -- only async-safe functions allowed!
3693  */
3694 static RETSIGTYPE
3695 input_handler(
3696         l_fp *  cts
3697         )
3698 {
3699         int             n;
3700         struct timeval  tvzero;
3701         fd_set          fds;
3702         
3703         ++handler_calls;
3704
3705         /*
3706          * Do a poll to see who has data
3707          */
3708
3709         fds = activefds;
3710         tvzero.tv_sec = tvzero.tv_usec = 0;
3711
3712         n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3713         if (n < 0 && sanitize_fdset(errno)) {
3714                 fds = activefds;
3715                 tvzero.tv_sec = tvzero.tv_usec = 0;
3716                 n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3717         }
3718         if (n > 0)
3719                 input_handler_scan(cts, &fds);
3720 }
3721 #endif /* HAVE_SIGNALED_IO */
3722
3723
3724 /*
3725  * Try to sanitize the global FD set
3726  *
3727  * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
3728  */
3729 static int/*BOOL*/
3730 sanitize_fdset(
3731         int     errc
3732         )
3733 {
3734         int j, b, maxscan;
3735
3736 #  ifndef HAVE_SIGNALED_IO
3737         /*
3738          * extended FAU debugging output
3739          */
3740         if (errc != EINTR) {
3741                 msyslog(LOG_ERR,
3742                         "select(%d, %s, 0L, 0L, &0.0) error: %m",
3743                         maxactivefd + 1,
3744                         fdbits(maxactivefd, &activefds));
3745         }
3746 #   endif
3747         
3748         if (errc != EBADF)
3749                 return FALSE;
3750
3751         /* if we have oviously bad FDs, try to sanitize the FD set. */
3752         for (j = 0, maxscan = 0; j <= maxactivefd; j++) {
3753                 if (FD_ISSET(j, &activefds)) {
3754                         if (-1 != read(j, &b, 0)) {
3755                                 maxscan = j;
3756                                 continue;
3757                         }
3758 #                   ifndef HAVE_SIGNALED_IO
3759                         msyslog(LOG_ERR,
3760                                 "Removing bad file descriptor %d from select set",
3761                                 j);
3762 #                   endif
3763                         FD_CLR(j, &activefds);
3764                 }
3765         }
3766         if (maxactivefd != maxscan)
3767                 maxactivefd = maxscan;
3768         return TRUE;
3769 }
3770
3771 /*
3772  * scan the known FDs (clocks, servers, ...) for presence in a 'fd_set'. 
3773  *
3774  * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
3775  */
3776 static void
3777 input_handler_scan(
3778         const l_fp *    cts,
3779         const fd_set *  pfds
3780         )
3781 {
3782         int             buflen;
3783         u_int           idx;
3784         int             doing;
3785         SOCKET          fd;
3786         blocking_child *c;
3787         l_fp            ts;     /* Timestamp at BOselect() gob */
3788
3789 #if defined(DEBUG_TIMING)
3790         l_fp            ts_e;   /* Timestamp at EOselect() gob */
3791 #endif
3792         endpt *         ep;
3793 #ifdef REFCLOCK
3794         struct refclockio *rp;
3795         int             saved_errno;
3796         const char *    clk;
3797 #endif
3798 #ifdef HAS_ROUTING_SOCKET
3799         struct asyncio_reader * asyncio_reader;
3800         struct asyncio_reader * next_asyncio_reader;
3801 #endif
3802
3803         ++handler_pkts;
3804         ts = *cts;
3805
3806 #ifdef REFCLOCK
3807         /*
3808          * Check out the reference clocks first, if any
3809          */
3810         
3811         for (rp = refio; rp != NULL; rp = rp->next) {
3812                 fd = rp->fd;
3813                 
3814                 if (!FD_ISSET(fd, pfds))
3815                         continue;
3816                 buflen = read_refclock_packet(fd, rp, ts);
3817                 /*
3818                  * The first read must succeed after select() indicates
3819                  * readability, or we've reached a permanent EOF.
3820                  * http://bugs.ntp.org/1732 reported ntpd munching CPU
3821                  * after a USB GPS was unplugged because select was
3822                  * indicating EOF but ntpd didn't remove the descriptor
3823                  * from the activefds set.
3824                  */
3825                 if (buflen < 0 && EAGAIN != errno) {
3826                         saved_errno = errno;
3827                         clk = refnumtoa(&rp->srcclock->srcadr);
3828                         errno = saved_errno;
3829                         msyslog(LOG_ERR, "%s read: %m", clk);
3830                         maintain_activefds(fd, TRUE);
3831                 } else if (0 == buflen) {
3832                         clk = refnumtoa(&rp->srcclock->srcadr);
3833                         msyslog(LOG_ERR, "%s read EOF", clk);
3834                         maintain_activefds(fd, TRUE);
3835                 } else {
3836                         /* drain any remaining refclock input */
3837                         do {
3838                                 buflen = read_refclock_packet(fd, rp, ts);
3839                         } while (buflen > 0);
3840                 }
3841         }
3842 #endif /* REFCLOCK */
3843
3844         /*
3845          * Loop through the interfaces looking for data to read.
3846          */
3847         for (ep = ep_list; ep != NULL; ep = ep->elink) {
3848                 for (doing = 0; doing < 2; doing++) {
3849                         if (!doing) {
3850                                 fd = ep->fd;
3851                         } else {
3852                                 if (!(ep->flags & INT_BCASTOPEN))
3853                                         break;
3854                                 fd = ep->bfd;
3855                         }
3856                         if (fd < 0)
3857                                 continue;
3858                         if (FD_ISSET(fd, pfds))
3859                                 do {
3860                                         buflen = read_network_packet(
3861                                                         fd, ep, ts);
3862                                 } while (buflen > 0);
3863                         /* Check more interfaces */
3864                 }
3865         }
3866
3867 #ifdef HAS_ROUTING_SOCKET
3868         /*
3869          * scan list of asyncio readers - currently only used for routing sockets
3870          */
3871         asyncio_reader = asyncio_reader_list;
3872
3873         while (asyncio_reader != NULL) {
3874                 /* callback may unlink and free asyncio_reader */
3875                 next_asyncio_reader = asyncio_reader->link;
3876                 if (FD_ISSET(asyncio_reader->fd, pfds))
3877                         (*asyncio_reader->receiver)(asyncio_reader);
3878                 asyncio_reader = next_asyncio_reader;
3879         }
3880 #endif /* HAS_ROUTING_SOCKET */
3881
3882         /*
3883          * Check for a response from a blocking child
3884          */
3885         for (idx = 0; idx < blocking_children_alloc; idx++) {
3886                 c = blocking_children[idx];
3887                 if (NULL == c || -1 == c->resp_read_pipe)
3888                         continue;
3889                 if (FD_ISSET(c->resp_read_pipe, pfds)) {
3890                         ++c->resp_ready_seen;
3891                         ++blocking_child_ready_seen;
3892                 }
3893         }
3894
3895         /* We've done our work */
3896 #if defined(DEBUG_TIMING)
3897         get_systime(&ts_e);
3898         /*
3899          * (ts_e - ts) is the amount of time we spent
3900          * processing this gob of file descriptors.  Log
3901          * it.
3902          */
3903         L_SUB(&ts_e, &ts);
3904         collect_timing(NULL, "input handler", 1, &ts_e);
3905         if (debug > 3)
3906                 msyslog(LOG_DEBUG,
3907                         "input_handler: Processed a gob of fd's in %s msec",
3908                         lfptoms(&ts_e, 6));
3909 #endif /* DEBUG_TIMING */
3910 }
3911 #endif /* !HAVE_IO_COMPLETION_PORT */
3912
3913 /*
3914  * find an interface suitable for the src address
3915  */
3916 endpt *
3917 select_peerinterface(
3918         struct peer *   peer,
3919         sockaddr_u *    srcadr,
3920         endpt *         dstadr
3921         )
3922 {
3923         endpt *ep;
3924 #ifndef SIM
3925         endpt *wild;
3926
3927         wild = ANY_INTERFACE_CHOOSE(srcadr);
3928
3929         /*
3930          * Initialize the peer structure and dance the interface jig.
3931          * Reference clocks step the loopback waltz, the others
3932          * squaredance around the interface list looking for a buddy. If
3933          * the dance peters out, there is always the wildcard interface.
3934          * This might happen in some systems and would preclude proper
3935          * operation with public key cryptography.
3936          */
3937         if (ISREFCLOCKADR(srcadr)) {
3938                 ep = loopback_interface;
3939         } else if (peer->cast_flags &
3940                    (MDF_BCLNT | MDF_ACAST | MDF_MCAST | MDF_BCAST)) {
3941                 ep = findbcastinter(srcadr);
3942                 if (ep != NULL)
3943                         DPRINTF(4, ("Found *-cast interface %s for address %s\n",
3944                                 stoa(&ep->sin), stoa(srcadr)));
3945                 else
3946                         DPRINTF(4, ("No *-cast local address found for address %s\n",
3947                                 stoa(srcadr)));
3948         } else {
3949                 ep = dstadr;
3950                 if (NULL == ep)
3951                         ep = wild;
3952         }
3953         /*
3954          * If it is a multicast address, findbcastinter() may not find
3955          * it.  For unicast, we get to find the interface when dstadr is
3956          * given to us as the wildcard (ANY_INTERFACE_CHOOSE).  Either
3957          * way, try a little harder.
3958          */
3959         if (wild == ep)
3960                 ep = findinterface(srcadr);
3961         /*
3962          * we do not bind to the wildcard interfaces for output
3963          * as our (network) source address would be undefined and
3964          * crypto will not work without knowing the own transmit address
3965          */
3966         if (ep != NULL && INT_WILDCARD & ep->flags)
3967                 if (!accept_wildcard_if_for_winnt)
3968                         ep = NULL;
3969 #else   /* SIM follows */
3970         ep = loopback_interface;
3971 #endif
3972
3973         return ep;
3974 }
3975
3976
3977 /*
3978  * findinterface - find local interface corresponding to address
3979  */
3980 endpt *
3981 findinterface(
3982         sockaddr_u *addr
3983         )
3984 {
3985         endpt *iface;
3986
3987         iface = findlocalinterface(addr, INT_WILDCARD, 0);
3988
3989         if (NULL == iface) {
3990                 DPRINTF(4, ("Found no interface for address %s - returning wildcard\n",
3991                             stoa(addr)));
3992
3993                 iface = ANY_INTERFACE_CHOOSE(addr);
3994         } else
3995                 DPRINTF(4, ("Found interface #%d %s for address %s\n",
3996                             iface->ifnum, iface->name, stoa(addr)));
3997
3998         return iface;
3999 }
4000
4001 /*
4002  * findlocalinterface - find local interface corresponding to addr,
4003  * which does not have any of flags set.  If bast is nonzero, addr is
4004  * a broadcast address.
4005  *
4006  * This code attempts to find the local sending address for an outgoing
4007  * address by connecting a new socket to destinationaddress:NTP_PORT
4008  * and reading the sockname of the resulting connect.
4009  * the complicated sequence simulates the routing table lookup
4010  * for to first hop without duplicating any of the routing logic into
4011  * ntpd. preferably we would have used an API call - but its not there -
4012  * so this is the best we can do here short of duplicating to entire routing
4013  * logic in ntpd which would be a silly and really unportable thing to do.
4014  *
4015  */
4016 static endpt *
4017 findlocalinterface(
4018         sockaddr_u *    addr,
4019         int             flags,
4020         int             bcast
4021         )
4022 {
4023         GETSOCKNAME_SOCKLEN_TYPE        sockaddrlen;
4024         endpt *                         iface;
4025         sockaddr_u                      saddr;
4026         SOCKET                          s;
4027         int                             rtn;
4028         int                             on;
4029
4030         DPRINTF(4, ("Finding interface for addr %s in list of addresses\n",
4031                     stoa(addr)));
4032
4033         /* [Bug 3437] The dummy POOL peer comes in with an AF of
4034          * zero. This is bound to fail, but on the way to nowhere it
4035          * triggers a security incident on SELinux.
4036          *
4037          * Checking the condition and failing early is probably a good
4038          * advice, and even saves us some syscalls in that case.
4039          * Thanks to Miroslav Lichvar for finding this.
4040          */
4041         if (AF_UNSPEC == AF(addr))
4042                 return NULL;
4043
4044         s = socket(AF(addr), SOCK_DGRAM, 0);
4045         if (INVALID_SOCKET == s)
4046                 return NULL;
4047
4048         /*
4049          * If we are looking for broadcast interface we need to set this
4050          * socket to allow broadcast
4051          */
4052         if (bcast) {
4053                 on = 1;
4054                 if (SOCKET_ERROR == setsockopt(s, SOL_SOCKET,
4055                                                 SO_BROADCAST,
4056                                                 (void *)&on,
4057                                                 sizeof(on))) {
4058                         closesocket(s);
4059                         return NULL;
4060                 }
4061         }
4062
4063         rtn = connect(s, &addr->sa, SOCKLEN(addr));
4064         if (SOCKET_ERROR == rtn) {
4065                 closesocket(s);
4066                 return NULL;
4067         }
4068
4069         sockaddrlen = sizeof(saddr);
4070         rtn = getsockname(s, &saddr.sa, &sockaddrlen);
4071         closesocket(s);
4072         if (SOCKET_ERROR == rtn)
4073                 return NULL;
4074
4075         DPRINTF(4, ("findlocalinterface: kernel maps %s to %s\n",
4076                     stoa(addr), stoa(&saddr)));
4077
4078         iface = getinterface(&saddr, flags);
4079
4080         /*
4081          * if we didn't find an exact match on saddr, find the closest
4082          * available local address.  This handles the case of the
4083          * address suggested by the kernel being excluded by nic rules
4084          * or the user's -I and -L options to ntpd.
4085          * See http://bugs.ntp.org/1184 and http://bugs.ntp.org/1683
4086          * for more background.
4087          */
4088         if (NULL == iface || iface->ignore_packets)
4089                 iface = findclosestinterface(&saddr,
4090                                              flags | INT_LOOPBACK);
4091
4092         /* Don't use an interface which will ignore replies */
4093         if (iface != NULL && iface->ignore_packets)
4094                 iface = NULL;
4095
4096         return iface;
4097 }
4098
4099
4100 /*
4101  * findclosestinterface
4102  *
4103  * If there are -I/--interface or -L/novirtualips command-line options,
4104  * or "nic" or "interface" rules in ntp.conf, findlocalinterface() may
4105  * find the kernel's preferred local address for a given peer address is
4106  * administratively unavailable to ntpd, and punt to this routine's more
4107  * expensive search.
4108  *
4109  * Find the numerically closest local address to the one connect()
4110  * suggested.  This matches an address on the same subnet first, as
4111  * needed by Bug 1184, and provides a consistent choice if there are
4112  * multiple feasible local addresses, regardless of the order ntpd
4113  * enumerated them.
4114  */
4115 endpt *
4116 findclosestinterface(
4117         sockaddr_u *    addr,
4118         int             flags
4119         )
4120 {
4121         endpt *         ep;
4122         endpt *         winner;
4123         sockaddr_u      addr_dist;
4124         sockaddr_u      min_dist;
4125
4126         ZERO_SOCK(&min_dist);
4127         winner = NULL;
4128
4129         for (ep = ep_list; ep != NULL; ep = ep->elink) {
4130                 if (ep->ignore_packets ||
4131                     AF(addr) != ep->family ||
4132                     flags & ep->flags)
4133                         continue;
4134
4135                 calc_addr_distance(&addr_dist, addr, &ep->sin);
4136                 if (NULL == winner ||
4137                     -1 == cmp_addr_distance(&addr_dist, &min_dist)) {
4138                         min_dist = addr_dist;
4139                         winner = ep;
4140                 }
4141         }
4142         if (NULL == winner)
4143                 DPRINTF(4, ("findclosestinterface(%s) failed\n",
4144                             stoa(addr)));
4145         else
4146                 DPRINTF(4, ("findclosestinterface(%s) -> %s\n",
4147                             stoa(addr), stoa(&winner->sin)));
4148
4149         return winner;
4150 }
4151
4152
4153 /*
4154  * calc_addr_distance - calculate the distance between two addresses,
4155  *                      the absolute value of the difference between
4156  *                      the addresses numerically, stored as an address.
4157  */
4158 static void
4159 calc_addr_distance(
4160         sockaddr_u *            dist,
4161         const sockaddr_u *      a1,
4162         const sockaddr_u *      a2
4163         )
4164 {
4165         u_int32 a1val;
4166         u_int32 a2val;
4167         u_int32 v4dist;
4168         int     found_greater;
4169         int     a1_greater;
4170         int     i;
4171
4172         REQUIRE(AF(a1) == AF(a2));
4173
4174         ZERO_SOCK(dist);
4175         AF(dist) = AF(a1);
4176
4177         /* v4 can be done a bit simpler */
4178         if (IS_IPV4(a1)) {
4179                 a1val = SRCADR(a1);
4180                 a2val = SRCADR(a2);
4181                 v4dist = (a1val > a2val)
4182                              ? a1val - a2val
4183                              : a2val - a1val;
4184                 SET_ADDR4(dist, v4dist);
4185
4186                 return;
4187         }
4188
4189         found_greater = FALSE;
4190         a1_greater = FALSE;     /* suppress pot. uninit. warning */
4191         for (i = 0; i < (int)sizeof(NSRCADR6(a1)); i++) {
4192                 if (!found_greater &&
4193                     NSRCADR6(a1)[i] != NSRCADR6(a2)[i]) {
4194                         found_greater = TRUE;
4195                         a1_greater = (NSRCADR6(a1)[i] > NSRCADR6(a2)[i]);
4196                 }
4197                 if (!found_greater) {
4198                         NSRCADR6(dist)[i] = 0;
4199                 } else {
4200                         if (a1_greater)
4201                                 NSRCADR6(dist)[i] = NSRCADR6(a1)[i] -
4202                                                     NSRCADR6(a2)[i];
4203                         else
4204                                 NSRCADR6(dist)[i] = NSRCADR6(a2)[i] -
4205                                                     NSRCADR6(a1)[i];
4206                 }
4207         }
4208 }
4209
4210
4211 /*
4212  * cmp_addr_distance - compare two address distances, returning -1, 0,
4213  *                     1 to indicate their relationship.
4214  */
4215 static int
4216 cmp_addr_distance(
4217         const sockaddr_u *      d1,
4218         const sockaddr_u *      d2
4219         )
4220 {
4221         int     i;
4222
4223         REQUIRE(AF(d1) == AF(d2));
4224
4225         if (IS_IPV4(d1)) {
4226                 if (SRCADR(d1) < SRCADR(d2))
4227                         return -1;
4228                 else if (SRCADR(d1) == SRCADR(d2))
4229                         return 0;
4230                 else
4231                         return 1;
4232         }
4233
4234         for (i = 0; i < (int)sizeof(NSRCADR6(d1)); i++) {
4235                 if (NSRCADR6(d1)[i] < NSRCADR6(d2)[i])
4236                         return -1;
4237                 else if (NSRCADR6(d1)[i] > NSRCADR6(d2)[i])
4238                         return 1;
4239         }
4240
4241         return 0;
4242 }
4243
4244
4245
4246 /*
4247  * fetch an interface structure the matches the
4248  * address and has the given flags NOT set
4249  */
4250 endpt *
4251 getinterface(
4252         sockaddr_u *    addr,
4253         u_int32         flags
4254         )
4255 {
4256         endpt *iface;
4257
4258         iface = find_addr_in_list(addr);
4259
4260         if (iface != NULL && (iface->flags & flags))
4261                 iface = NULL;
4262
4263         return iface;
4264 }
4265
4266
4267 /*
4268  * findbcastinter - find broadcast interface corresponding to address
4269  */
4270 endpt *
4271 findbcastinter(
4272         sockaddr_u *addr
4273         )
4274 {
4275         endpt * iface;
4276
4277         iface = NULL;
4278 #if !defined(MPE) && (defined(SIOCGIFCONF) || defined(SYS_WINNT))
4279         DPRINTF(4, ("Finding broadcast/multicast interface for addr %s in list of addresses\n",
4280                     stoa(addr)));
4281
4282         iface = findlocalinterface(addr, INT_LOOPBACK | INT_WILDCARD,
4283                                    1);
4284         if (iface != NULL) {
4285                 DPRINTF(4, ("Easily found bcast-/mcast- interface index #%d %s\n",
4286                             iface->ifnum, iface->name));
4287                 return iface;
4288         }
4289
4290         /*
4291          * plan B - try to find something reasonable in our lists in
4292          * case kernel lookup doesn't help
4293          */
4294         for (iface = ep_list; iface != NULL; iface = iface->elink) {
4295                 if (iface->flags & INT_WILDCARD)
4296                         continue;
4297
4298                 /* Don't bother with ignored interfaces */
4299                 if (iface->ignore_packets)
4300                         continue;
4301
4302                 /*
4303                  * First look if this is the correct family
4304                  */
4305                 if(AF(&iface->sin) != AF(addr))
4306                         continue;
4307
4308                 /* Skip the loopback addresses */
4309                 if (iface->flags & INT_LOOPBACK)
4310                         continue;
4311
4312                 /*
4313                  * If we are looking to match a multicast address and
4314                  * this interface is one...
4315                  */
4316                 if (addr_ismulticast(addr)
4317                     && (iface->flags & INT_MULTICAST)) {
4318 #ifdef INCLUDE_IPV6_SUPPORT
4319                         /*
4320                          * ...it is the winner unless we're looking for
4321                          * an interface to use for link-local multicast
4322                          * and its address is not link-local.
4323                          */
4324                         if (IS_IPV6(addr)
4325                             && IN6_IS_ADDR_MC_LINKLOCAL(PSOCK_ADDR6(addr))
4326                             && !IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(&iface->sin)))
4327                                 continue;
4328 #endif
4329                         break;
4330                 }
4331
4332                 /*
4333                  * We match only those interfaces marked as
4334                  * broadcastable and either the explicit broadcast
4335                  * address or the network portion of the IP address.
4336                  * Sloppy.
4337                  */
4338                 if (IS_IPV4(addr)) {
4339                         if (SOCK_EQ(&iface->bcast, addr))
4340                                 break;
4341
4342                         if ((NSRCADR(&iface->sin) & NSRCADR(&iface->mask))
4343                             == (NSRCADR(addr)     & NSRCADR(&iface->mask)))
4344                                 break;
4345                 }
4346 #ifdef INCLUDE_IPV6_SUPPORT
4347                 else if (IS_IPV6(addr)) {
4348                         if (SOCK_EQ(&iface->bcast, addr))
4349                                 break;
4350
4351                         if (SOCK_EQ(netof(&iface->sin), netof(addr)))
4352                                 break;
4353                 }
4354 #endif
4355         }
4356 #endif /* SIOCGIFCONF */
4357         if (NULL == iface) {
4358                 DPRINTF(4, ("No bcast interface found for %s\n",
4359                             stoa(addr)));
4360                 iface = ANY_INTERFACE_CHOOSE(addr);
4361         } else {
4362                 DPRINTF(4, ("Found bcast-/mcast- interface index #%d %s\n",
4363                             iface->ifnum, iface->name));
4364         }
4365
4366         return iface;
4367 }
4368
4369
4370 /*
4371  * io_clr_stats - clear I/O module statistics
4372  */
4373 void
4374 io_clr_stats(void)
4375 {
4376         packets_dropped = 0;
4377         packets_ignored = 0;
4378         packets_received = 0;
4379         packets_sent = 0;
4380         packets_notsent = 0;
4381
4382         handler_calls = 0;
4383         handler_pkts = 0;
4384         io_timereset = current_time;
4385 }
4386
4387
4388 #ifdef REFCLOCK
4389 /*
4390  * io_addclock - add a reference clock to the list and arrange that we
4391  *                               get SIGIO interrupts from it.
4392  */
4393 int
4394 io_addclock(
4395         struct refclockio *rio
4396         )
4397 {
4398         BLOCKIO();
4399
4400         /*
4401          * Stuff the I/O structure in the list and mark the descriptor
4402          * in use.  There is a harmless (I hope) race condition here.
4403          */
4404         rio->active = TRUE;
4405
4406 # ifdef HAVE_SIGNALED_IO
4407         if (init_clock_sig(rio)) {
4408                 UNBLOCKIO();
4409                 return 0;
4410         }
4411 # elif defined(HAVE_IO_COMPLETION_PORT)
4412         if (!io_completion_port_add_clock_io(rio)) {
4413                 UNBLOCKIO();
4414                 return 0;
4415         }
4416 # endif
4417
4418         /*
4419          * enqueue
4420          */
4421         LINK_SLIST(refio, rio, next);
4422
4423         /*
4424          * register fd
4425          */
4426         add_fd_to_list(rio->fd, FD_TYPE_FILE);
4427
4428         UNBLOCKIO();
4429         return 1;
4430 }
4431
4432
4433 /*
4434  * io_closeclock - close the clock in the I/O structure given
4435  */
4436 void
4437 io_closeclock(
4438         struct refclockio *rio
4439         )
4440 {
4441         struct refclockio *unlinked;
4442
4443         BLOCKIO();
4444
4445         /*
4446          * Remove structure from the list
4447          */
4448         rio->active = FALSE;
4449         UNLINK_SLIST(unlinked, refio, rio, next, struct refclockio);
4450         if (NULL != unlinked) {
4451                 /* Close the descriptor. The order of operations is
4452                  * important here in case of async / overlapped IO:
4453                  * only after we have removed the clock from the
4454                  * IO completion port we can be sure no further
4455                  * input is queued. So...
4456                  *  - we first disable feeding to the queu by removing
4457                  *    the clock from the IO engine
4458                  *  - close the file (which brings down any IO on it)
4459                  *  - clear the buffer from results for this fd
4460                  */
4461 #           ifdef HAVE_IO_COMPLETION_PORT
4462                 io_completion_port_remove_clock_io(rio);
4463 #           endif
4464                 close_and_delete_fd_from_list(rio->fd);
4465                 purge_recv_buffers_for_fd(rio->fd);
4466                 rio->fd = -1;
4467         }
4468
4469         UNBLOCKIO();
4470 }
4471 #endif  /* REFCLOCK */
4472
4473
4474 /*
4475  * On NT a SOCKET is an unsigned int so we cannot possibly keep it in
4476  * an array. So we use one of the ISC_LIST functions to hold the
4477  * socket value and use that when we want to enumerate it.
4478  *
4479  * This routine is called by the forked intres child process to close
4480  * all open sockets.  On Windows there's no need as intres runs in
4481  * the same process as a thread.
4482  */
4483 #ifndef SYS_WINNT
4484 void
4485 kill_asyncio(
4486         int     startfd
4487         )
4488 {
4489         BLOCKIO();
4490
4491         /*
4492          * In the child process we do not maintain activefds and
4493          * maxactivefd.  Zeroing maxactivefd disables code which
4494          * maintains it in close_and_delete_fd_from_list().
4495          */
4496         maxactivefd = 0;
4497
4498         while (fd_list != NULL)
4499                 close_and_delete_fd_from_list(fd_list->fd);
4500
4501         UNBLOCKIO();
4502 }
4503 #endif  /* !SYS_WINNT */
4504
4505
4506 /*
4507  * Add and delete functions for the list of open sockets
4508  */
4509 static void
4510 add_fd_to_list(
4511         SOCKET fd,
4512         enum desc_type type
4513         )
4514 {
4515         vsock_t *lsock = emalloc(sizeof(*lsock));
4516
4517         lsock->fd = fd;
4518         lsock->type = type;
4519
4520         LINK_SLIST(fd_list, lsock, link);
4521         maintain_activefds(fd, 0);
4522 }
4523
4524
4525 static void
4526 close_and_delete_fd_from_list(
4527         SOCKET fd
4528         )
4529 {
4530         vsock_t *lsock;
4531
4532         UNLINK_EXPR_SLIST(lsock, fd_list, fd ==
4533             UNLINK_EXPR_SLIST_CURRENT()->fd, link, vsock_t);
4534
4535         if (NULL == lsock)
4536                 return;
4537
4538         switch (lsock->type) {
4539
4540         case FD_TYPE_SOCKET:
4541                 closesocket(lsock->fd);
4542                 break;
4543
4544         case FD_TYPE_FILE:
4545                 closeserial((int)lsock->fd);
4546                 break;
4547
4548         default:
4549                 msyslog(LOG_ERR,
4550                         "internal error - illegal descriptor type %d - EXITING",
4551                         (int)lsock->type);
4552                 exit(1);
4553         }
4554
4555         free(lsock);
4556         /*
4557          * remove from activefds
4558          */
4559         maintain_activefds(fd, 1);
4560 }
4561
4562
4563 static void
4564 add_addr_to_list(
4565         sockaddr_u *    addr,
4566         endpt *         ep
4567         )
4568 {
4569         remaddr_t *laddr;
4570
4571 #ifdef DEBUG
4572         if (find_addr_in_list(addr) == NULL) {
4573 #endif
4574                 /* not there yet - add to list */
4575                 laddr = emalloc(sizeof(*laddr));
4576                 laddr->addr = *addr;
4577                 laddr->ep = ep;
4578
4579                 LINK_SLIST(remoteaddr_list, laddr, link);
4580
4581                 DPRINTF(4, ("Added addr %s to list of addresses\n",
4582                             stoa(addr)));
4583 #ifdef DEBUG
4584         } else
4585                 DPRINTF(4, ("WARNING: Attempt to add duplicate addr %s to address list\n",
4586                             stoa(addr)));
4587 #endif
4588 }
4589
4590
4591 static void
4592 delete_addr_from_list(
4593         sockaddr_u *addr
4594         )
4595 {
4596         remaddr_t *unlinked;
4597
4598         UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, SOCK_EQ(addr,
4599                 &(UNLINK_EXPR_SLIST_CURRENT()->addr)), link, remaddr_t);
4600
4601         if (unlinked != NULL) {
4602                 DPRINTF(4, ("Deleted addr %s from list of addresses\n",
4603                         stoa(addr)));
4604                 free(unlinked);
4605         }
4606 }
4607
4608
4609 static void
4610 delete_interface_from_list(
4611         endpt *iface
4612         )
4613 {
4614         remaddr_t *unlinked;
4615
4616         for (;;) {
4617                 UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, iface ==
4618                     UNLINK_EXPR_SLIST_CURRENT()->ep, link,
4619                     remaddr_t);
4620
4621                 if (unlinked == NULL)
4622                         break;
4623                 DPRINTF(4, ("Deleted addr %s for interface #%d %s from list of addresses\n",
4624                             stoa(&unlinked->addr), iface->ifnum,
4625                             iface->name));
4626                 free(unlinked);
4627         }
4628 }
4629
4630
4631 static struct interface *
4632 find_addr_in_list(
4633         sockaddr_u *addr
4634         )
4635 {
4636         remaddr_t *entry;
4637
4638         DPRINTF(4, ("Searching for addr %s in list of addresses - ",
4639                     stoa(addr)));
4640
4641         for (entry = remoteaddr_list;
4642              entry != NULL;
4643              entry = entry->link)
4644                 if (SOCK_EQ(&entry->addr, addr)) {
4645                         DPRINTF(4, ("FOUND\n"));
4646                         return entry->ep;
4647                 }
4648
4649         DPRINTF(4, ("NOT FOUND\n"));
4650         return NULL;
4651 }
4652
4653
4654 /*
4655  * Find the given address with the all given flags set in the list
4656  */
4657 static endpt *
4658 find_flagged_addr_in_list(
4659         sockaddr_u *    addr,
4660         u_int32         flags
4661         )
4662 {
4663         remaddr_t *entry;
4664
4665         DPRINTF(4, ("Finding addr %s with flags %d in list: ",
4666                     stoa(addr), flags));
4667
4668         for (entry = remoteaddr_list;
4669              entry != NULL;
4670              entry = entry->link)
4671
4672                 if (SOCK_EQ(&entry->addr, addr)
4673                     && (entry->ep->flags & flags) == flags) {
4674
4675                         DPRINTF(4, ("FOUND\n"));
4676                         return entry->ep;
4677                 }
4678
4679         DPRINTF(4, ("NOT FOUND\n"));
4680         return NULL;
4681 }
4682
4683
4684 const char *
4685 localaddrtoa(
4686         endpt *la
4687         )
4688 {
4689         return (NULL == la)
4690                    ? "<null>"
4691                    : stoa(&la->sin);
4692 }
4693
4694
4695 #ifdef HAS_ROUTING_SOCKET
4696 # ifndef UPDATE_GRACE
4697 #  define UPDATE_GRACE  2       /* wait UPDATE_GRACE seconds before scanning */
4698 # endif
4699
4700 static void
4701 process_routing_msgs(struct asyncio_reader *reader)
4702 {
4703         char buffer[5120];
4704         int cnt, msg_type;
4705 #ifdef HAVE_RTNETLINK
4706         struct nlmsghdr *nh;
4707 #else
4708         struct rt_msghdr rtm;
4709         char *p;
4710 #endif
4711
4712         if (disable_dynamic_updates) {
4713                 /*
4714                  * discard ourselves if we are not needed any more
4715                  * usually happens when running unprivileged
4716                  */
4717                 remove_asyncio_reader(reader);
4718                 delete_asyncio_reader(reader);
4719                 return;
4720         }
4721
4722         cnt = read(reader->fd, buffer, sizeof(buffer));
4723
4724         if (cnt < 0) {
4725                 if (errno == ENOBUFS) {
4726                         msyslog(LOG_ERR,
4727                                 "routing socket reports: %m");
4728                 } else {
4729                         msyslog(LOG_ERR,
4730                                 "routing socket reports: %m - disabling");
4731                         remove_asyncio_reader(reader);
4732                         delete_asyncio_reader(reader);
4733                 }
4734                 return;
4735         }
4736
4737         /*
4738          * process routing message
4739          */
4740 #ifdef HAVE_RTNETLINK
4741         for (nh = UA_PTR(struct nlmsghdr, buffer);
4742              NLMSG_OK(nh, cnt);
4743              nh = NLMSG_NEXT(nh, cnt)) {
4744                 msg_type = nh->nlmsg_type;
4745 #else
4746         for (p = buffer;
4747              (p + sizeof(struct rt_msghdr)) <= (buffer + cnt);
4748              p += rtm.rtm_msglen) {
4749                 memcpy(&rtm, p, sizeof(rtm));
4750                 if (rtm.rtm_version != RTM_VERSION) {
4751                         msyslog(LOG_ERR,
4752                                 "version mismatch (got %d - expected %d) on routing socket - disabling",
4753                                 rtm.rtm_version, RTM_VERSION);
4754
4755                         remove_asyncio_reader(reader);
4756                         delete_asyncio_reader(reader);
4757                         return;
4758                 }
4759                 msg_type = rtm.rtm_type;
4760 #endif
4761                 switch (msg_type) {
4762 #ifdef RTM_NEWADDR
4763                 case RTM_NEWADDR:
4764 #endif
4765 #ifdef RTM_DELADDR
4766                 case RTM_DELADDR:
4767 #endif
4768 #ifdef RTM_ADD
4769                 case RTM_ADD:
4770 #endif
4771 #ifdef RTM_DELETE
4772                 case RTM_DELETE:
4773 #endif
4774 #ifdef RTM_REDIRECT
4775                 case RTM_REDIRECT:
4776 #endif
4777 #ifdef RTM_CHANGE
4778                 case RTM_CHANGE:
4779 #endif
4780 #ifdef RTM_LOSING
4781                 case RTM_LOSING:
4782 #endif
4783 #ifdef RTM_IFINFO
4784                 case RTM_IFINFO:
4785 #endif
4786 #ifdef RTM_IFANNOUNCE
4787                 case RTM_IFANNOUNCE:
4788 #endif
4789 #ifdef RTM_NEWLINK
4790                 case RTM_NEWLINK:
4791 #endif
4792 #ifdef RTM_DELLINK
4793                 case RTM_DELLINK:
4794 #endif
4795 #ifdef RTM_NEWROUTE
4796                 case RTM_NEWROUTE:
4797 #endif
4798 #ifdef RTM_DELROUTE
4799                 case RTM_DELROUTE:
4800 #endif
4801                         /*
4802                          * we are keen on new and deleted addresses and
4803                          * if an interface goes up and down or routing
4804                          * changes
4805                          */
4806                         DPRINTF(3, ("routing message op = %d: scheduling interface update\n",
4807                                     msg_type));
4808                         timer_interfacetimeout(current_time + UPDATE_GRACE);
4809                         break;
4810 #ifdef HAVE_RTNETLINK
4811                 case NLMSG_DONE:
4812                         /* end of multipart message */
4813                         return;
4814 #endif
4815                 default:
4816                         /*
4817                          * the rest doesn't bother us.
4818                          */
4819                         DPRINTF(4, ("routing message op = %d: ignored\n",
4820                                     msg_type));
4821                         break;
4822                 }
4823         }
4824 }
4825
4826 /*
4827  * set up routing notifications
4828  */
4829 static void
4830 init_async_notifications()
4831 {
4832         struct asyncio_reader *reader;
4833 #ifdef HAVE_RTNETLINK
4834         int fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
4835         struct sockaddr_nl sa;
4836 #else
4837         int fd = socket(PF_ROUTE, SOCK_RAW, 0);
4838 #endif
4839         if (fd < 0) {
4840                 msyslog(LOG_ERR,
4841                         "unable to open routing socket (%m) - using polled interface update");
4842                 return;
4843         }
4844
4845         fd = move_fd(fd);
4846 #ifdef HAVE_RTNETLINK
4847         ZERO(sa);
4848         sa.nl_family = PF_NETLINK;
4849         sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR
4850                        | RTMGRP_IPV6_IFADDR | RTMGRP_IPV4_ROUTE
4851                        | RTMGRP_IPV4_MROUTE | RTMGRP_IPV6_ROUTE
4852                        | RTMGRP_IPV6_MROUTE;
4853         if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
4854                 msyslog(LOG_ERR,
4855                         "bind failed on routing socket (%m) - using polled interface update");
4856                 return;
4857         }
4858 #endif
4859         make_socket_nonblocking(fd);
4860 #if defined(HAVE_SIGNALED_IO)
4861         init_socket_sig(fd);
4862 #endif /* HAVE_SIGNALED_IO */
4863
4864         reader = new_asyncio_reader();
4865
4866         reader->fd = fd;
4867         reader->receiver = process_routing_msgs;
4868
4869         add_asyncio_reader(reader, FD_TYPE_SOCKET);
4870         msyslog(LOG_INFO,
4871                 "Listening on routing socket on fd #%d for interface updates",
4872                 fd);
4873 }
4874 #else
4875 /* HAS_ROUTING_SOCKET not defined */
static void
init_async_notifications(void)
{
	/* HAS_ROUTING_SOCKET is not defined: there is nothing to set up */
}
4880 #endif
4881