2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 1985, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 static char sccsid[] = "@(#)master.c 8.1 (Berkeley) 6/6/93";
36 static const char rcsid[] =
42 #include <sys/types.h>
43 #include <sys/times.h>
46 #include "pathnames.h"
48 extern int measure_delta;
49 extern jmp_buf jmpenv;
54 static int slvcount; /* slaves listening to our clock */
56 static void mchgdate(struct tsp *);
59 * The main function of `master' is to periodically compute the differences
60 * (deltas) between its clock and the clocks of the slaves, to compute the
61 * network average delta, and to send to the slaves the differences between
62 * their individual deltas and the network delta.
63 * While waiting, it receives messages from the slaves (i.e. requests for
64 * master's name, remote requests to set the network time, ...), and
65 * takes the appropriate action.
74 struct timeval wait, ntime;
76 struct tsp *msg, *answer, to;
78 struct sockaddr_in taddr;
79 char tname[MAXHOSTNAMELEN];
83 syslog(LOG_NOTICE, "This machine is master");
85 fprintf(fd, "This machine is master\n");
86 for (ntp = nettab; ntp != NULL; ntp = ntp->next) {
87 if (ntp->status == MASTER)
90 (void)gettimeofday(&ntime, NULL);
91 pollingtime = ntime.tv_sec+3;
97 /* Process all outstanding messages before spending the long time necessary
98 * to update all timers.
101 (void)gettimeofday(&ntime, NULL);
102 wait.tv_sec = pollingtime - ntime.tv_sec;
106 msg = readmsg(TSP_ANY, ANYADDR, &wait, 0);
108 (void)gettimeofday(&ntime, NULL);
109 if (ntime.tv_sec >= pollingtime) {
110 pollingtime = ntime.tv_sec + SAMPLEINTVL;
113 /* If a bogus master told us to quit, we may have decided to ignore a
114 * network. Therefore, periodically try to take over everything.
116 polls = (polls + 1) % POLLRATE;
117 if (0 == polls && nignorednets > 0) {
118 trace_msg("Looking for nets to re-master\n");
119 for (ntp = nettab; ntp; ntp = ntp->next) {
120 if (ntp->status == IGNORE
121 || ntp->status == NOMASTER) {
123 if (ntp->status == MASTER) {
128 if (ntp->status == MASTER
129 && --ntp->quit_count < 0)
138 for (ntp = nettab; ntp != NULL; ntp = ntp->next) {
139 to.tsp_type = TSP_LOOP;
140 to.tsp_vers = TSPVERSION;
141 to.tsp_seq = sequence++;
142 to.tsp_hopcnt = MAX_HOPCNT;
143 (void)strcpy(to.tsp_name, hostname);
145 if (sendto(sock, (char *)&to,
146 sizeof(struct tsp), 0,
147 (struct sockaddr*)&ntp->dest_addr,
148 sizeof(ntp->dest_addr)) < 0) {
149 trace_sendto_err(ntp->dest_addr.sin_addr);
156 switch (msg->tsp_type) {
167 * XXX check to see it is from ourself
169 tsp_time_sec = msg->tsp_time.tv_sec;
170 (void)strlcpy(newdate, ctime(&tsp_time_sec),
172 if (!good_host_name(msg->tsp_name)) {
174 "attempted date change by %s to %s",
175 msg->tsp_name, newdate);
181 (void)gettimeofday(&ntime, NULL);
182 pollingtime = ntime.tv_sec + SAMPLEINTVL;
186 if (!fromnet || fromnet->status != MASTER)
188 tsp_time_sec = msg->tsp_time.tv_sec;
189 (void)strlcpy(newdate, ctime(&tsp_time_sec),
191 htp = findhost(msg->tsp_name);
194 "attempted SET DATEREQ by uncontrolled %s to %s",
195 msg->tsp_name, newdate);
198 if (htp->seq == msg->tsp_seq)
200 htp->seq = msg->tsp_seq;
203 "attempted SET DATEREQ by untrusted %s to %s",
204 msg->tsp_name, newdate);
210 (void)gettimeofday(&ntime, NULL);
211 pollingtime = ntime.tv_sec + SAMPLEINTVL;
215 xmit(TSP_ACK, msg->tsp_seq, &from);
226 traceoff("Tracing ended at %s\n");
232 if (fromnet->status == MASTER) {
234 (void)addmach(msg->tsp_name, &from,fromnet);
237 (void)strcpy(tname, msg->tsp_name);
238 to.tsp_type = TSP_QUIT;
239 (void)strcpy(to.tsp_name, hostname);
240 answer = acksend(&to, &taddr, tname,
242 if (answer == NULL) {
243 syslog(LOG_ERR, "election error by %s",
250 * After a network partition, there can be
251 * more than one master: the first slave to
252 * come up will report the situation here.
254 if (!fromnet || fromnet->status != MASTER)
256 (void)strcpy(to.tsp_name, hostname);
258 /* The other master often gets into the same state,
259 * with boring results if we stay at it forever.
261 ntp = fromnet; /* acksend() can leave fromnet=0 */
262 for (i = 0; i < 3; i++) {
263 to.tsp_type = TSP_RESOLVE;
264 (void)strcpy(to.tsp_name, hostname);
265 answer = acksend(&to, &ntp->dest_addr,
266 ANYADDR, TSP_MASTERACK,
270 htp = addmach(answer->tsp_name,&from,ntp);
271 to.tsp_type = TSP_QUIT;
272 msg = acksend(&to, &htp->addr, htp->name,
273 TSP_ACK, 0, htp->noanswer);
276 "no response from %s to CONFLICT-QUIT",
285 if (!fromnet || fromnet->status != MASTER)
288 * do not want to call synch() while waiting
291 (void)gettimeofday(&ntime, NULL);
292 pollingtime = ntime.tv_sec + SAMPLEINTVL;
296 doquit(msg); /* become a slave */
300 if (!fromnet || fromnet->status != MASTER
301 || !strcmp(msg->tsp_name, hostname))
304 * We should not have received this from a net
305 * we are master on. There must be two masters.
307 htp = addmach(msg->tsp_name, &from,fromnet);
308 to.tsp_type = TSP_QUIT;
309 (void)strcpy(to.tsp_name, hostname);
310 answer = acksend(&to, &htp->addr, htp->name,
314 "loop breakage: no reply from %s=%s to QUIT",
315 htp->name, inet_ntoa(htp->addr.sin_addr));
322 "\tnets = %d, masters = %d, slaves = %d, ignored = %d\n",
323 nnets, nmasternets, nslavenets, nignorednets);
332 fprintf(fd, "garbage message: ");
343 * change the system date on the master
346 mchgdate(struct tsp *msg)
348 char tname[MAXHOSTNAMELEN];
350 struct timeval otime, ntime, tmptv;
353 (void)strcpy(tname, msg->tsp_name);
355 xmit(TSP_DATEACK, msg->tsp_seq, &from);
357 (void)strlcpy(olddate, date(), sizeof(olddate));
359 /* adjust time for residence on the queue */
360 (void)gettimeofday(&otime, NULL);
361 adj_msg_time(msg,&otime);
363 tmptv.tv_sec = msg->tsp_time.tv_sec;
364 tmptv.tv_usec = msg->tsp_time.tv_usec;
365 timevalsub(&ntime, &tmptv, &otime);
366 if (ntime.tv_sec < MAXADJ && ntime.tv_sec > -MAXADJ) {
368 * do not change the clock if we can adjust it
371 synch(tvtomsround(ntime));
373 utx.ut_type = OLD_TIME;
374 (void)gettimeofday(&utx.ut_tv, NULL);
376 (void)settimeofday(&tmptv, 0);
377 utx.ut_type = NEW_TIME;
378 (void)gettimeofday(&utx.ut_tv, NULL);
383 syslog(LOG_NOTICE, "date changed by %s from %s",
389 * synchronize all of the slaves
396 struct timeval check, stop, wait;
400 fprintf(fd, "measurements starting at %s\n", date());
401 (void)gettimeofday(&check, NULL);
402 for (htp = self.l_fwd; htp != &self; htp = htp->l_fwd) {
403 if (htp->noanswer != 0) {
404 measure_status = measure(500, 100,
408 measure_status = measure(3000, 100,
412 if (measure_status != GOOD) {
413 /* The slave did not respond. We have
414 * just wasted lots of time on it.
416 htp->delta = HOSTDOWN;
417 if (++htp->noanswer >= LOSTHOST) {
420 "purging %s for not answering ICMP\n",
427 htp->delta = measure_delta;
429 (void)gettimeofday(&stop, NULL);
430 timevalsub(&stop, &stop, &check);
431 if (stop.tv_sec >= 1) {
435 * ack messages periodically
439 if (0 != readmsg(TSP_TRACEON,ANYADDR,
442 (void)gettimeofday(&check, NULL);
446 fprintf(fd, "measurements finished at %s\n", date());
448 if (!(status & SLAVE)) {
450 mydelta = networkdelta();
455 if (trace && (mydelta != 0 || (status & SLAVE)))
456 fprintf(fd,"local correction of %ld ms.\n", mydelta);
461 * sends the time to each slave after the master
462 * has received the command to set the network time
470 struct timeval tmptv;
472 /* Do not listen to the consensus after forcing the time. This is because
473 * the consensus takes a while to reach the time we are dictating.
476 for (htp = self.l_fwd; htp != &self; htp = htp->l_fwd) {
477 to.tsp_type = TSP_SETTIME;
478 (void)strcpy(to.tsp_name, hostname);
479 (void)gettimeofday(&tmptv, NULL);
480 to.tsp_time.tv_sec = tmptv.tv_sec;
481 to.tsp_time.tv_usec = tmptv.tv_usec;
482 answer = acksend(&to, &htp->addr, htp->name,
483 TSP_ACK, 0, htp->noanswer);
484 if (answer == NULL) {
485 /* If the client does not respond, then we have
486 * just wasted lots of time on it.
489 "no reply to SETTIME from %s", htp->name);
490 if (++htp->noanswer >= LOSTHOST) {
493 "purging %s for not answering",
506 static time_t next_time;
513 if (!fd) /* quit if tracing already off */
516 this_time = times(&tm);
517 if ((time_t)(this_time + delta) < next_time)
519 next_time = this_time + CLK_TCK;
521 fprintf(fd, "host table: %d entries at %s\n", slvcount, date());
524 for (i = 1; i <= slvcount; i++, htp = htp->l_fwd) {
525 l = strlen(htp->name) + 1;
526 if (length+l >= 80) {
531 fprintf(fd, " %s", htp->name);
537 static struct hosttbl *newhost_hash;
538 static struct hosttbl *lasthfree = &hosttbl[0];
541 struct hosttbl * /* answer or 0 */
549 for (p = name, i = 0; i < 8 && *p != '\0'; i++, p++)
551 newhost_hash = &hosttbl[j % NHOSTS];
554 if (htp->name[0] == '\0')
557 if (!strcmp(name, htp->name))
560 } while (htp != newhost_hash);
565 * add a host to the list of controlled machines if not already there
568 addmach(char *name, struct sockaddr_in *addr, struct netinfo *ntp)
570 struct hosttbl *ret, *p, *b, *f;
572 ret = findhost(name);
574 if (slvcount >= NHOSTS) {
576 fprintf(fd, "no more slots in host table\n");
579 syslog(LOG_ERR, "no more slots in host table");
581 longjmp(jmpenv, 2); /* give up and be a slave */
584 /* if our home hash slot is occupied, find a free entry
587 if (newhost_hash->name[0] != '\0') {
590 if (++lasthfree > &hosttbl[NHOSTS])
591 lasthfree = &hosttbl[1];
592 } while (ret->name[0] != '\0');
594 if (!newhost_hash->head) {
595 /* Move an interloper using our home. Use
596 * scratch pointers in case the new head is
597 * pointing to itself.
599 f = newhost_hash->h_fwd;
600 b = newhost_hash->h_bak;
603 f = newhost_hash->l_fwd;
604 b = newhost_hash->l_bak;
607 bcopy(newhost_hash,ret,sizeof(*ret));
613 /* link to an existing chain in our home
616 p = newhost_hash->h_bak;
617 ret->h_fwd = newhost_hash;
620 newhost_hash->h_bak = ret;
630 (void)strlcpy(ret->name, name, sizeof(ret->name));
631 ret->good = good_host_name(name);
633 ret->l_bak = self.l_bak;
634 self.l_bak->l_fwd = ret;
642 ret->noanswer = (ret->noanswer != 0);
645 /* need to clear sequence number anyhow */
651 * remove the given machine (host table entry) from the host table.
654 remmach(struct hosttbl *htp)
656 struct hosttbl *lprv, *hnxt, *f, *b;
659 fprintf(fd, "remove %s\n", htp->name);
661 /* get out of the lists */
662 htp->l_fwd->l_bak = lprv = htp->l_bak;
663 htp->l_bak->l_fwd = htp->l_fwd;
664 htp->h_fwd->h_bak = htp->h_bak;
665 htp->h_bak->h_fwd = hnxt = htp->h_fwd;
667 /* If we are in the home slot, pull up the chain */
668 if (htp->head && hnxt != htp) {
672 /* Use scratch pointers in case the new head is pointing to
684 bcopy(hnxt, htp, sizeof(*htp));
690 lasthfree->name[0] = '\0';
691 lasthfree->h_fwd = NULL;
692 lasthfree->l_fwd = NULL;
700 * Remove all the machines from the host table that exist on the given
701 * network. This is called when a master transitions to a slave on a
705 rmnetmachs(struct netinfo *ntp)
711 for (htp = self.l_fwd; htp != &self; htp = htp->l_fwd) {
720 masterup(struct netinfo *net)
722 xmit(TSP_MASTERUP, 0, &net->dest_addr);
725 * Do not tell new slaves our time for a while. This ensures
726 * we do not tell them to start using our time, before we have
727 * found a good master.
729 (void)gettimeofday(&net->slvwait, NULL);
733 newslave(struct tsp *msg)
736 struct tsp *answer, to;
737 struct timeval now, tmptv;
739 if (!fromnet || fromnet->status != MASTER)
742 htp = addmach(msg->tsp_name, &from,fromnet);
743 htp->seq = msg->tsp_seq;
748 * If we are stable, send our time to the slave.
749 * Do not go crazy if the date has been changed.
751 (void)gettimeofday(&now, NULL);
752 if (now.tv_sec >= fromnet->slvwait.tv_sec+3
753 || now.tv_sec < fromnet->slvwait.tv_sec) {
754 to.tsp_type = TSP_SETTIME;
755 (void)strcpy(to.tsp_name, hostname);
756 (void)gettimeofday(&tmptv, NULL);
757 to.tsp_time.tv_sec = tmptv.tv_sec;
758 to.tsp_time.tv_usec = tmptv.tv_usec;
759 answer = acksend(&to, &htp->addr,
766 "no reply to initial SETTIME from %s",
768 htp->noanswer = LOSTHOST;
775 * react to a TSP_QUIT:
778 doquit(struct tsp *msg)
780 if (fromnet->status == MASTER) {
781 if (!good_host_name(msg->tsp_name)) {
782 if (fromnet->quit_count <= 0) {
783 syslog(LOG_NOTICE,"untrusted %s told us QUIT",
785 suppress(&from, msg->tsp_name, fromnet);
786 fromnet->quit_count = 1;
789 syslog(LOG_NOTICE, "untrusted %s told us QUIT twice",
791 fromnet->quit_count = 2;
792 fromnet->status = NOMASTER;
794 fromnet->status = SLAVE;
797 longjmp(jmpenv, 2); /* give up and be a slave */
800 if (!good_host_name(msg->tsp_name)) {
801 syslog(LOG_NOTICE, "untrusted %s told us QUIT",
803 fromnet->quit_count = 2;
812 fd = fopen(_PATH_TIMEDLOG, "w");
817 fprintf(fd,"Tracing started at %s\n", date());
833 fprintf(fd, msg, date());