]> CyberLeo.Net >> Repos - FreeBSD/stable/8.git/blob - sbin/hastd/hastd.c
MFC r208028,r210368,r210702,r210869,r210870,r210872,r210873,r210875,r210876,
[FreeBSD/stable/8.git] / sbin / hastd / hastd.c
1 /*-
2  * Copyright (c) 2009-2010 The FreeBSD Foundation
3  * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
4  * All rights reserved.
5  *
6  * This software was developed by Pawel Jakub Dawidek under sponsorship from
7  * the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include <sys/param.h>
35 #include <sys/linker.h>
36 #include <sys/module.h>
37 #include <sys/wait.h>
38
39 #include <assert.h>
40 #include <err.h>
41 #include <errno.h>
42 #include <libutil.h>
43 #include <signal.h>
44 #include <stdbool.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <sysexits.h>
49 #include <unistd.h>
50
51 #include <activemap.h>
52 #include <pjdlog.h>
53
54 #include "control.h"
55 #include "event.h"
56 #include "hast.h"
57 #include "hast_proto.h"
58 #include "hastd.h"
59 #include "hooks.h"
60 #include "subr.h"
61
62 /* Path to the configuration file; defaults to HAST_CONFIG. */
63 const char *cfgpath = HAST_CONFIG;
64 /* Currently active hastd configuration. */
65 static struct hastd_config *cfg;
66 /* Set to true once a SIGINT or SIGTERM signal has been received. */
67 bool sigexit_received = false;
68 /* PID file handle. */
69 struct pidfh *pfh;
70
71 /* How often (in seconds) to check for hooks running for too long. */
72 #define REPORT_INTERVAL 10
73
/*
 * Report the supported command-line options through errx(3), using the
 * EX_USAGE exit code.
 */
static void
usage(void)
{
        static const char synopsis[] = "[-dFh] [-c config] [-P pidfile]";

        errx(EX_USAGE, "%s", synopsis);
}
80
/*
 * Make sure the g_gate module is available, loading geom_gate if
 * modfind(2) does not find it.  Exits with EX_OSERR when the module
 * cannot be found after the load attempt, unless errno is EEXIST.
 */
static void
g_gate_load(void)
{

        /* Already present in the kernel, nothing to do. */
        if (modfind("g_gate") != -1)
                return;

        /* Not present in kernel, try loading it. */
        if (kldload("geom_gate") != -1 && modfind("g_gate") != -1)
                return;

        /* EEXIST is the only failure that is not treated as fatal. */
        if (errno == EEXIST)
                return;

        pjdlog_exit(EX_OSERR, "Unable to load geom_gate module");
}
95
/*
 * Log how a worker process terminated.
 *
 * pid    - PID of the worker process that exited.
 * status - wait status of that process, decoded with the W*() macros.
 *
 * A clean exit (exit code 0) is logged at debug level 1; death by signal
 * and non-zero exits are logged as errors.  An exit code of -1 is reported
 * when the status describes neither a normal exit nor a signal.
 */
static void
child_exit_log(unsigned int pid, int status)
{
        int exitcode;

        if (WIFEXITED(status)) {
                exitcode = WEXITSTATUS(status);
                if (exitcode == 0) {
                        pjdlog_debug(1,
                            "Worker process exited gracefully (pid=%u).", pid);
                        return;
                }
        } else if (WIFSIGNALED(status)) {
                pjdlog_error("Worker process killed (pid=%u, signal=%d).",
                    pid, WTERMSIG(status));
                return;
        } else {
                exitcode = -1;
        }

        pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).",
            pid, exitcode);
}
111
112 static void
113 child_exit(void)
114 {
115         struct hast_resource *res;
116         int status;
117         pid_t pid;
118
119         while ((pid = wait3(&status, WNOHANG, NULL)) > 0) {
120                 /* Find resource related to the process that just exited. */
121                 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
122                         if (pid == res->hr_workerpid)
123                                 break;
124                 }
125                 if (res == NULL) {
126                         /*
127                          * This can happen when new connection arrives and we
128                          * cancel child responsible for the old one or if this
129                          * was hook which we executed.
130                          */
131                         hook_check_one(pid, status);
132                         continue;
133                 }
134                 pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
135                     role2str(res->hr_role));
136                 child_exit_log(pid, status);
137                 child_cleanup(res);
138                 if (res->hr_role == HAST_ROLE_PRIMARY) {
139                         /*
140                          * Restart child process if it was killed by signal
141                          * or exited because of temporary problem.
142                          */
143                         if (WIFSIGNALED(status) ||
144                             (WIFEXITED(status) &&
145                              WEXITSTATUS(status) == EX_TEMPFAIL)) {
146                                 sleep(1);
147                                 pjdlog_info("Restarting worker process.");
148                                 hastd_primary(res);
149                         } else {
150                                 res->hr_role = HAST_ROLE_INIT;
151                                 pjdlog_info("Changing resource role back to %s.",
152                                     role2str(res->hr_role));
153                         }
154                 }
155                 pjdlog_prefix_set("%s", "");
156         }
157 }
158
159 static bool
160 resource_needs_restart(const struct hast_resource *res0,
161     const struct hast_resource *res1)
162 {
163
164         assert(strcmp(res0->hr_name, res1->hr_name) == 0);
165
166         if (strcmp(res0->hr_provname, res1->hr_provname) != 0)
167                 return (true);
168         if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0)
169                 return (true);
170         if (res0->hr_role == HAST_ROLE_INIT ||
171             res0->hr_role == HAST_ROLE_SECONDARY) {
172                 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
173                         return (true);
174                 if (res0->hr_replication != res1->hr_replication)
175                         return (true);
176                 if (res0->hr_timeout != res1->hr_timeout)
177                         return (true);
178                 if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
179                         return (true);
180         }
181         return (false);
182 }
183
184 static bool
185 resource_needs_reload(const struct hast_resource *res0,
186     const struct hast_resource *res1)
187 {
188
189         assert(strcmp(res0->hr_name, res1->hr_name) == 0);
190         assert(strcmp(res0->hr_provname, res1->hr_provname) == 0);
191         assert(strcmp(res0->hr_localpath, res1->hr_localpath) == 0);
192
193         if (res0->hr_role != HAST_ROLE_PRIMARY)
194                 return (false);
195
196         if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
197                 return (true);
198         if (res0->hr_replication != res1->hr_replication)
199                 return (true);
200         if (res0->hr_timeout != res1->hr_timeout)
201                 return (true);
202         if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
203                 return (true);
204         return (false);
205 }
206
207 static void
208 hastd_reload(void)
209 {
210         struct hastd_config *newcfg;
211         struct hast_resource *nres, *cres, *tres;
212         uint8_t role;
213
214         pjdlog_info("Reloading configuration...");
215
216         newcfg = yy_config_parse(cfgpath, false);
217         if (newcfg == NULL)
218                 goto failed;
219
220         /*
221          * Check if control address has changed.
222          */
223         if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) {
224                 if (proto_server(newcfg->hc_controladdr,
225                     &newcfg->hc_controlconn) < 0) {
226                         pjdlog_errno(LOG_ERR,
227                             "Unable to listen on control address %s",
228                             newcfg->hc_controladdr);
229                         goto failed;
230                 }
231         }
232         /*
233          * Check if listen address has changed.
234          */
235         if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) {
236                 if (proto_server(newcfg->hc_listenaddr,
237                     &newcfg->hc_listenconn) < 0) {
238                         pjdlog_errno(LOG_ERR, "Unable to listen on address %s",
239                             newcfg->hc_listenaddr);
240                         goto failed;
241                 }
242         }
243         /*
244          * Only when both control and listen sockets are successfully
245          * initialized switch them to new configuration.
246          */
247         if (newcfg->hc_controlconn != NULL) {
248                 pjdlog_info("Control socket changed from %s to %s.",
249                     cfg->hc_controladdr, newcfg->hc_controladdr);
250                 proto_close(cfg->hc_controlconn);
251                 cfg->hc_controlconn = newcfg->hc_controlconn;
252                 newcfg->hc_controlconn = NULL;
253                 strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr,
254                     sizeof(cfg->hc_controladdr));
255         }
256         if (newcfg->hc_listenconn != NULL) {
257                 pjdlog_info("Listen socket changed from %s to %s.",
258                     cfg->hc_listenaddr, newcfg->hc_listenaddr);
259                 proto_close(cfg->hc_listenconn);
260                 cfg->hc_listenconn = newcfg->hc_listenconn;
261                 newcfg->hc_listenconn = NULL;
262                 strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr,
263                     sizeof(cfg->hc_listenaddr));
264         }
265
266         /*
267          * Stop and remove resources that were removed from the configuration.
268          */
269         TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) {
270                 TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) {
271                         if (strcmp(cres->hr_name, nres->hr_name) == 0)
272                                 break;
273                 }
274                 if (nres == NULL) {
275                         control_set_role(cres, HAST_ROLE_INIT);
276                         TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
277                         pjdlog_info("Resource %s removed.", cres->hr_name);
278                         free(cres);
279                 }
280         }
281         /*
282          * Move new resources to the current configuration.
283          */
284         TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
285                 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
286                         if (strcmp(cres->hr_name, nres->hr_name) == 0)
287                                 break;
288                 }
289                 if (cres == NULL) {
290                         TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
291                         TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
292                         pjdlog_info("Resource %s added.", nres->hr_name);
293                 }
294         }
295         /*
296          * Deal with modified resources.
297          * Depending on what has changed exactly we might want to perform
298          * different actions.
299          *
300          * We do full resource restart in the following situations:
301          * Resource role is INIT or SECONDARY.
302          * Resource role is PRIMARY and path to local component or provider
303          * name has changed.
304          * In case of PRIMARY, the worker process will be killed and restarted,
305          * which also means removing /dev/hast/<name> provider and
306          * recreating it.
307          *
308          * We do just reload (send SIGHUP to worker process) if we act as
309          * PRIMARY, but only remote address, replication mode and timeout
310          * has changed. For those, there is no need to restart worker process.
311          * If PRIMARY receives SIGHUP, it will reconnect if remote address or
312          * replication mode has changed or simply set new timeout if only
313          * timeout has changed.
314          */
315         TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
316                 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
317                         if (strcmp(cres->hr_name, nres->hr_name) == 0)
318                                 break;
319                 }
320                 assert(cres != NULL);
321                 if (resource_needs_restart(cres, nres)) {
322                         pjdlog_info("Resource %s configuration was modified, restarting it.",
323                             cres->hr_name);
324                         role = cres->hr_role;
325                         control_set_role(cres, HAST_ROLE_INIT);
326                         TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
327                         free(cres);
328                         TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
329                         TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
330                         control_set_role(nres, role);
331                 } else if (resource_needs_reload(cres, nres)) {
332                         pjdlog_info("Resource %s configuration was modified, reloading it.",
333                             cres->hr_name);
334                         strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr,
335                             sizeof(cres->hr_remoteaddr));
336                         cres->hr_replication = nres->hr_replication;
337                         cres->hr_timeout = nres->hr_timeout;
338                         if (cres->hr_workerpid != 0) {
339                                 if (kill(cres->hr_workerpid, SIGHUP) < 0) {
340                                         pjdlog_errno(LOG_WARNING,
341                                             "Unable to send SIGHUP to worker process %u",
342                                             (unsigned int)cres->hr_workerpid);
343                                 }
344                         }
345                 }
346         }
347
348         yy_config_free(newcfg);
349         pjdlog_info("Configuration reloaded successfully.");
350         return;
351 failed:
352         if (newcfg != NULL) {
353                 if (newcfg->hc_controlconn != NULL)
354                         proto_close(newcfg->hc_controlconn);
355                 if (newcfg->hc_listenconn != NULL)
356                         proto_close(newcfg->hc_listenconn);
357                 yy_config_free(newcfg);
358         }
359         pjdlog_warning("Configuration not reloaded.");
360 }
361
362 static void
363 terminate_workers(void)
364 {
365         struct hast_resource *res;
366
367         pjdlog_info("Termination signal received, exiting.");
368         TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
369                 if (res->hr_workerpid == 0)
370                         continue;
371                 pjdlog_info("Terminating worker process (resource=%s, role=%s, pid=%u).",
372                     res->hr_name, role2str(res->hr_role), res->hr_workerpid);
373                 if (kill(res->hr_workerpid, SIGTERM) == 0)
374                         continue;
375                 pjdlog_errno(LOG_WARNING,
376                     "Unable to send signal to worker process (resource=%s, role=%s, pid=%u).",
377                     res->hr_name, role2str(res->hr_role), res->hr_workerpid);
378         }
379 }
380
381 static void
382 listen_accept(void)
383 {
384         struct hast_resource *res;
385         struct proto_conn *conn;
386         struct nv *nvin, *nvout, *nverr;
387         const char *resname;
388         const unsigned char *token;
389         char laddr[256], raddr[256];
390         size_t size;
391         pid_t pid;
392         int status;
393
394         proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr));
395         pjdlog_debug(1, "Accepting connection to %s.", laddr);
396
397         if (proto_accept(cfg->hc_listenconn, &conn) < 0) {
398                 pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr);
399                 return;
400         }
401
402         proto_local_address(conn, laddr, sizeof(laddr));
403         proto_remote_address(conn, raddr, sizeof(raddr));
404         pjdlog_info("Connection from %s to %s.", raddr, laddr);
405
406         /* Error in setting timeout is not critical, but why should it fail? */
407         if (proto_timeout(conn, HAST_TIMEOUT) < 0)
408                 pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
409
410         nvin = nvout = nverr = NULL;
411
412         /*
413          * Before receiving any data see if remote host have access to any
414          * resource.
415          */
416         TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
417                 if (proto_address_match(conn, res->hr_remoteaddr))
418                         break;
419         }
420         if (res == NULL) {
421                 pjdlog_error("Client %s isn't known.", raddr);
422                 goto close;
423         }
424         /* Ok, remote host can access at least one resource. */
425
426         if (hast_proto_recv_hdr(conn, &nvin) < 0) {
427                 pjdlog_errno(LOG_ERR, "Unable to receive header from %s",
428                     raddr);
429                 goto close;
430         }
431
432         resname = nv_get_string(nvin, "resource");
433         if (resname == NULL) {
434                 pjdlog_error("No 'resource' field in the header received from %s.",
435                     raddr);
436                 goto close;
437         }
438         pjdlog_debug(2, "%s: resource=%s", raddr, resname);
439         token = nv_get_uint8_array(nvin, &size, "token");
440         /*
441          * NULL token means that this is first conection.
442          */
443         if (token != NULL && size != sizeof(res->hr_token)) {
444                 pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).",
445                     raddr, sizeof(res->hr_token), size);
446                 goto close;
447         }
448
449         /*
450          * From now on we want to send errors to the remote node.
451          */
452         nverr = nv_alloc();
453
454         /* Find resource related to this connection. */
455         TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
456                 if (strcmp(resname, res->hr_name) == 0)
457                         break;
458         }
459         /* Have we found the resource? */
460         if (res == NULL) {
461                 pjdlog_error("No resource '%s' as requested by %s.",
462                     resname, raddr);
463                 nv_add_stringf(nverr, "errmsg", "Resource not configured.");
464                 goto fail;
465         }
466
467         /* Now that we know resource name setup log prefix. */
468         pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
469
470         /* Does the remote host have access to this resource? */
471         if (!proto_address_match(conn, res->hr_remoteaddr)) {
472                 pjdlog_error("Client %s has no access to the resource.", raddr);
473                 nv_add_stringf(nverr, "errmsg", "No access to the resource.");
474                 goto fail;
475         }
476         /* Is the resource marked as secondary? */
477         if (res->hr_role != HAST_ROLE_SECONDARY) {
478                 pjdlog_error("We act as %s for the resource and not as %s as requested by %s.",
479                     role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY),
480                     raddr);
481                 nv_add_stringf(nverr, "errmsg",
482                     "Remote node acts as %s for the resource and not as %s.",
483                     role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY));
484                 goto fail;
485         }
486         /* Does token (if exists) match? */
487         if (token != NULL && memcmp(token, res->hr_token,
488             sizeof(res->hr_token)) != 0) {
489                 pjdlog_error("Token received from %s doesn't match.", raddr);
490                 nv_add_stringf(nverr, "errmsg", "Token doesn't match.");
491                 goto fail;
492         }
493         /*
494          * If there is no token, but we have half-open connection
495          * (only remotein) or full connection (worker process is running)
496          * we have to cancel those and accept the new connection.
497          */
498         if (token == NULL) {
499                 assert(res->hr_remoteout == NULL);
500                 pjdlog_debug(1, "Initial connection from %s.", raddr);
501                 if (res->hr_workerpid != 0) {
502                         assert(res->hr_remotein == NULL);
503                         pjdlog_debug(1,
504                             "Worker process exists (pid=%u), stopping it.",
505                             (unsigned int)res->hr_workerpid);
506                         /* Stop child process. */
507                         if (kill(res->hr_workerpid, SIGINT) < 0) {
508                                 pjdlog_errno(LOG_ERR,
509                                     "Unable to stop worker process (pid=%u)",
510                                     (unsigned int)res->hr_workerpid);
511                                 /*
512                                  * Other than logging the problem we
513                                  * ignore it - nothing smart to do.
514                                  */
515                         }
516                         /* Wait for it to exit. */
517                         else if ((pid = waitpid(res->hr_workerpid,
518                             &status, 0)) != res->hr_workerpid) {
519                                 /* We can only log the problem. */
520                                 pjdlog_errno(LOG_ERR,
521                                     "Waiting for worker process (pid=%u) failed",
522                                     (unsigned int)res->hr_workerpid);
523                         } else {
524                                 child_exit_log(res->hr_workerpid, status);
525                         }
526                         child_cleanup(res);
527                 } else if (res->hr_remotein != NULL) {
528                         char oaddr[256];
529
530                         proto_remote_address(conn, oaddr, sizeof(oaddr));
531                         pjdlog_debug(1,
532                             "Canceling half-open connection from %s on connection from %s.",
533                             oaddr, raddr);
534                         proto_close(res->hr_remotein);
535                         res->hr_remotein = NULL;
536                 }
537         }
538
539         /*
540          * Checks and cleanups are done.
541          */
542
543         if (token == NULL) {
544                 arc4random_buf(res->hr_token, sizeof(res->hr_token));
545                 nvout = nv_alloc();
546                 nv_add_uint8_array(nvout, res->hr_token,
547                     sizeof(res->hr_token), "token");
548                 if (nv_error(nvout) != 0) {
549                         pjdlog_common(LOG_ERR, 0, nv_error(nvout),
550                             "Unable to prepare return header for %s", raddr);
551                         nv_add_stringf(nverr, "errmsg",
552                             "Remote node was unable to prepare return header: %s.",
553                             strerror(nv_error(nvout)));
554                         goto fail;
555                 }
556                 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) {
557                         int error = errno;
558
559                         pjdlog_errno(LOG_ERR, "Unable to send response to %s",
560                             raddr);
561                         nv_add_stringf(nverr, "errmsg",
562                             "Remote node was unable to send response: %s.",
563                             strerror(error));
564                         goto fail;
565                 }
566                 res->hr_remotein = conn;
567                 pjdlog_debug(1, "Incoming connection from %s configured.",
568                     raddr);
569         } else {
570                 res->hr_remoteout = conn;
571                 pjdlog_debug(1, "Outgoing connection to %s configured.", raddr);
572                 hastd_secondary(res, nvin);
573         }
574         nv_free(nvin);
575         nv_free(nvout);
576         nv_free(nverr);
577         pjdlog_prefix_set("%s", "");
578         return;
579 fail:
580         if (nv_error(nverr) != 0) {
581                 pjdlog_common(LOG_ERR, 0, nv_error(nverr),
582                     "Unable to prepare error header for %s", raddr);
583                 goto close;
584         }
585         if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) {
586                 pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr);
587                 goto close;
588         }
589 close:
590         if (nvin != NULL)
591                 nv_free(nvin);
592         if (nvout != NULL)
593                 nv_free(nvout);
594         if (nverr != NULL)
595                 nv_free(nverr);
596         proto_close(conn);
597         pjdlog_prefix_set("%s", "");
598 }
599
600 static void
601 main_loop(void)
602 {
603         struct hast_resource *res;
604         struct timeval seltimeout;
605         struct timespec sigtimeout;
606         int fd, maxfd, ret, signo;
607         sigset_t mask;
608         fd_set rfds;
609
610         seltimeout.tv_sec = REPORT_INTERVAL;
611         seltimeout.tv_usec = 0;
612         sigtimeout.tv_sec = 0;
613         sigtimeout.tv_nsec = 0;
614
615         PJDLOG_VERIFY(sigemptyset(&mask) == 0);
616         PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0);
617         PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0);
618         PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0);
619         PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0);
620
621         for (;;) {
622                 while ((signo = sigtimedwait(&mask, NULL, &sigtimeout)) != -1) {
623                         switch (signo) {
624                         case SIGINT:
625                         case SIGTERM:
626                                 sigexit_received = true;
627                                 terminate_workers();
628                                 exit(EX_OK);
629                                 break;
630                         case SIGCHLD:
631                                 child_exit();
632                                 break;
633                         case SIGHUP:
634                                 hastd_reload();
635                                 break;
636                         default:
637                                 assert(!"invalid condition");
638                         }
639                 }
640
641                 /* Setup descriptors for select(2). */
642                 FD_ZERO(&rfds);
643                 maxfd = fd = proto_descriptor(cfg->hc_controlconn);
644                 assert(fd >= 0);
645                 FD_SET(fd, &rfds);
646                 fd = proto_descriptor(cfg->hc_listenconn);
647                 assert(fd >= 0);
648                 FD_SET(fd, &rfds);
649                 maxfd = fd > maxfd ? fd : maxfd;
650                 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
651                         if (res->hr_event == NULL)
652                                 continue;
653                         fd = proto_descriptor(res->hr_event);
654                         assert(fd >= 0);
655                         FD_SET(fd, &rfds);
656                         maxfd = fd > maxfd ? fd : maxfd;
657                 }
658
659                 assert(maxfd + 1 <= (int)FD_SETSIZE);
660                 ret = select(maxfd + 1, &rfds, NULL, NULL, &seltimeout);
661                 if (ret == 0)
662                         hook_check(false);
663                 else if (ret == -1) {
664                         if (errno == EINTR)
665                                 continue;
666                         KEEP_ERRNO((void)pidfile_remove(pfh));
667                         pjdlog_exit(EX_OSERR, "select() failed");
668                 }
669
670                 if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds))
671                         control_handle(cfg);
672                 if (FD_ISSET(proto_descriptor(cfg->hc_listenconn), &rfds))
673                         listen_accept();
674                 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
675                         if (res->hr_event == NULL)
676                                 continue;
677                         if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) {
678                                 if (event_recv(res) == 0)
679                                         continue;
680                                 /* The worker process exited? */
681                                 proto_close(res->hr_event);
682                                 res->hr_event = NULL;
683                         }
684                 }
685         }
686 }
687
688 int
689 main(int argc, char *argv[])
690 {
691         const char *pidfile;
692         pid_t otherpid;
693         bool foreground;
694         int debuglevel;
695         sigset_t mask;
696
697         g_gate_load();
698
699         foreground = false;
700         debuglevel = 0;
701         pidfile = HASTD_PIDFILE;
702
703         for (;;) {
704                 int ch;
705
706                 ch = getopt(argc, argv, "c:dFhP:");
707                 if (ch == -1)
708                         break;
709                 switch (ch) {
710                 case 'c':
711                         cfgpath = optarg;
712                         break;
713                 case 'd':
714                         debuglevel++;
715                         break;
716                 case 'F':
717                         foreground = true;
718                         break;
719                 case 'P':
720                         pidfile = optarg;
721                         break;
722                 case 'h':
723                 default:
724                         usage();
725                 }
726         }
727         argc -= optind;
728         argv += optind;
729
730         pjdlog_debug_set(debuglevel);
731
732         pfh = pidfile_open(pidfile, 0600, &otherpid);
733         if (pfh == NULL) {
734                 if (errno == EEXIST) {
735                         pjdlog_exitx(EX_TEMPFAIL,
736                             "Another hastd is already running, pid: %jd.",
737                             (intmax_t)otherpid);
738                 }
739                 /* If we cannot create pidfile from other reasons, only warn. */
740                 pjdlog_errno(LOG_WARNING, "Unable to open or create pidfile");
741         }
742
743         cfg = yy_config_parse(cfgpath, true);
744         assert(cfg != NULL);
745
746         PJDLOG_VERIFY(sigemptyset(&mask) == 0);
747         PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0);
748         PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0);
749         PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0);
750         PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0);
751         PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
752
753         /* Listen on control address. */
754         if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) {
755                 KEEP_ERRNO((void)pidfile_remove(pfh));
756                 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s",
757                     cfg->hc_controladdr);
758         }
759         /* Listen for remote connections. */
760         if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) {
761                 KEEP_ERRNO((void)pidfile_remove(pfh));
762                 pjdlog_exit(EX_OSERR, "Unable to listen on address %s",
763                     cfg->hc_listenaddr);
764         }
765
766         if (!foreground) {
767                 if (daemon(0, 0) < 0) {
768                         KEEP_ERRNO((void)pidfile_remove(pfh));
769                         pjdlog_exit(EX_OSERR, "Unable to daemonize");
770                 }
771
772                 /* Start logging to syslog. */
773                 pjdlog_mode_set(PJDLOG_MODE_SYSLOG);
774
775                 /* Write PID to a file. */
776                 if (pidfile_write(pfh) < 0) {
777                         pjdlog_errno(LOG_WARNING,
778                             "Unable to write PID to a file");
779                 }
780         }
781
782         hook_init();
783
784         main_loop();
785
786         exit(0);
787 }