2 * Copyright (c) 2009-2010 The FreeBSD Foundation
5 * This software was developed by Pawel Jakub Dawidek under sponsorship from
6 * the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/linker.h>
35 #include <sys/module.h>
50 #include <activemap.h>
55 #include "hast_proto.h"
59 /* Path to configuration file. */
60 static const char *cfgpath = HAST_CONFIG;
61 /* Hastd configuration. */
62 static struct hastd_config *cfg;
63 /* Was SIGCHLD signal received? */
64 static bool sigchld_received = false;
65 /* Was SIGHUP signal received? */
66 static bool sighup_received = false;
67 /* Was SIGINT or SIGTERM signal received? */
68 bool sigexit_received = false;
69 /* PID file handle. */
76 errx(EX_USAGE, "[-dFh] [-c config] [-P pidfile]");
85 sigchld_received = true;
88 sighup_received = true;
91 assert(!"invalid condition");
99 if (modfind("g_gate") == -1) {
100 /* Not present in kernel, try loading it. */
101 if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) {
102 if (errno != EEXIST) {
103 pjdlog_exit(EX_OSERR,
104 "Unable to load geom_gate module");
111 child_exit_log(unsigned int pid, int status)
114 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
115 pjdlog_debug(1, "Worker process exited gracefully (pid=%u).",
117 } else if (WIFSIGNALED(status)) {
118 pjdlog_error("Worker process killed (pid=%u, signal=%d).",
119 pid, WTERMSIG(status));
121 pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).",
122 pid, WIFEXITED(status) ? WEXITSTATUS(status) : -1);
129 struct hast_resource *res;
133 while ((pid = wait3(&status, WNOHANG, NULL)) > 0) {
134 /* Find resource related to the process that just exited. */
135 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
136 if (pid == res->hr_workerpid)
141 * This can happen when a new connection arrives and we
142 * cancel the child responsible for the old one.
146 pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
147 role2str(res->hr_role));
148 child_exit_log(pid, status);
149 proto_close(res->hr_ctrl);
150 res->hr_workerpid = 0;
151 if (res->hr_role == HAST_ROLE_PRIMARY) {
153 * Restart child process if it was killed by signal
154 * or exited because of temporary problem.
156 if (WIFSIGNALED(status) ||
157 (WIFEXITED(status) &&
158 WEXITSTATUS(status) == EX_TEMPFAIL)) {
160 pjdlog_info("Restarting worker process.");
163 res->hr_role = HAST_ROLE_INIT;
164 pjdlog_info("Changing resource role back to %s.",
165 role2str(res->hr_role));
168 pjdlog_prefix_set("%s", "");
177 pjdlog_warning("Configuration reload is not implemented.");
183 struct hast_resource *res;
184 struct proto_conn *conn;
185 struct nv *nvin, *nvout, *nverr;
187 const unsigned char *token;
188 char laddr[256], raddr[256];
193 proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr));
194 pjdlog_debug(1, "Accepting connection to %s.", laddr);
196 if (proto_accept(cfg->hc_listenconn, &conn) < 0) {
197 pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr);
201 proto_local_address(conn, laddr, sizeof(laddr));
202 proto_remote_address(conn, raddr, sizeof(raddr));
203 pjdlog_info("Connection from %s to %s.", laddr, raddr);
205 /* Error in setting timeout is not critical, but why should it fail? */
206 if (proto_timeout(conn, HAST_TIMEOUT) < 0)
207 pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
209 nvin = nvout = nverr = NULL;
212 * Before receiving any data see if remote host has access to any
215 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
216 if (proto_address_match(conn, res->hr_remoteaddr))
220 pjdlog_error("Client %s isn't known.", raddr);
223 /* Ok, remote host can access at least one resource. */
225 if (hast_proto_recv_hdr(conn, &nvin) < 0) {
226 pjdlog_errno(LOG_ERR, "Unable to receive header from %s",
231 resname = nv_get_string(nvin, "resource");
232 if (resname == NULL) {
233 pjdlog_error("No 'resource' field in the header received from %s.",
237 pjdlog_debug(2, "%s: resource=%s", raddr, resname);
238 token = nv_get_uint8_array(nvin, &size, "token");
240 * NULL token means that this is the first connection.
242 if (token != NULL && size != sizeof(res->hr_token)) {
243 pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).",
244 raddr, sizeof(res->hr_token), size);
249 * From now on we want to send errors to the remote node.
253 /* Find resource related to this connection. */
254 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
255 if (strcmp(resname, res->hr_name) == 0)
258 /* Have we found the resource? */
260 pjdlog_error("No resource '%s' as requested by %s.",
262 nv_add_stringf(nverr, "errmsg", "Resource not configured.");
266 /* Now that we know the resource name, set up the log prefix. */
267 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
269 /* Does the remote host have access to this resource? */
270 if (!proto_address_match(conn, res->hr_remoteaddr)) {
271 pjdlog_error("Client %s has no access to the resource.", raddr);
272 nv_add_stringf(nverr, "errmsg", "No access to the resource.");
275 /* Is the resource marked as secondary? */
276 if (res->hr_role != HAST_ROLE_SECONDARY) {
277 pjdlog_error("We act as %s for the resource and not as %s as requested by %s.",
278 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY),
280 nv_add_stringf(nverr, "errmsg",
281 "Remote node acts as %s for the resource and not as %s.",
282 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY));
285 /* Does token (if exists) match? */
286 if (token != NULL && memcmp(token, res->hr_token,
287 sizeof(res->hr_token)) != 0) {
288 pjdlog_error("Token received from %s doesn't match.", raddr);
289 nv_add_stringf(nverr, "errmsg", "Toke doesn't match.");
293 * If there is no token, but we have half-open connection
294 * (only remotein) or full connection (worker process is running)
295 * we have to cancel those and accept the new connection.
298 assert(res->hr_remoteout == NULL);
299 pjdlog_debug(1, "Initial connection from %s.", raddr);
300 if (res->hr_workerpid != 0) {
301 assert(res->hr_remotein == NULL);
303 "Worker process exists (pid=%u), stopping it.",
304 (unsigned int)res->hr_workerpid);
305 /* Stop child process. */
306 if (kill(res->hr_workerpid, SIGINT) < 0) {
307 pjdlog_errno(LOG_ERR,
308 "Unable to stop worker process (pid=%u)",
309 (unsigned int)res->hr_workerpid);
311 * Other than logging the problem we
312 * ignore it - nothing smart to do.
315 /* Wait for it to exit. */
316 else if ((pid = waitpid(res->hr_workerpid,
317 &status, 0)) != res->hr_workerpid) {
318 /* We can only log the problem. */
319 pjdlog_errno(LOG_ERR,
320 "Waiting for worker process (pid=%u) failed",
321 (unsigned int)res->hr_workerpid);
323 child_exit_log(res->hr_workerpid, status);
325 res->hr_workerpid = 0;
326 } else if (res->hr_remotein != NULL) {
329 proto_remote_address(conn, oaddr, sizeof(oaddr));
331 "Canceling half-open connection from %s on connection from %s.",
333 proto_close(res->hr_remotein);
334 res->hr_remotein = NULL;
339 * Checks and cleanups are done.
343 arc4random_buf(res->hr_token, sizeof(res->hr_token));
345 nv_add_uint8_array(nvout, res->hr_token,
346 sizeof(res->hr_token), "token");
347 if (nv_error(nvout) != 0) {
348 pjdlog_common(LOG_ERR, 0, nv_error(nvout),
349 "Unable to prepare return header for %s", raddr);
350 nv_add_stringf(nverr, "errmsg",
351 "Remote node was unable to prepare return header: %s.",
352 strerror(nv_error(nvout)));
355 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) {
358 pjdlog_errno(LOG_ERR, "Unable to send response to %s",
360 nv_add_stringf(nverr, "errmsg",
361 "Remote node was unable to send response: %s.",
365 res->hr_remotein = conn;
366 pjdlog_debug(1, "Incoming connection from %s configured.",
369 res->hr_remoteout = conn;
370 pjdlog_debug(1, "Outgoing connection to %s configured.", raddr);
371 hastd_secondary(res, nvin);
376 pjdlog_prefix_set("%s", "");
379 if (nv_error(nverr) != 0) {
380 pjdlog_common(LOG_ERR, 0, nv_error(nverr),
381 "Unable to prepare error header for %s", raddr);
384 if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) {
385 pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr);
396 pjdlog_prefix_set("%s", "");
406 if (sigchld_received) {
407 sigchld_received = false;
410 if (sighup_received) {
411 sighup_received = false;
419 /* Setup descriptors for select(2). */
420 #define SETUP_FD(conn) do { \
421 fd = proto_descriptor(conn); \
423 maxfd = fd > maxfd ? fd : maxfd; \
428 SETUP_FD(cfg->hc_controlconn);
429 SETUP_FD(cfg->hc_listenconn);
432 ret = select(maxfd + 1, &rfds, &wfds, NULL, NULL);
436 KEEP_ERRNO((void)pidfile_remove(pfh));
437 pjdlog_exit(EX_OSERR, "select() failed");
440 #define ISSET_FD(conn) \
441 (FD_ISSET((fd = proto_descriptor(conn)), &rfds) || FD_ISSET(fd, &wfds))
442 if (ISSET_FD(cfg->hc_controlconn))
444 if (ISSET_FD(cfg->hc_listenconn))
451 main(int argc, char *argv[])
462 pidfile = HASTD_PIDFILE;
467 ch = getopt(argc, argv, "c:dFhP:");
491 pjdlog_debug_set(debuglevel);
493 pfh = pidfile_open(pidfile, 0600, &otherpid);
495 if (errno == EEXIST) {
496 pjdlog_exitx(EX_TEMPFAIL,
497 "Another hastd is already running, pid: %jd.",
500 /* If we cannot create pidfile for other reasons, only warn. */
501 pjdlog_errno(LOG_WARNING, "Cannot open or create pidfile");
504 cfg = yy_config_parse(cfgpath);
507 signal(SIGHUP, sighandler);
508 signal(SIGCHLD, sighandler);
510 /* Listen on control address. */
511 if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) {
512 KEEP_ERRNO((void)pidfile_remove(pfh));
513 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s",
514 cfg->hc_controladdr);
516 /* Listen for remote connections. */
517 if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) {
518 KEEP_ERRNO((void)pidfile_remove(pfh));
519 pjdlog_exit(EX_OSERR, "Unable to listen on address %s",
524 if (daemon(0, 0) < 0) {
525 KEEP_ERRNO((void)pidfile_remove(pfh));
526 pjdlog_exit(EX_OSERR, "Unable to daemonize");
529 /* Start logging to syslog. */
530 pjdlog_mode_set(PJDLOG_MODE_SYSLOG);
532 /* Write PID to a file. */
533 if (pidfile_write(pfh) < 0) {
534 pjdlog_errno(LOG_WARNING,
535 "Unable to write PID to a file");