From cb82d8f50a1bf604772a395d56b3961f5cd9f86b Mon Sep 17 00:00:00 2001 From: pjd Date: Sun, 17 Oct 2010 16:43:20 +0000 Subject: [PATCH] MFC r213183,r213428,r213429,r213430,r213529,r213530,r213531,r213533,r213579, r213580,r213938,r213939,r213981: r213183: Plug memory leak on fork(2) failure. Submitted by: Mikolaj Golub r213428: We can't mask ignored signal, so install dummy signal handler for SIGCHLD before masking it. This fixes bogus reports about hooks running for too long and other problems related to garbage-collecting child processes. Reported by: Mikolaj Golub r213429: hook_check() is now only used to report about long-running hooks, so the argument is redundant, remove it. r213430: Decrease report interval to 5 seconds, as this also means we will check for signals every 5 seconds and not every 10 seconds as before. r213529: Don't close local component on exit as we can hang waiting on g_waitidle. I'm unable to reproduce the race described in comment anymore and also the comment is incorrect - localfd represents local component from configuration file, e.g. /dev/da0 and not HAST provider. Reported by: Mikolaj Golub r213530: Start the guard thread first, so we can handle signals from the very beginning. Reported by: Mikolaj Golub r213531: Log error message when we fail to destroy ggate provider. r213533: Clear ggate structures before using them. We don't initialize all the fields and there can be some garbage from the stack. r213579: We close the event socketpair early in the mainloop to prevent spamming with error messages, so when we clean up after child process, we have to check if the event socketpair is still there. Submitted by: Mikolaj Golub r213580: We can't zero out ggio request, as we have some fields in there we initialize once during start-up. Reported by: Mikolaj Golub r213938: Clear signal mask before executing a hook. Submitted by: Mikolaj Golub r213939: Use one fprintf() instead of two. 
r213981: Log correct connection when canceling half-open connection. Submitted by: Mikolaj Golub git-svn-id: svn://svn.freebsd.org/base/stable/8@213984 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- sbin/hastd/control.c | 6 ++++-- sbin/hastd/hastd.c | 18 +++++++++++++++--- sbin/hastd/hooks.c | 16 +++++----------- sbin/hastd/hooks.h | 2 +- sbin/hastd/pjdlog.c | 3 +-- sbin/hastd/primary.c | 23 ++++++++++++----------- 6 files changed, 38 insertions(+), 30 deletions(-) diff --git a/sbin/hastd/control.c b/sbin/hastd/control.c index b1cb10b31..49e0d74fc 100644 --- a/sbin/hastd/control.c +++ b/sbin/hastd/control.c @@ -58,8 +58,10 @@ child_cleanup(struct hast_resource *res) proto_close(res->hr_ctrl); res->hr_ctrl = NULL; - proto_close(res->hr_event); - res->hr_event = NULL; + if (res->hr_event != NULL) { + proto_close(res->hr_event); + res->hr_event = NULL; + } res->hr_workerpid = 0; } diff --git a/sbin/hastd/hastd.c b/sbin/hastd/hastd.c index e47b19d49..a025d6b8a 100644 --- a/sbin/hastd/hastd.c +++ b/sbin/hastd/hastd.c @@ -69,7 +69,7 @@ bool sigexit_received = false; struct pidfh *pfh; /* How often check for hooks running for too long. */ -#define REPORT_INTERVAL 10 +#define REPORT_INTERVAL 5 static void usage(void) @@ -527,7 +527,8 @@ listen_accept(void) } else if (res->hr_remotein != NULL) { char oaddr[256]; - proto_remote_address(conn, oaddr, sizeof(oaddr)); + proto_remote_address(res->hr_remotein, oaddr, + sizeof(oaddr)); pjdlog_debug(1, "Canceling half-open connection from %s on connection from %s.", oaddr, raddr); @@ -659,7 +660,7 @@ main_loop(void) assert(maxfd + 1 <= (int)FD_SETSIZE); ret = select(maxfd + 1, &rfds, NULL, NULL, &seltimeout); if (ret == 0) - hook_check(false); + hook_check(); else if (ret == -1) { if (errno == EINTR) continue; @@ -685,6 +686,12 @@ main_loop(void) } } +static void +dummy_sighandler(int sig __unused) +{ + /* Nothing to do. 
*/ +} + int main(int argc, char *argv[]) { @@ -743,6 +750,11 @@ main(int argc, char *argv[]) cfg = yy_config_parse(cfgpath, true); assert(cfg != NULL); + /* + * Because SIGCHLD is ignored by default, setup dummy handler for it, + * so we can mask it. + */ + PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR); PJDLOG_VERIFY(sigemptyset(&mask) == 0); PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0); PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0); diff --git a/sbin/hastd/hooks.c b/sbin/hastd/hooks.c index eff2f6f95..2e43357fd 100644 --- a/sbin/hastd/hooks.c +++ b/sbin/hastd/hooks.c @@ -293,23 +293,13 @@ hook_check_one(pid_t pid, int status) } void -hook_check(bool sigchld) +hook_check(void) { struct hookproc *hp, *hp2; - int status; time_t now; - pid_t pid; assert(hooks_initialized); - /* - * If SIGCHLD was received, garbage collect finished processes. - */ - if (sigchld) { - while ((pid = wait3(&status, WNOHANG, NULL)) > 0) - hook_check_one(pid, status); - } - /* * Report about processes that are running for a long time. */ @@ -364,6 +354,7 @@ hook_execv(const char *path, va_list ap) struct hookproc *hp; char *args[64]; unsigned int ii; + sigset_t mask; pid_t pid; assert(hooks_initialized); @@ -388,9 +379,12 @@ hook_execv(const char *path, va_list ap) switch (pid) { case -1: /* Error. */ pjdlog_errno(LOG_ERR, "Unable to fork to execute %s", path); + hook_free(hp); return; case 0: /* Child. 
*/ descriptors(); + PJDLOG_VERIFY(sigemptyset(&mask) == 0); + PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); execv(path, args); pjdlog_errno(LOG_ERR, "Unable to execute %s", path); exit(EX_SOFTWARE); diff --git a/sbin/hastd/hooks.h b/sbin/hastd/hooks.h index 5cc57df5e..4ce435e2b 100644 --- a/sbin/hastd/hooks.h +++ b/sbin/hastd/hooks.h @@ -41,7 +41,7 @@ void hook_init(void); void hook_fini(void); void hook_check_one(pid_t pid, int status); -void hook_check(bool sigchld); +void hook_check(void); void hook_exec(const char *path, ...); void hook_execv(const char *path, va_list ap); diff --git a/sbin/hastd/pjdlog.c b/sbin/hastd/pjdlog.c index 891210586..6a72e7a72 100644 --- a/sbin/hastd/pjdlog.c +++ b/sbin/hastd/pjdlog.c @@ -214,8 +214,7 @@ pjdlogv_common(int loglevel, int debuglevel, int error, const char *fmt, /* Attach debuglevel if this is debug log. */ if (loglevel == LOG_DEBUG) fprintf(out, "[%d]", debuglevel); - fprintf(out, " "); - fprintf(out, "%s", pjdlog_prefix); + fprintf(out, " %s", pjdlog_prefix); vfprintf(out, fmt, ap); if (error != -1) fprintf(out, ": %s.", strerror(error)); diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c index d99bfd7ae..aabbecf33 100644 --- a/sbin/hastd/primary.c +++ b/sbin/hastd/primary.c @@ -234,21 +234,17 @@ cleanup(struct hast_resource *res) /* Remember errno. */ rerrno = errno; - /* - * Close descriptor to /dev/hast/ - * to work-around race in the kernel. - */ - close(res->hr_localfd); - /* Destroy ggate provider if we created one. 
*/ if (res->hr_ggateunit >= 0) { struct g_gate_ctl_destroy ggiod; + bzero(&ggiod, sizeof(ggiod)); ggiod.gctl_version = G_GATE_VERSION; ggiod.gctl_unit = res->hr_ggateunit; ggiod.gctl_force = 1; if (ioctl(res->hr_ggatefd, G_GATE_CMD_DESTROY, &ggiod) < 0) { - pjdlog_warning("Unable to destroy hast/%s device", + pjdlog_errno(LOG_WARNING, + "Unable to destroy hast/%s device", res->hr_provname); } res->hr_ggateunit = -1; @@ -705,6 +701,7 @@ init_ggate(struct hast_resource *res) * Create provider before trying to connect, as connection failure * is not critical, but may take some time. */ + bzero(&ggiocreate, sizeof(ggiocreate)); ggiocreate.gctl_version = G_GATE_VERSION; ggiocreate.gctl_mediasize = res->hr_datasize; ggiocreate.gctl_sectorsize = res->hr_local_sectorsize; @@ -714,7 +711,6 @@ init_ggate(struct hast_resource *res) ggiocreate.gctl_unit = G_GATE_NAME_GIVEN; snprintf(ggiocreate.gctl_name, sizeof(ggiocreate.gctl_name), "hast/%s", res->hr_provname); - bzero(ggiocreate.gctl_info, sizeof(ggiocreate.gctl_info)); if (ioctl(res->hr_ggatefd, G_GATE_CMD_CREATE, &ggiocreate) == 0) { pjdlog_info("Device hast/%s created.", res->hr_provname); res->hr_ggateunit = ggiocreate.gctl_unit; @@ -732,6 +728,7 @@ init_ggate(struct hast_resource *res) * provider died and didn't clean up. In that case we will start from * where he left of. */ + bzero(&ggiocancel, sizeof(ggiocancel)); ggiocancel.gctl_version = G_GATE_VERSION; ggiocancel.gctl_unit = G_GATE_NAME_GIVEN; snprintf(ggiocancel.gctl_name, sizeof(ggiocancel.gctl_name), "hast/%s", @@ -796,6 +793,12 @@ hastd_primary(struct hast_resource *res) init_local(res); init_ggate(res); init_environment(res); + /* + * Create the guard thread first, so we can handle signals from the + * very begining. 
+ */ + error = pthread_create(&td, NULL, guard_thread, res); + assert(error == 0); /* * Create the control thread before sending any event to the parent, * as we can deadlock when parent sends control request to worker, @@ -818,9 +821,7 @@ hastd_primary(struct hast_resource *res) assert(error == 0); error = pthread_create(&td, NULL, ggate_send_thread, res); assert(error == 0); - error = pthread_create(&td, NULL, sync_thread, res); - assert(error == 0); - (void)guard_thread(res); + (void)sync_thread(res); } static void -- 2.45.0