2 * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2002 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: os.c,v 1.89.12.5 2009/03/02 03:03:54 marka Exp $ */
25 #include <sys/types.h> /* dev_t FreeBSD 2.1 */
31 #include <grp.h> /* Required for initgroups() on IRIX. */
42 #include <isc/buffer.h>
44 #include <isc/print.h>
45 #include <isc/resource.h>
46 #include <isc/result.h>
47 #include <isc/strerror.h>
48 #include <isc/string.h>
50 #include <named/main.h>
53 #include <named/ns_smf_globals.h>
/*
 * File-scope state.
 *
 * pidfile:   heap copy of the pid file path; allocated and filled in by
 *            ns_os_writepidfile() and examined by cleanup_pidfile().
 * devnullfd: descriptor returned by open() on /dev/null in
 *            ns_os_opendevnull(); -1 until then. ns_os_daemonize() dups it
 *            onto stdin, stdout and stderr.
 * ISC_FACILITY: syslog facility handed to openlog() in setup_syslog().
 */
56 static char *pidfile = NULL;
57 static int devnullfd = -1;
60 #define ISC_FACILITY LOG_DAEMON
64 * If there's no <linux/capability.h>, we don't care about <sys/prctl.h>
66 #ifndef HAVE_LINUX_CAPABILITY_H
67 #undef HAVE_SYS_PRCTL_H
72 * (T) HAVE_LINUXTHREADS
73 * (C) HAVE_SYS_CAPABILITY_H (or HAVE_LINUX_CAPABILITY_H)
74 * (P) HAVE_SYS_PRCTL_H
75 * The possible cases are:
76 * none: setuid() normally
78 * C: setuid() normally, drop caps (keep CAP_SETUID)
79 * T+C: no setuid(), drop caps (don't keep CAP_SETUID)
80 * T+C+P: setuid() early, drop caps (keep CAP_SETUID)
81 * C+P: setuid() normally, drop caps (keep CAP_SETUID)
86 * caps = BIND_SERVICE + CHROOT + SETGID
87 * if ((T && C && P) || !T)
92 * if (T && C && P && -u)
99 * if (C && (P || !-u))
100 * caps = BIND_SERVICE
104 * It will be nice when Linux threads work properly with setuid().
/*
 * State shared by the privilege-handling and daemonizing routines below.
 * NOTE(review): the #endif lines that close the conditionals in this run are
 * not visible in this listing, so the exact scope of each #ifdef must be
 * confirmed against the complete file.
 */
107 #ifdef HAVE_LINUXTHREADS
/* NOTE(review): no assignment to mainpid is visible here; presumably the pid of the initial thread -- confirm. */
108 static pid_t mainpid = 0;
/* Account to run as; looked up in ns_os_inituserinfo(), consumed by ns_os_changeuser(). */
111 static struct passwd *runas_pw = NULL;
/* Set by ns_os_changeuser() so that a second call returns without doing anything. */
112 static isc_boolean_t done_setuid = ISC_FALSE;
/* Pipe created in ns_os_daemonize(); the parent reads dfd[0], ns_os_started() writes dfd[1]. */
113 static int dfd[2] = { -1, -1 };
115 #ifdef HAVE_LINUX_CAPABILITY_H
/* Read by the guard in linux_setcaps(); no assignment is visible in this listing. */
117 static isc_boolean_t non_root = ISC_FALSE;
/* Set to ISC_TRUE in linux_keepcaps(); read by the guard in linux_setcaps(). */
118 static isc_boolean_t non_root_caps = ISC_FALSE;
/*
 * Capability and prctl plumbing.
 *
 * Visible here: <sys/capability.h> is included when HAVE_SYS_CAPABILITY_H is
 * defined; a second path includes <linux/capability.h> and maps SYS_capset
 * onto __NR_capset; <sys/prctl.h> is included under HAVE_SYS_PRCTL_H with a
 * fallback definition of PR_SET_KEEPCAPS (8); SETCAPS_FUNC names the
 * primitive used by linux_setcaps() for its error message, and cap_t is
 * typedef-ed to unsigned int in the second SETCAPS_FUNC variant.
 *
 * NOTE(review): the #else / #ifndef lines separating these alternatives, and
 * the comment delimiters around the explanatory text, are missing from this
 * listing -- confirm the nesting against the complete file.
 */
120 #ifdef HAVE_SYS_CAPABILITY_H
121 #include <sys/capability.h>
124 * We define _LINUX_FS_H to prevent it from being included. We don't need
125 * anything from it, and the files it includes cause warnings with 2.2
126 * kernels, and compilation failures (due to conflicts between <linux/string.h>
127 * and <string.h>) on 2.3 kernels.
130 #include <linux/capability.h>
134 #include <asm/unistd.h> /* Slackware 4.0 needs this. */
135 #endif /* __NR_capset */
136 #define SYS_capset __NR_capset
137 #endif /* SYS_capset */
138 #endif /* HAVE_SYS_CAPABILITY_H */
140 #ifdef HAVE_SYS_PRCTL_H
141 #include <sys/prctl.h> /* Required for prctl(). */
144 * If the value of PR_SET_KEEPCAPS is not in <sys/prctl.h>, define it
145 * here. This allows setuid() to work on systems running a new enough
146 * kernel but with /usr/include/linux pointing to "standard" kernel
149 #ifndef PR_SET_KEEPCAPS
150 #define PR_SET_KEEPCAPS 8
153 #endif /* HAVE_SYS_PRCTL_H */
156 #define SETCAPS_FUNC "cap_set_proc "
158 typedef unsigned int cap_t;
159 #define SETCAPS_FUNC "syscall(capset) "
160 #endif /* HAVE_LIBCAP */
/*
 * linux_setcaps: install `caps` as the capability set of the current process.
 *
 * Visible behaviour:
 *  - a guard tests getuid(), non_root_caps and non_root before anything else;
 *  - caphead and cap are zeroed, the header version is set to
 *    _LINUX_CAPABILITY_VERSION, and both the effective and the permitted
 *    masks are set to `caps`;
 *  - the set is applied with cap_set_proc(caps) or with
 *    syscall(SYS_capset, &caphead, &cap);
 *  - a negative return is reported through ns_main_earlyfatal() together
 *    with the isc__strerror() text for errno.
 *
 * NOTE(review): interior lines are missing from this listing. The statement
 * controlled by the guard (presumably an early return) and the preprocessor
 * conditionals choosing between the two primitives (presumably HAVE_LIBCAP)
 * are not visible -- confirm against the complete file.
 */
163 linux_setcaps(cap_t caps) {
165 struct __user_cap_header_struct caphead;
166 struct __user_cap_data_struct cap;
168 char strbuf[ISC_STRERRORSIZE];
170 if ((getuid() != 0 && !non_root_caps) || non_root)
173 memset(&caphead, 0, sizeof(caphead));
174 caphead.version = _LINUX_CAPABILITY_VERSION;
176 memset(&cap, 0, sizeof(cap));
177 cap.effective = caps;
178 cap.permitted = caps;
182 if (cap_set_proc(caps) < 0) {
184 if (syscall(SYS_capset, &caphead, &cap) < 0) {
186 isc__strerror(errno, strbuf, sizeof(strbuf));
187 ns_main_earlyfatal(SETCAPS_FUNC "failed: %s:"
188 " please ensure that the capset kernel"
189 " module is loaded. see insmod(8)",
/*
 * SET_CAP / INIT_CAP: helpers used by linux_initialprivs() and
 * linux_minprivs() to build the capability set `caps`.
 *
 * First variant (uses cap_get_flag / cap_set_flag): SET_CAP queries the
 * CAP_PERMITTED flag of the capability in `curcaps` and, when the query
 * succeeds and the flag is set, sets the capability in both the
 * CAP_EFFECTIVE and CAP_PERMITTED sets of `caps`. A later fragment checks
 * `caps` and `curcaps` (from cap_get_proc()) for NULL and reports
 * "cap_init failed" / "cap_get_proc failed"; the #define line naming that
 * macro is not visible (presumably INIT_CAP).
 *
 * Second variant: `caps` is a plain bit mask; SET_CAP ORs in bit `flag` and
 * INIT_CAP clears the mask.
 *
 * NOTE(review): the two messages that follow cap_set_flag() say
 * "cap_set_proc failed"; they look like they should name cap_set_flag.
 * The lines testing `err` are missing from this listing, so confirm first.
 * NOTE(review): (1 << (flag)) shifts a signed int; 1U would avoid undefined
 * behaviour should a capability number ever reach 31.
 */
195 #define SET_CAP(flag) \
198 cap_flag_value_t curval; \
199 err = cap_get_flag(curcaps, capval, CAP_PERMITTED, &curval); \
200 if (err != -1 && curval) { \
201 err = cap_set_flag(caps, CAP_EFFECTIVE, 1, &capval, CAP_SET); \
203 isc__strerror(errno, strbuf, sizeof(strbuf)); \
204 ns_main_earlyfatal("cap_set_proc failed: %s", strbuf); \
207 err = cap_set_flag(caps, CAP_PERMITTED, 1, &capval, CAP_SET); \
209 isc__strerror(errno, strbuf, sizeof(strbuf)); \
210 ns_main_earlyfatal("cap_set_proc failed: %s", strbuf); \
217 if (caps == NULL) { \
218 isc__strerror(errno, strbuf, sizeof(strbuf)); \
219 ns_main_earlyfatal("cap_init failed: %s", strbuf); \
221 curcaps = cap_get_proc(); \
222 if (curcaps == NULL) { \
223 isc__strerror(errno, strbuf, sizeof(strbuf)); \
224 ns_main_earlyfatal("cap_get_proc failed: %s", strbuf); \
233 #define SET_CAP(flag) do { caps |= (1 << (flag)); } while (0)
234 #define INIT_CAP do { caps = 0; } while (0)
235 #endif /* HAVE_LIBCAP */
/*
 * linux_initialprivs: build the start-up capability set; called from
 * ns_os_init().
 *
 * Capabilities visibly requested through SET_CAP:
 *   CAP_NET_BIND_SERVICE, CAP_SYS_CHROOT, CAP_DAC_READ_SEARCH,
 *   CAP_SYS_RESOURCE.
 *
 * NOTE(review): the explanatory text also talks about a setuid capability
 * (guarded by HAVE_SYS_PRCTL_H || !HAVE_LINUXTHREADS) and one needed for
 * initgroups(), but the matching SET_CAP lines, the INIT_CAP call and the
 * call that applies the set (presumably linux_setcaps) are missing from
 * this listing -- confirm against the complete file.
 */
238 linux_initialprivs(void) {
243 char strbuf[ISC_STRERRORSIZE];
248 * We don't need most privileges, so we drop them right away.
249 * Later on linux_minprivs() will be called, which will drop our
250 * capabilities to the minimum needed to run the server.
255 * We need to be able to bind() to privileged ports, notably port 53!
257 SET_CAP(CAP_NET_BIND_SERVICE);
260 * We need chroot() initially too.
262 SET_CAP(CAP_SYS_CHROOT);
264 #if defined(HAVE_SYS_PRCTL_H) || !defined(HAVE_LINUXTHREADS)
266 * We can setuid() only if either the kernel supports keeping
267 * capabilities after setuid() (which we don't know until we've
268 * tried) or we're not using threads. If either of these is
269 * true, we want the setuid capability.
275 * Since we call initgroups, we need this.
280 * Without this, we run into problems reading a configuration file
281 * owned by a non-root user and non-world-readable on startup.
283 SET_CAP(CAP_DAC_READ_SEARCH);
286 * XXX We might want to add CAP_SYS_RESOURCE, though it's not
287 * clear it would work right given the way linuxthreads work.
288 * XXXDCL But since we need to be able to set the maximum number
289 * of files, the stack size, data size, and core dump size to
290 * support named.conf options, this is now being added to test.
292 SET_CAP(CAP_SYS_RESOURCE);
/*
 * linux_minprivs: build the reduced capability set kept while the server
 * runs.
 *
 * Capabilities visibly requested through SET_CAP: CAP_NET_BIND_SERVICE and
 * CAP_SYS_RESOURCE. CAP_SYS_CHROOT is not requested here.
 *
 * NOTE(review): the INIT_CAP call and the call that applies the set
 * (presumably linux_setcaps) are missing from this listing -- confirm.
 */
302 linux_minprivs(void) {
307 char strbuf[ISC_STRERRORSIZE];
313 * Drop all privileges except the ability to bind() to privileged
316 * It's important that we drop CAP_SYS_CHROOT. If we didn't, it
317 * chroot() could be used to escape from the chrooted area.
320 SET_CAP(CAP_NET_BIND_SERVICE);
323 * XXX We might want to add CAP_SYS_RESOURCE, though it's not
324 * clear it would work right given the way linuxthreads work.
325 * XXXDCL But since we need to be able to set the maximum number
326 * of files, the stack size, data size, and core dump size to
327 * support named.conf options, this is now being added to test.
329 SET_CAP(CAP_SYS_RESOURCE);
338 #ifdef HAVE_SYS_PRCTL_H
/*
 * linux_keepcaps: ask the kernel, via prctl(PR_SET_KEEPCAPS, 1, ...), to let
 * the process keep its capabilities.
 *
 * When prctl() fails with an errno other than EINVAL the failure is fatal
 * (ns_main_earlyfatal). EINVAL is not reported. non_root_caps is set to
 * ISC_TRUE somewhere after the failure handling.
 *
 * NOTE(review): the braces and any else line are missing from this listing,
 * so which path sets non_root_caps (presumably the success path) must be
 * confirmed against the complete file.
 */
340 linux_keepcaps(void) {
341 char strbuf[ISC_STRERRORSIZE];
343 * Ask the kernel to allow us to keep our capabilities after we
347 if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) < 0) {
348 if (errno != EINVAL) {
349 isc__strerror(errno, strbuf, sizeof(strbuf));
350 ns_main_earlyfatal("prctl() failed: %s", strbuf);
353 non_root_caps = ISC_TRUE;
360 #endif /* HAVE_LINUX_CAPABILITY_H */
/*
 * setup_syslog: open the syslog connection for this process.
 *
 * progname: program path; only its basename (isc_file_basename) is used as
 *           the syslog identity.
 *
 * LOG_NDELAY is ORed into the option word and the facility is ISC_FACILITY.
 * NOTE(review): the declaration and initial value of `options`, and any
 * conditional around the LOG_NDELAY line, are not visible in this listing.
 */
364 setup_syslog(const char *progname) {
369 options |= LOG_NDELAY;
371 openlog(isc_file_basename(progname), options, ISC_FACILITY);
/*
 * ns_os_init: early OS-specific initialisation.
 *
 * progname: program path, passed on to setup_syslog().
 *
 * Visible steps: set up syslog; when HAVE_LINUX_CAPABILITY_H is defined call
 * linux_initialprivs(); ignore SIGXFSZ.
 * NOTE(review): the statement under HAVE_LINUXTHREADS and the #endif lines
 * are missing from this listing.
 */
375 ns_os_init(const char *progname) {
376 setup_syslog(progname);
377 #ifdef HAVE_LINUX_CAPABILITY_H
378 linux_initialprivs();
380 #ifdef HAVE_LINUXTHREADS
384 signal(SIGXFSZ, SIG_IGN);
/*
 * ns_os_daemonize: detach the server from its controlling terminal.
 *
 * Visible steps:
 *  - create the pipe dfd[]; failure is fatal;
 *  - report a fork() failure as fatal (the fork() call itself is on a line
 *    missing from this listing);
 *  - read one byte from dfd[0], retrying while read() fails with EINTR, to
 *    wait for the child (see ns_os_started());
 *  - start a new session with setsid(); failure is fatal;
 *  - if devnullfd is open, close each of stdin, stdout and stderr that is
 *    not already devnullfd and dup2() devnullfd onto it, ignoring errors.
 *
 * NOTE(review): the parent/child branching, the exit calls and the statement
 * under HAVE_LINUXTHREADS are not visible here -- confirm against the
 * complete file.
 */
389 ns_os_daemonize(void) {
391 char strbuf[ISC_STRERRORSIZE];
393 if (pipe(dfd) == -1) {
394 isc__strerror(errno, strbuf, sizeof(strbuf));
395 ns_main_earlyfatal("pipe(): %s", strbuf);
400 isc__strerror(errno, strbuf, sizeof(strbuf));
401 ns_main_earlyfatal("fork(): %s", strbuf);
406 * Wait for the child to finish loading for the first time.
407 * This would be so much simpler if fork() worked once we
408 * were multi-threaded.
413 n = read(dfd[0], &buf, 1);
416 } while (n == -1 && errno == EINTR);
425 #ifdef HAVE_LINUXTHREADS
429 if (setsid() == -1) {
430 isc__strerror(errno, strbuf, sizeof(strbuf));
431 ns_main_earlyfatal("setsid(): %s", strbuf);
435 * Try to set stdin, stdout, and stderr to /dev/null, but press
436 * on even if it fails.
438 * XXXMLG The close() calls here are unneeded on all but NetBSD, but
439 * are harmless to include everywhere. dup2() is supposed to close
440 * the FD if it is in use, but unproven-pthreads-0.16 is broken
441 * and will end up closing the wrong FD. This will be fixed eventually,
442 * and these calls will be removed.
444 if (devnullfd != -1) {
445 if (devnullfd != STDIN_FILENO) {
446 (void)close(STDIN_FILENO);
447 (void)dup2(devnullfd, STDIN_FILENO);
449 if (devnullfd != STDOUT_FILENO) {
450 (void)close(STDOUT_FILENO);
451 (void)dup2(devnullfd, STDOUT_FILENO);
453 if (devnullfd != STDERR_FILENO) {
454 (void)close(STDERR_FILENO);
455 (void)dup2(devnullfd, STDERR_FILENO);
/*
 * ns_os_started: tell the waiting parent (see ns_os_daemonize()) that
 * start-up succeeded.
 *
 * Only when both ends of dfd[] are open: one byte is written to dfd[1]; a
 * short or failed write is fatal. Both entries of dfd[] are reset to -1.
 * NOTE(review): the declaration of buf and any close() calls on the pipe are
 * on lines missing from this listing. The write() is not retried on EINTR,
 * unlike the read() in ns_os_daemonize().
 */
461 ns_os_started(void) {
465 * Signal to the parent that we started successfully.
467 if (dfd[0] != -1 && dfd[1] != -1) {
468 if (write(dfd[1], &buf, 1) != 1)
469 ns_main_earlyfatal("unable to signal parent that we "
470 "otherwise started successfully.");
472 dfd[0] = dfd[1] = -1;
/*
 * ns_os_opendevnull: open /dev/null read-write and remember the descriptor
 * in devnullfd. The result of open() is not checked on the visible line;
 * ns_os_daemonize() tests devnullfd against -1 before using it.
 */
477 ns_os_opendevnull(void) {
478 devnullfd = open("/dev/null", O_RDWR, 0);
/*
 * ns_os_closedevnull: act on devnullfd only when it is not one of the three
 * standard descriptors.
 * NOTE(review): the body of the if is missing from this listing (presumably
 * close(devnullfd) and a reset to -1) -- confirm against the complete file.
 */
482 ns_os_closedevnull(void) {
483 if (devnullfd != STDIN_FILENO &&
484 devnullfd != STDOUT_FILENO &&
485 devnullfd != STDERR_FILENO) {
/*
 * all_digits: test the characters of `s` with isdigit(); used by
 * ns_os_inituserinfo() to decide whether a user name is a numeric uid.
 *
 * Each character is masked with 0xff before the isdigit() call so that the
 * argument is never negative.
 * NOTE(review): the loop, the return statements and the treatment of an
 * empty string are on lines missing from this listing.
 */
492 all_digits(const char *s) {
496 if (!isdigit((*s)&0xff))
/*
 * ns_os_chroot: confine the process to the directory `root`.
 *
 * Visible steps: chroot(root), then chdir("/") so the working directory is
 * inside the new root; a failure of either call is fatal
 * (ns_main_earlyfatal with the errno text). An alternative path reports
 * "chroot(): disabled".
 *
 * NOTE(review): the conditionals selecting between the real chroot() call
 * and the "disabled" message, any NULL check on root, and the statement
 * that sets the ns_smf_chroot flag are missing from this listing.
 */
504 ns_os_chroot(const char *root) {
505 char strbuf[ISC_STRERRORSIZE];
511 if (chroot(root) < 0) {
512 isc__strerror(errno, strbuf, sizeof(strbuf));
513 ns_main_earlyfatal("chroot(): %s", strbuf);
516 ns_main_earlyfatal("chroot(): disabled");
518 if (chdir("/") < 0) {
519 isc__strerror(errno, strbuf, sizeof(strbuf));
520 ns_main_earlyfatal("chdir(/): %s", strbuf);
523 /* Set ns_smf_chroot flag on successful chroot. */
/*
 * ns_os_inituserinfo: look up the account the server should run as and
 * store it in runas_pw.
 *
 * username: account name or numeric uid string; NULL is tested first (the
 *           controlled statement is missing here, presumably a return).
 *
 * An all-digit name is resolved with getpwuid(), anything else with
 * getpwnam(). An unknown user is fatal. initgroups() is then called with
 * the pw_name and pw_gid of the entry; its failure is fatal as well.
 *
 * NOTE(review): atoi() gives no overflow or error indication for the uid
 * string; strtoul() with range checking would be safer.
 * NOTE(review): else lines and any conditional around initgroups() are not
 * visible in this listing.
 */
530 ns_os_inituserinfo(const char *username) {
531 char strbuf[ISC_STRERRORSIZE];
532 if (username == NULL)
535 if (all_digits(username))
536 runas_pw = getpwuid((uid_t)atoi(username));
538 runas_pw = getpwnam(username);
541 if (runas_pw == NULL)
542 ns_main_earlyfatal("user '%s' unknown", username);
545 if (initgroups(runas_pw->pw_name, runas_pw->pw_gid) < 0) {
546 isc__strerror(errno, strbuf, sizeof(strbuf));
547 ns_main_earlyfatal("initgroups(): %s", strbuf);
/*
 * ns_os_changeuser: switch to the account stored in runas_pw.
 *
 * Visible behaviour:
 *  - the guard tests for runas_pw == NULL or done_setuid already set (the
 *    controlled statement is missing, presumably a return); done_setuid is
 *    then set to ISC_TRUE;
 *  - under HAVE_LINUXTHREADS two fatal messages exist for the case where -u
 *    cannot be supported (their conditions are on missing lines);
 *  - setgid() is called before setuid(); failure of either is fatal;
 *  - when HAVE_SYS_PRCTL_H and PR_SET_DUMPABLE are both defined,
 *    prctl(PR_SET_DUMPABLE, 1, ...) is called and a failure only produces
 *    a warning.
 *
 * NOTE(review): the statement guarded by the final
 * HAVE_LINUX_CAPABILITY_H && !HAVE_LINUXTHREADS conditional is not visible
 * (presumably a call to linux_minprivs) -- confirm.
 */
554 ns_os_changeuser(void) {
555 char strbuf[ISC_STRERRORSIZE];
556 if (runas_pw == NULL || done_setuid)
559 done_setuid = ISC_TRUE;
561 #ifdef HAVE_LINUXTHREADS
562 #ifdef HAVE_LINUX_CAPABILITY_H
564 ns_main_earlyfatal("-u with Linux threads not supported: "
565 "requires kernel support for "
566 "prctl(PR_SET_KEEPCAPS)");
568 ns_main_earlyfatal("-u with Linux threads not supported: "
569 "no capabilities support or capabilities "
570 "disabled at build time");
574 if (setgid(runas_pw->pw_gid) < 0) {
575 isc__strerror(errno, strbuf, sizeof(strbuf));
576 ns_main_earlyfatal("setgid(): %s", strbuf);
579 if (setuid(runas_pw->pw_uid) < 0) {
580 isc__strerror(errno, strbuf, sizeof(strbuf));
581 ns_main_earlyfatal("setuid(): %s", strbuf);
584 #if defined(HAVE_SYS_PRCTL_H) && defined(PR_SET_DUMPABLE)
586 * Restore the ability of named to drop core after the setuid()
587 * call has disabled it.
589 if (prctl(PR_SET_DUMPABLE,1,0,0,0) < 0) {
590 isc__strerror(errno, strbuf, sizeof(strbuf));
591 ns_main_earlywarning("prctl(PR_SET_DUMPABLE) failed: %s",
595 #if defined(HAVE_LINUX_CAPABILITY_H) && !defined(HAVE_LINUXTHREADS)
/*
 * ns_os_adjustnofile: under HAVE_LINUXTHREADS, set the open-files resource
 * limit to ISC_RESOURCE_UNLIMITED through isc_resource_setlimit(); a failure
 * only produces a warning.
 *
 * NOTE(review): the parameter list is written () here while the sibling
 * functions use (void); (void) is the prototype form in C.
 * NOTE(review): the declaration of `result` and the closing #endif are on
 * lines missing from this listing.
 */
601 ns_os_adjustnofile() {
602 #ifdef HAVE_LINUXTHREADS
604 isc_resourcevalue_t newvalue;
607 * Linux: max number of open files specified by one thread doesn't seem
608 * to apply to other threads on Linux.
610 newvalue = ISC_RESOURCE_UNLIMITED;
612 result = isc_resource_setlimit(isc_resource_openfiles, newvalue);
613 if (result != ISC_R_SUCCESS)
614 ns_main_earlywarning("couldn't adjust limit on open files");
/*
 * ns_os_minprivs: reduce privileges before the server starts its threads.
 *
 * Visible: under HAVE_LINUXTHREADS, ns_os_changeuser() is called so that
 * setuid() happens before threads are started.
 * NOTE(review): the statements guarded by HAVE_SYS_PRCTL_H and by
 * HAVE_LINUX_CAPABILITY_H && HAVE_LINUXTHREADS are missing from this listing
 * (presumably linux_keepcaps and linux_minprivs) -- confirm.
 */
619 ns_os_minprivs(void) {
620 #ifdef HAVE_SYS_PRCTL_H
624 #ifdef HAVE_LINUXTHREADS
625 ns_os_changeuser(); /* Call setuid() before threads are started */
628 #if defined(HAVE_LINUX_CAPABILITY_H) && defined(HAVE_LINUXTHREADS)
/*
 * safe_open: open `filename` for writing and return the descriptor.
 *
 * filename: path to open.
 * append:   presumably selects between the two open() calls below; the
 *           selecting if line is missing from this listing.
 *
 * Visible behaviour: the path is examined with stat(); one path opens with
 * O_WRONLY|O_CREAT|O_APPEND; the other unlinks any existing file (ENOENT is
 * tolerated) and creates it afresh with O_CREAT|O_EXCL. Both use mode
 * rw-r--r--.
 *
 * NOTE(review): (sb.st_mode & S_IFREG) == 0 is a bit test, not a type test;
 * other file types whose type code shares that bit also pass. S_ISREG() is
 * the exact check.
 * NOTE(review): the error returns after stat() and unlink() are not visible.
 */
634 safe_open(const char *filename, isc_boolean_t append) {
638 if (stat(filename, &sb) == -1) {
641 } else if ((sb.st_mode & S_IFREG) == 0) {
647 fd = open(filename, O_WRONLY|O_CREAT|O_APPEND,
648 S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
650 if (unlink(filename) < 0 && errno != ENOENT)
652 fd = open(filename, O_WRONLY|O_CREAT|O_EXCL,
653 S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
/*
 * cleanup_pidfile: dispose of the pid file recorded in `pidfile`, if any.
 *
 * A result of -1 with errno other than ENOENT produces a warning naming the
 * file.
 * NOTE(review): the call that sets `n` (presumably unlink(pidfile)) and the
 * release of the pidfile string are on lines missing from this listing.
 */
659 cleanup_pidfile(void) {
661 if (pidfile != NULL) {
663 if (n == -1 && errno != ENOENT)
664 ns_main_earlywarning("unlink '%s': failed", pidfile);
/*
 * mkdirpath: make sure the directory part of `filename` exists, creating
 * missing parents through recursion.
 *
 * filename: writable path; the last slash is located with strrchr().
 * report:   printf-style callback used for error messages.
 *
 * Visible behaviour: nothing is attempted when there is no slash or the only
 * slash is the leading one. Otherwise the path is checked with stat(); an
 * errno other than ENOENT is reported; for ENOENT the function recurses and
 * then calls mkdir() with mode rwxr-xr-x, reporting a failure. The recursive
 * call is compared against -1.
 *
 * NOTE(review): the lines that presumably cut the string at the slash and
 * restore it afterwards, and all return statements, are missing from this
 * listing -- confirm against the complete file.
 */
671 mkdirpath(char *filename, void (*report)(const char *, ...)) {
672 char *slash = strrchr(filename, '/');
673 char strbuf[ISC_STRERRORSIZE];
676 if (slash != NULL && slash != filename) {
680 if (stat(filename, &sb) == -1) {
681 if (errno != ENOENT) {
682 isc__strerror(errno, strbuf, sizeof(strbuf));
683 (*report)("couldn't stat '%s': %s", filename,
687 if (mkdirpath(filename, report) == -1)
689 mode = S_IRUSR | S_IWUSR | S_IXUSR; /* u=rwx */
690 mode |= S_IRGRP | S_IXGRP; /* g=rx */
691 mode |= S_IROTH | S_IXOTH; /* o=rx */
692 if (mkdir(filename, mode) == -1) {
693 isc__strerror(errno, strbuf, sizeof(strbuf));
694 (*report)("couldn't mkdir '%s': %s", filename,
/*
 * ns_os_writepidfile: record the process id in the file `filename`.
 *
 * filename:   pid file path; NULL is tested first (the controlled statement
 *             is missing here, presumably a return).
 * first_time: selects the reporting function: ns_main_earlyfatal when true,
 *             ns_main_earlywarning otherwise.
 *
 * Visible steps:
 *  - copy filename into a freshly malloc-ed buffer stored in `pidfile`;
 *  - create the containing directory with mkdirpath();
 *  - open the file through safe_open(filename, ISC_FALSE) and wrap the
 *    descriptor with fdopen();
 *  - print the pid as a decimal long followed by a newline, flush, and close
 *    the stream; a failed fprintf() or fflush() is reported and the stream
 *    is closed.
 *
 * NOTE(review): the code that obtains `pid` (it differs under
 * HAVE_LINUXTHREADS), the clean-up performed on each error path and any
 * call that removes a previous pid file are on lines missing from this
 * listing -- confirm against the complete file.
 */
709 ns_os_writepidfile(const char *filename, isc_boolean_t first_time) {
714 char strbuf[ISC_STRERRORSIZE];
715 void (*report)(const char *, ...);
718 * The caller must ensure any required synchronization.
721 report = first_time ? ns_main_earlyfatal : ns_main_earlywarning;
725 if (filename == NULL)
728 len = strlen(filename);
729 pidfile = malloc(len + 1);
730 if (pidfile == NULL) {
731 isc__strerror(errno, strbuf, sizeof(strbuf));
732 (*report)("couldn't malloc '%s': %s", filename, strbuf);
737 strcpy(pidfile, filename);
740 * Make the containing directory if it doesn't exist.
742 if (mkdirpath(pidfile, report) == -1) {
748 fd = safe_open(filename, ISC_FALSE);
750 isc__strerror(errno, strbuf, sizeof(strbuf));
751 (*report)("couldn't open pid file '%s': %s", filename, strbuf);
756 lockfile = fdopen(fd, "w");
757 if (lockfile == NULL) {
758 isc__strerror(errno, strbuf, sizeof(strbuf));
759 (*report)("could not fdopen() pid file '%s': %s",
765 #ifdef HAVE_LINUXTHREADS
770 if (fprintf(lockfile, "%ld\n", (long)pid) < 0) {
771 (*report)("fprintf() to pid file '%s' failed", filename);
772 (void)fclose(lockfile);
776 if (fflush(lockfile) == EOF) {
777 (*report)("fflush() to pid file '%s' failed", filename);
778 (void)fclose(lockfile);
782 (void)fclose(lockfile);
/*
 * ns_os_shutdown: OS-specific shutdown hook.
 * NOTE(review): only the signature survives in this listing; the body is
 * missing, so its actions must be read from the complete file.
 */
786 ns_os_shutdown(void) {
/*
 * ns_os_gethostname: fetch the host name into `buf`.
 *
 * buf: destination buffer.
 * len: size of buf in bytes, passed straight to gethostname().
 *
 * Returns ISC_R_SUCCESS when gethostname() returns 0 and ISC_R_FAILURE for
 * any other return value.
 */
792 ns_os_gethostname(char *buf, size_t len) {
795 n = gethostname(buf, len);
796 return ((n == 0) ? ISC_R_SUCCESS : ISC_R_FAILURE);
/*
 * next_token: pull the next non-empty token from *stringp.
 *
 * stringp: cursor advanced by strsep().
 * delim:   set of delimiter characters.
 *
 * strsep() is called repeatedly while the token it returns is empty, so runs
 * of adjacent delimiters are skipped.
 * NOTE(review): the handling of a NULL result from strsep() and the return
 * statement are on lines missing from this listing.
 */
800 next_token(char **stringp, const char *delim) {
804 res = strsep(stringp, delim);
807 } while (*res == '\0');
/*
 * ns_os_shutdownmsg: build the optional reply text for a shutdown command.
 *
 * command: command line; tokenised on spaces and tabs through next_token().
 * text:    output buffer that receives the reply.
 *
 * Visible behaviour: the first token (the command name) is skipped and the
 * next token is compared with "-p". The text "pid: <pid>" is formatted with
 * snprintf() into the free space of `text`, and the buffer is advanced by
 * the formatted length only when the whole message fitted.
 *
 * NOTE(review): the statements taken when a token is missing or is not "-p"
 * (presumably returns), the code that obtains `pid`, and the end of the
 * function are not visible in this listing -- confirm.
 */
812 ns_os_shutdownmsg(char *command, isc_buffer_t *text) {
819 /* Skip the command name. */
820 ptr = next_token(&input, " \t");
824 ptr = next_token(&input, " \t");
828 if (strcmp(ptr, "-p") != 0)
831 #ifdef HAVE_LINUXTHREADS
837 n = snprintf((char *)isc_buffer_used(text),
838 isc_buffer_availablelength(text),
839 "pid: %ld", (long)pid);
840 /* Only send a message if it is complete. */
841 if (n < isc_buffer_availablelength(text))
842 isc_buffer_add(text, n);