CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.sbin/jail/command.c
jail(8): add support for ZFS datasets
[FreeBSD/FreeBSD.git] / usr.sbin / jail / command.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 James Gritton
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29 #include <sys/types.h>
30 #include <sys/cpuset.h>
31 #include <sys/event.h>
32 #include <sys/mount.h>
33 #include <sys/stat.h>
34 #include <sys/sysctl.h>
35 #include <sys/user.h>
36 #include <sys/wait.h>
37
38 #include <err.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <kvm.h>
42 #include <login_cap.h>
43 #include <paths.h>
44 #include <pwd.h>
45 #include <signal.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <vis.h>
51
52 #include "jailp.h"
53
54 #define DEFAULT_STOP_TIMEOUT    10      /* default stop timeout; units and consumer are not visible in this chunk */
55 #define PHASH_SIZE              256     /* number of entries in the phash[] table declared below */
56
57 LIST_HEAD(phhead, phash);               /* list-head type for chains of struct phash */
58
59 struct phash {                          /* pairs a process ID with a jail; presumably what add_proc()/find_proc() manage */
60         LIST_ENTRY(phash)       le;     /* linkage within a phhead list */
61         struct cfjail           *j;     /* jail paired with the process */
62         pid_t                   pid;    /* process ID */
63 };
64
65 int paralimit = -1;     /* next_command() defers jails while this is 0; run_command() decrements it, finish_command() increments it */
66
67 extern char **environ;
68
69 static int run_command(struct cfjail *j);
70 static int add_proc(struct cfjail *j, pid_t pid);
71 static void clear_procs(struct cfjail *j);
72 static struct cfjail *find_proc(pid_t pid);
73 static int term_procs(struct cfjail *j);
74 static int get_user_info(struct cfjail *j, const char *username,
75     const struct passwd **pwdp, login_cap_t **lcapp);
76 static int check_path(struct cfjail *j, const char *pname, const char *path,
77     int isfile, const char *umount_type);
78
79 static struct cfjails sleeping = TAILQ_HEAD_INITIALIZER(sleeping);      /* queue whose head next_proc() checks for a timeout */
80 static struct cfjails runnable = TAILQ_HEAD_INITIALIZER(runnable);      /* jails deferred by next_command() while paralimit is 0 */
81 static struct cfstring dummystring = { .len = 1 };      /* placeholder comstring for parameters handled without a string value */
82 static struct phhead phash[PHASH_SIZE]; /* table of struct phash lists */
83 static int kq;                          /* descriptor handed to kevent() in next_proc() */
84
85 static cpusetid_t
86 root_cpuset_id(void)
87 {
88         static cpusetid_t setid = CPUSET_INVALID;
89         static int error;
90
91         /* Only try to get the cpuset once. */
92         if (error == 0 && setid == CPUSET_INVALID)
93                 error = cpuset_getid(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1, &setid);
94         if (error != 0)
95                 return (CPUSET_INVALID);
96         return (setid);
97 }
98
99 /*
100  * Run the next command associated with a jail.
101  */
102 int
103 next_command(struct cfjail *j)
104 {
105         enum intparam comparam;
106         int create_failed, stopping;
107
108         if (paralimit == 0) {
109                 if (j->flags & JF_FROM_RUNQ)
110                         requeue_head(j, &runnable);
111                 else
112                         requeue(j, &runnable);
113                 return 1;
114         }
115         j->flags &= ~JF_FROM_RUNQ;
116         create_failed = (j->flags & (JF_STOP | JF_FAILED)) == JF_FAILED;
117         stopping = (j->flags & JF_STOP) != 0;
118         comparam = *j->comparam;
119         for (;;) {
120                 if (j->comstring == NULL) {
121                         j->comparam += create_failed ? -1 : 1;
122                         switch ((comparam = *j->comparam)) {
123                         case IP__NULL:
124                                 return 0;
125                         case IP_MOUNT_DEVFS:
126                                 if (!bool_param(j->intparams[IP_MOUNT_DEVFS]))
127                                         continue;
128                                 j->comstring = &dummystring;
129                                 break;
130                         case IP_MOUNT_FDESCFS:
131                                 if (!bool_param(j->intparams[IP_MOUNT_FDESCFS]))
132                                         continue;
133                                 j->comstring = &dummystring;
134                                 break;
135                         case IP_MOUNT_PROCFS:
136                                 if (!bool_param(j->intparams[IP_MOUNT_PROCFS]))
137                                         continue;
138                                 j->comstring = &dummystring;
139                                 break;
140                         case IP__OP:
141                         case IP_STOP_TIMEOUT:
142                                 j->comstring = &dummystring;
143                                 break;
144                         default:
145                                 if (j->intparams[comparam] == NULL)
146                                         continue;
147                                 j->comstring = create_failed || (stopping &&
148                                     (j->intparams[comparam]->flags & PF_REV))
149                                     ? TAILQ_LAST(&j->intparams[comparam]->val,
150                                         cfstrings)
151                                     : TAILQ_FIRST(&j->intparams[comparam]->val);
152                         }
153                 } else {
154                         j->comstring = j->comstring == &dummystring ? NULL :
155                             create_failed || (stopping &&
156                             (j->intparams[comparam]->flags & PF_REV))
157                             ? TAILQ_PREV(j->comstring, cfstrings, tq)
158                             : TAILQ_NEXT(j->comstring, tq);
159                 }
160                 if (j->comstring == NULL || j->comstring->len == 0 ||
161                     (create_failed && (comparam == IP_EXEC_PRESTART ||
162                     comparam == IP_EXEC_CREATED || comparam == IP_EXEC_START ||
163                     comparam == IP_COMMAND || comparam == IP_EXEC_POSTSTART ||
164                     comparam == IP_EXEC_PREPARE)))
165                         continue;
166                 switch (run_command(j)) {
167                 case -1:
168                         failed(j);
169                         /* FALLTHROUGH */
170                 case 1:
171                         return 1;
172                 }
173         }
174 }
175
176 /*
177  * Check command exit status
178  */
179 int
180 finish_command(struct cfjail *j)
181 {
182         struct cfjail *rj;
183         int error;
184
185         if (!(j->flags & JF_SLEEPQ))
186                 return 0;
187         j->flags &= ~JF_SLEEPQ;
188         if (*j->comparam == IP_STOP_TIMEOUT) {
189                 j->flags &= ~JF_TIMEOUT;
190                 j->pstatus = 0;
191                 return 0;
192         }
193         paralimit++;
194         if (!TAILQ_EMPTY(&runnable)) {
195                 rj = TAILQ_FIRST(&runnable);
196                 rj->flags |= JF_FROM_RUNQ;
197                 requeue(rj, &ready);
198         }
199         error = 0;
200         if (j->flags & JF_TIMEOUT) {
201                 j->flags &= ~JF_TIMEOUT;
202                 if (*j->comparam != IP_STOP_TIMEOUT) {
203                         jail_warnx(j, "%s: timed out", j->comline);
204                         failed(j);
205                         error = -1;
206                 } else if (verbose > 0)
207                         jail_note(j, "timed out\n");
208         } else if (j->pstatus != 0) {
209                 if (WIFSIGNALED(j->pstatus))
210                         jail_warnx(j, "%s: exited on signal %d",
211                             j->comline, WTERMSIG(j->pstatus));
212                 else
213                         jail_warnx(j, "%s: failed", j->comline);
214                 j->pstatus = 0;
215                 failed(j);
216                 error = -1;
217         }
218         free(j->comline);
219         j->comline = NULL;
220         return error;
221 }
222
223 /*
224  * Check for finished processes or timeouts.
225  */
226 struct cfjail *
227 next_proc(int nonblock)
228 {
229         struct kevent ke;
230         struct timespec ts;
231         struct timespec *tsp;
232         struct cfjail *j;
233
234         if (!TAILQ_EMPTY(&sleeping)) {
235         again:
236                 tsp = NULL;
237                 if ((j = TAILQ_FIRST(&sleeping)) && j->timeout.tv_sec) {
238                         clock_gettime(CLOCK_REALTIME, &ts);
239                         ts.tv_sec = j->timeout.tv_sec - ts.tv_sec;
240                         ts.tv_nsec = j->timeout.tv_nsec - ts.tv_nsec;
241                         if (ts.tv_nsec < 0) {
242                                 ts.tv_sec--;
243                                 ts.tv_nsec += 1000000000;
244                         }
245                         if (ts.tv_sec < 0 ||
246                             (ts.tv_sec == 0 && ts.tv_nsec == 0)) {
247                                 j->flags |= JF_TIMEOUT;
248                                 clear_procs(j);
249                                 return j;
250                         }
251                         tsp = &ts;
252                 }
253                 if (nonblock) {
254                         ts.tv_sec = 0;
255                         ts.tv_nsec = 0;
256                         tsp = &ts;
257                 }
258                 switch (kevent(kq, NULL, 0, &ke, 1, tsp)) {
259                 case -1:
260                         if (errno != EINTR)
261                                 err(1, "kevent");
262                         goto again;
263                 case 0:
264                         if (!nonblock) {
265                                 j = TAILQ_FIRST(&sleeping);
266                                 j->flags |= JF_TIMEOUT;
267                                 clear_procs(j);
268                                 return j;
269                         }
270                         break;
271                 case 1:
272                         (void)waitpid(ke.ident, NULL, WNOHANG);
273                         if ((j = find_proc(ke.ident))) {
274                                 j->pstatus = ke.data;
275                                 return j;
276                         }
277                         goto again;
278                 }
279         }
280         return NULL;
281 }
282
283 /*
284  * Run a single command for a jail, possibly inside the jail.
285  */
286 static int
287 run_command(struct cfjail *j)
288 {
289         const struct passwd *pwd;
290         const struct cfstring *comstring, *s;
291         login_cap_t *lcap;
292         const char **argv;
293         char *acs, *cs, *comcs, *devpath;
294         const char *jidstr, *conslog, *fmt, *path, *ruleset, *term, *username;
295         enum intparam comparam;
296         size_t comlen, ret;
297         pid_t pid;
298         cpusetid_t setid;
299         int argc, bg, clean, consfd, down, fib, i, injail, sjuser, timeout;
300 #if defined(INET) || defined(INET6)
301         char *addr, *extrap, *p, *val;
302 #endif
303
304         static char *cleanenv;
305
306         /* Perform some operations that aren't actually commands */
307         comparam = *j->comparam;
308         down = j->flags & (JF_STOP | JF_FAILED);
309         switch (comparam) {
310         case IP_STOP_TIMEOUT:
311                 return term_procs(j);
312
313         case IP__OP:
314                 if (down) {
315                         if (jail_remove(j->jid) < 0 && errno == EPERM) {
316                                 jail_warnx(j, "jail_remove: %s",
317                                            strerror(errno));
318                                 return -1;
319                         }
320                         if (verbose > 0 || (verbose == 0 && (j->flags & JF_STOP
321                             ? note_remove : j->name != NULL)))
322                             jail_note(j, "removed\n");
323                         j->jid = -1;
324                         if (j->flags & JF_STOP)
325                                 dep_done(j, DF_LIGHT);
326                         else
327                                 j->flags &= ~JF_PERSIST;
328                 } else {
329                         if (create_jail(j) < 0)
330                                 return -1;
331                         if (iflag)
332                                 printf("%d\n", j->jid);
333                         if (verbose >= 0 && (j->name || verbose > 0))
334                                 jail_note(j, "created\n");
335                         dep_done(j, DF_LIGHT);
336                 }
337                 return 0;
338
339         default: ;
340         }
341         /*
342          * Collect exec arguments.  Internal commands for network and
343          * mounting build their own argument lists.
344          */
345         comstring = j->comstring;
346         bg = 0;
347         switch (comparam) {
348 #ifdef INET
349         case IP__IP4_IFADDR:
350                 argc = 0;
351                 val = alloca(strlen(comstring->s) + 1);
352                 strcpy(val, comstring->s);
353                 cs = val;
354                 extrap = NULL;
355                 while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
356                         if (extrap == NULL) {
357                                 *p = '\0';
358                                 extrap = p + 1;
359                         }
360                         cs = p + 1;
361                         argc++;
362                 }
363
364                 argv = alloca((8 + argc) * sizeof(char *));
365                 argv[0] = _PATH_IFCONFIG;
366                 if ((cs = strchr(val, '|'))) {
367                         argv[1] = acs = alloca(cs - val + 1);
368                         strlcpy(acs, val, cs - val + 1);
369                         addr = cs + 1;
370                 } else {
371                         argv[1] = string_param(j->intparams[IP_INTERFACE]);
372                         addr = val;
373                 }
374                 argv[2] = "inet";
375                 if (!(cs = strchr(addr, '/'))) {
376                         argv[3] = addr;
377                         argv[4] = "netmask";
378                         argv[5] = "255.255.255.255";
379                         argc = 6;
380                 } else if (strchr(cs + 1, '.')) {
381                         argv[3] = acs = alloca(cs - addr + 1);
382                         strlcpy(acs, addr, cs - addr + 1);
383                         argv[4] = "netmask";
384                         argv[5] = cs + 1;
385                         argc = 6;
386                 } else {
387                         argv[3] = addr;
388                         argc = 4;
389                 }
390
391                 if (!down && extrap != NULL) {
392                         for (cs = strtok(extrap, " "); cs;
393                              cs = strtok(NULL, " ")) {
394                                 size_t len = strlen(cs) + 1;
395                                 argv[argc++] = acs = alloca(len);
396                                 strlcpy(acs, cs, len);
397                         }
398                 }
399
400                 argv[argc] = down ? "-alias" : "alias";
401                 argv[argc + 1] = NULL;
402                 break;
403 #endif
404
405 #ifdef INET6
406         case IP__IP6_IFADDR:
407                 argc = 0;
408                 val = alloca(strlen(comstring->s) + 1);
409                 strcpy(val, comstring->s);
410                 cs = val;
411                 extrap = NULL;
412                 while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
413                         if (extrap == NULL) {
414                                 *p = '\0';
415                                 extrap = p + 1;
416                         }
417                         cs = p + 1;
418                         argc++;
419                 }
420
421                 argv = alloca((8 + argc) * sizeof(char *));
422                 argv[0] = _PATH_IFCONFIG;
423                 if ((cs = strchr(val, '|'))) {
424                         argv[1] = acs = alloca(cs - val + 1);
425                         strlcpy(acs, val, cs - val + 1);
426                         addr = cs + 1;
427                 } else {
428                         argv[1] = string_param(j->intparams[IP_INTERFACE]);
429                         addr = val;
430                 }
431                 argv[2] = "inet6";
432                 argv[3] = addr;
433                 if (!(cs = strchr(addr, '/'))) {
434                         argv[4] = "prefixlen";
435                         argv[5] = "128";
436                         argc = 6;
437                 } else
438                         argc = 4;
439
440                 if (!down && extrap != NULL) {
441                         for (cs = strtok(extrap, " "); cs;
442                              cs = strtok(NULL, " ")) {
443                                 size_t len = strlen(cs) + 1;
444                                 argv[argc++] = acs = alloca(len);
445                                 strlcpy(acs, cs, len);
446                         }
447                 }
448
449                 argv[argc] = down ? "-alias" : "alias";
450                 argv[argc + 1] = NULL;
451                 break;
452 #endif
453
454         case IP_VNET_INTERFACE:
455                 argv = alloca(5 * sizeof(char *));
456                 argv[0] = _PATH_IFCONFIG;
457                 argv[1] = comstring->s;
458                 argv[2] = down ? "-vnet" : "vnet";
459                 jidstr = string_param(j->intparams[KP_JID]);
460                 argv[3] = jidstr ? jidstr : string_param(j->intparams[KP_NAME]);
461                 argv[4] = NULL;
462                 break;
463
464         case IP_MOUNT:
465         case IP__MOUNT_FROM_FSTAB:
466                 argv = alloca(8 * sizeof(char *));
467                 comcs = alloca(comstring->len + 1);
468                 strcpy(comcs, comstring->s);
469                 argc = 0;
470                 for (cs = strtok(comcs, " \t\f\v\r\n"); cs && argc < 4;
471                      cs = strtok(NULL, " \t\f\v\r\n")) {
472                         if (argc <= 1 && strunvis(cs, cs) < 0) {
473                                 jail_warnx(j, "%s: %s: fstab parse error",
474                                     j->intparams[comparam]->name, comstring->s);
475                                 return -1;
476                         }
477                         argv[argc++] = cs;
478                 }
479                 if (argc == 0)
480                         return 0;
481                 if (argc < 3) {
482                         jail_warnx(j, "%s: %s: missing information",
483                             j->intparams[comparam]->name, comstring->s);
484                         return -1;
485                 }
486                 if (check_path(j, j->intparams[comparam]->name, argv[1], 0,
487                     down ? argv[2] : NULL) < 0)
488                         return -1;
489                 if (down) {
490                         argv[4] = NULL;
491                         argv[3] = argv[1];
492                         argv[0] = "/sbin/umount";
493                 } else {
494                         if (argc == 4) {
495                                 argv[7] = NULL;
496                                 argv[6] = argv[1];
497                                 argv[5] = argv[0];
498                                 argv[4] = argv[3];
499                                 argv[3] = "-o";
500                         } else {
501                                 argv[5] = NULL;
502                                 argv[4] = argv[1];
503                                 argv[3] = argv[0];
504                         }
505                         argv[0] = _PATH_MOUNT;
506                 }
507                 argv[1] = "-t";
508                 break;
509
510         case IP_MOUNT_DEVFS:
511                 argv = alloca(7 * sizeof(char *));
512                 path = string_param(j->intparams[KP_PATH]);
513                 if (path == NULL) {
514                         jail_warnx(j, "mount.devfs: no jail root path defined");
515                         return -1;
516                 }
517                 devpath = alloca(strlen(path) + 5);
518                 sprintf(devpath, "%s/dev", path);
519                 if (check_path(j, "mount.devfs", devpath, 0,
520                     down ? "devfs" : NULL) < 0)
521                         return -1;
522                 if (down) {
523                         argv[0] = "/sbin/umount";
524                         argv[1] = devpath;
525                         argv[2] = NULL;
526                 } else {
527                         argv[0] = _PATH_MOUNT;
528                         argv[1] = "-t";
529                         argv[2] = "devfs";
530                         ruleset = string_param(j->intparams[KP_DEVFS_RULESET]);
531                         if (!ruleset)
532                             ruleset = "4";      /* devfsrules_jail */
533                         argv[3] = acs = alloca(11 + strlen(ruleset));
534                         sprintf(acs, "-oruleset=%s", ruleset);
535                         argv[4] = ".";
536                         argv[5] = devpath;
537                         argv[6] = NULL;
538                 }
539                 break;
540
541         case IP_MOUNT_FDESCFS:
542                 argv = alloca(7 * sizeof(char *));
543                 path = string_param(j->intparams[KP_PATH]);
544                 if (path == NULL) {
545                         jail_warnx(j, "mount.fdescfs: no jail root path defined");
546                         return -1;
547                 }
548                 devpath = alloca(strlen(path) + 8);
549                 sprintf(devpath, "%s/dev/fd", path);
550                 if (check_path(j, "mount.fdescfs", devpath, 0,
551                     down ? "fdescfs" : NULL) < 0)
552                         return -1;
553                 if (down) {
554                         argv[0] = "/sbin/umount";
555                         argv[1] = devpath;
556                         argv[2] = NULL;
557                 } else {
558                         argv[0] = _PATH_MOUNT;
559                         argv[1] = "-t";
560                         argv[2] = "fdescfs";
561                         argv[3] = ".";
562                         argv[4] = devpath;
563                         argv[5] = NULL;
564                 }
565                 break;
566
567         case IP_MOUNT_PROCFS:
568                 argv = alloca(7 * sizeof(char *));
569                 path = string_param(j->intparams[KP_PATH]);
570                 if (path == NULL) {
571                         jail_warnx(j, "mount.procfs: no jail root path defined");
572                         return -1;
573                 }
574                 devpath = alloca(strlen(path) + 6);
575                 sprintf(devpath, "%s/proc", path);
576                 if (check_path(j, "mount.procfs", devpath, 0,
577                     down ? "procfs" : NULL) < 0)
578                         return -1;
579                 if (down) {
580                         argv[0] = "/sbin/umount";
581                         argv[1] = devpath;
582                         argv[2] = NULL;
583                 } else {
584                         argv[0] = _PATH_MOUNT;
585                         argv[1] = "-t";
586                         argv[2] = "procfs";
587                         argv[3] = ".";
588                         argv[4] = devpath;
589                         argv[5] = NULL;
590                 }
591                 break;
592
593         case IP_ZFS_DATASET:
594                 argv = alloca(4 * sizeof(char *));
595                 jidstr = string_param(j->intparams[KP_JID]) ?
596                     string_param(j->intparams[KP_JID]) :
597                     string_param(j->intparams[KP_NAME]);
598                 fmt = "if [ $(/sbin/zfs get -H -o value jailed %s) = on ]; then /sbin/zfs jail %s %s || echo error, attaching %s to jail %s failed; else echo error, you need to set jailed=on for dataset %s; fi";
599                 comlen = strlen(fmt)
600                     + 2 * strlen(jidstr)
601                     + 4 * comstring->len
602                     - 6 * 2     /* 6 * "%s" */
603                     + 1;
604                 comcs = alloca(comlen);
605                 ret = snprintf(comcs, comlen, fmt, comstring->s,
606                     jidstr, comstring->s, comstring->s, jidstr,
607                     comstring->s);
608                 if (ret >= comlen) {
609                         jail_warnx(j, "internal error in ZFS dataset handling");
610                         exit(1);
611                 }
612                 argv[0] = _PATH_BSHELL;
613                 argv[1] = "-c";
614                 argv[2] = comcs;
615                 argv[3] = NULL;
616                 break;
617
618         case IP_COMMAND:
619                 if (j->name != NULL)
620                         goto default_command;
621                 argc = 0;
622                 TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
623                         argc++;
624                 argv = alloca((argc + 1) * sizeof(char *));
625                 argc = 0;
626                 TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
627                         argv[argc++] = s->s;
628                 argv[argc] = NULL;
629                 j->comstring = &dummystring;
630                 break;
631
632         default:
633         default_command:
634                 if ((cs = strpbrk(comstring->s, "!\"$&'()*;<>?[\\]`{|}~")) &&
635                     !(cs[0] == '&' && cs[1] == '\0')) {
636                         argv = alloca(4 * sizeof(char *));
637                         argv[0] = _PATH_BSHELL;
638                         argv[1] = "-c";
639                         argv[2] = comstring->s;
640                         argv[3] = NULL;
641                 } else {
642                         if (cs) {
643                                 *cs = 0;
644                                 bg = 1;
645                         }
646                         comcs = alloca(comstring->len + 1);
647                         strcpy(comcs, comstring->s);
648                         argc = 0;
649                         for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
650                              cs = strtok(NULL, " \t\f\v\r\n"))
651                                 argc++;
652                         argv = alloca((argc + 1) * sizeof(char *));
653                         strcpy(comcs, comstring->s);
654                         argc = 0;
655                         for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
656                              cs = strtok(NULL, " \t\f\v\r\n"))
657                                 argv[argc++] = cs;
658                         argv[argc] = NULL;
659                 }
660         }
661         if (argv[0] == NULL)
662                 return 0;
663
664         if (int_param(j->intparams[IP_EXEC_TIMEOUT], &timeout) &&
665             timeout != 0) {
666                 clock_gettime(CLOCK_REALTIME, &j->timeout);
667                 j->timeout.tv_sec += timeout;
668         } else
669                 j->timeout.tv_sec = 0;
670
671         injail = comparam == IP_EXEC_START || comparam == IP_COMMAND ||
672             comparam == IP_EXEC_STOP;
673         if (injail)
674                 setid = root_cpuset_id();
675         else
676                 setid = CPUSET_INVALID;
677         clean = bool_param(j->intparams[IP_EXEC_CLEAN]);
678         username = string_param(j->intparams[injail
679             ? IP_EXEC_JAIL_USER : IP_EXEC_SYSTEM_USER]);
680         sjuser = bool_param(j->intparams[IP_EXEC_SYSTEM_JAIL_USER]);
681
682         consfd = 0;
683         if (injail &&
684             (conslog = string_param(j->intparams[IP_EXEC_CONSOLELOG]))) {
685                 if (check_path(j, "exec.consolelog", conslog, 1, NULL) < 0)
686                         return -1;
687                 consfd =
688                     open(conslog, O_WRONLY | O_CREAT | O_APPEND, DEFFILEMODE);
689                 if (consfd < 0) {
690                         jail_warnx(j, "open %s: %s", conslog, strerror(errno));
691                         return -1;
692                 }
693         }
694
695         comlen = 0;
696         for (i = 0; argv[i]; i++)
697                 comlen += strlen(argv[i]) + 1;
698         j->comline = cs = emalloc(comlen);
699         for (i = 0; argv[i]; i++) {
700                 strcpy(cs, argv[i]);
701                 if (argv[i + 1]) {
702                         cs += strlen(argv[i]) + 1;
703                         cs[-1] = ' ';
704                 }
705         }
706         if (verbose > 0)
707                 jail_note(j, "run command%s%s%s: %s\n",
708                     injail ? " in jail" : "", username ? " as " : "",
709                     username ? username : "", j->comline);
710
711         pid = fork();
712         if (pid < 0)
713                 err(1, "fork");
714         if (pid > 0) {
715                 if (bg || !add_proc(j, pid)) {
716                         free(j->comline);
717                         j->comline = NULL;
718                         return 0;
719                 } else {
720                         paralimit--;
721                         return 1;
722                 }
723         }
724         if (bg)
725                 setsid();
726
727         /* Set up the environment and run the command */
728         pwd = NULL;
729         lcap = NULL;
730         if ((clean || username) && injail && sjuser &&
731             get_user_info(j, username, &pwd, &lcap) < 0)
732                 exit(1);
733         if (injail) {
734                 /* jail_attach won't chdir along with its chroot. */
735                 path = string_param(j->intparams[KP_PATH]);
736                 if (path && chdir(path) < 0) {
737                         jail_warnx(j, "chdir %s: %s", path, strerror(errno));
738                         exit(1);
739                 }
740                 if (int_param(j->intparams[IP_EXEC_FIB], &fib) &&
741                     setfib(fib) < 0) {
742                         jail_warnx(j, "setfib: %s", strerror(errno));
743                         exit(1);
744                 }
745
746                 /*
747                  * We wouldn't have specialized our affinity, so just setid to
748                  * root.  We do this prior to attaching to avoid the kernel
749                  * having to create a transient cpuset that we'll promptly
750                  * free up with a reset to the jail's cpuset.
751                  *
752                  * This is just a best-effort to use as wide of mask as
753                  * possible.
754                  */
755                 if (setid != CPUSET_INVALID)
756                         (void)cpuset_setid(CPU_WHICH_PID, -1, setid);
757
758                 if (jail_attach(j->jid) < 0) {
759                         jail_warnx(j, "jail_attach: %s", strerror(errno));
760                         exit(1);
761                 }
762         }
763         if (clean || username) {
764                 if (!(injail && sjuser) &&
765                     get_user_info(j, username, &pwd, &lcap) < 0)
766                         exit(1);
767                 if (clean) {
768                         term = getenv("TERM");
769                         environ = &cleanenv;
770                         setenv("PATH", "/bin:/usr/bin", 0);
771                         if (term != NULL)
772                                 setenv("TERM", term, 1);
773                 }
774                 if (setgid(pwd->pw_gid) < 0) {
775                         jail_warnx(j, "setgid %d: %s", pwd->pw_gid,
776                             strerror(errno));
777                         exit(1);
778                 }
779                 if (setusercontext(lcap, pwd, pwd->pw_uid, username
780                     ? LOGIN_SETALL & ~LOGIN_SETGROUP & ~LOGIN_SETLOGIN
781                     : LOGIN_SETPATH | LOGIN_SETENV) < 0) {
782                         jail_warnx(j, "setusercontext %s: %s", pwd->pw_name,
783                             strerror(errno));
784                         exit(1);
785                 }
786                 login_close(lcap);
787                 setenv("USER", pwd->pw_name, 1);
788                 setenv("HOME", pwd->pw_dir, 1);
789                 setenv("SHELL",
790                     *pwd->pw_shell ? pwd->pw_shell : _PATH_BSHELL, 1);
791                 if (clean && chdir(pwd->pw_dir) < 0) {
792                         jail_warnx(j, "chdir %s: %s",
793                             pwd->pw_dir, strerror(errno));
794                         exit(1);
795                 }
796                 endpwent();
797         }
798
799         if (consfd != 0 && (dup2(consfd, 1) < 0 || dup2(consfd, 2) < 0)) {
800                 jail_warnx(j, "exec.consolelog: %s", strerror(errno));
801                 exit(1);
802         }
803         closefrom(3);
804         execvp(argv[0], __DECONST(char *const*, argv));
805         jail_warnx(j, "exec %s: %s", argv[0], strerror(errno));
806         exit(1);
807 }
808
809 /*
810  * Add a process to the hash, tied to a jail.
811  */
812 static int
813 add_proc(struct cfjail *j, pid_t pid)
814 {
815         struct kevent ke;
816         struct cfjail *tj;
817         struct phash *ph;
818
819         if (!kq && (kq = kqueue()) < 0)
820                 err(1, "kqueue");
821         EV_SET(&ke, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
822         if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
823                 if (errno == ESRCH)
824                         return 0;
825                 err(1, "kevent");
826         }
827         ph = emalloc(sizeof(struct phash));
828         ph->j = j;
829         ph->pid = pid;
830         LIST_INSERT_HEAD(&phash[pid % PHASH_SIZE], ph, le);
831         j->nprocs++;
832         j->flags |= JF_SLEEPQ;
833         if (j->timeout.tv_sec == 0)
834                 requeue(j, &sleeping);
835         else {
836                 /* File the jail in the sleep queue according to its timeout. */
837                 TAILQ_REMOVE(j->queue, j, tq);
838                 TAILQ_FOREACH(tj, &sleeping, tq) {
839                         if (!tj->timeout.tv_sec ||
840                             j->timeout.tv_sec < tj->timeout.tv_sec ||
841                             (j->timeout.tv_sec == tj->timeout.tv_sec &&
842                             j->timeout.tv_nsec <= tj->timeout.tv_nsec)) {
843                                 TAILQ_INSERT_BEFORE(tj, j, tq);
844                                 break;
845                         }
846                 }
847                 if (tj == NULL)
848                         TAILQ_INSERT_TAIL(&sleeping, j, tq);
849                 j->queue = &sleeping;
850         }
851         return 1;
852 }
853
854 /*
855  * Remove any processes from the hash that correspond to a jail.
856  */
857 static void
858 clear_procs(struct cfjail *j)
859 {
860         struct kevent ke;
861         struct phash *ph, *tph;
862         int i;
863
864         j->nprocs = 0;
865         for (i = 0; i < PHASH_SIZE; i++)
866                 LIST_FOREACH_SAFE(ph, &phash[i], le, tph)
867                         if (ph->j == j) {
868                                 EV_SET(&ke, ph->pid, EVFILT_PROC, EV_DELETE,
869                                     NOTE_EXIT, 0, NULL);
870                                 (void)kevent(kq, &ke, 1, NULL, 0, NULL);
871                                 LIST_REMOVE(ph, le);
872                                 free(ph);
873                         }
874 }
875
876 /*
877  * Find the jail that corresponds to an exited process.
878  */
879 static struct cfjail *
880 find_proc(pid_t pid)
881 {
882         struct cfjail *j;
883         struct phash *ph;
884
885         LIST_FOREACH(ph, &phash[pid % PHASH_SIZE], le)
886                 if (ph->pid == pid) {
887                         j = ph->j;
888                         LIST_REMOVE(ph, le);
889                         free(ph);
890                         return --j->nprocs ? NULL : j;
891                 }
892         return NULL;
893 }
894
895 /*
896  * Send SIGTERM to all processes in a jail and wait for them to die.
897  */
898 static int
899 term_procs(struct cfjail *j)
900 {
901         struct kinfo_proc *ki;
902         int i, noted, pcnt, timeout;
903
904         static kvm_t *kd;
905
906         if (!int_param(j->intparams[IP_STOP_TIMEOUT], &timeout))
907                 timeout = DEFAULT_STOP_TIMEOUT;
908         else if (timeout == 0)
909                 return 0;
910
911         if (kd == NULL) {
912                 kd = kvm_open(NULL, NULL, NULL, O_RDONLY, NULL);
913                 if (kd == NULL)
914                         return 0;
915         }
916
917         ki = kvm_getprocs(kd, KERN_PROC_PROC, 0, &pcnt);
918         if (ki == NULL)
919                 return 0;
920         noted = 0;
921         for (i = 0; i < pcnt; i++)
922                 if (ki[i].ki_jid == j->jid &&
923                     kill(ki[i].ki_pid, SIGTERM) == 0) {
924                         (void)add_proc(j, ki[i].ki_pid);
925                         if (verbose > 0) {
926                                 if (!noted) {
927                                         noted = 1;
928                                         jail_note(j, "sent SIGTERM to:");
929                                 }
930                                 printf(" %d", ki[i].ki_pid);
931                         }
932                 }
933         if (noted)
934                 printf("\n");
935         if (j->nprocs > 0) {
936                 clock_gettime(CLOCK_REALTIME, &j->timeout);
937                 j->timeout.tv_sec += timeout;
938                 return 1;
939         }
940         return 0;
941 }
942
943 /*
944  * Look up a user in the passwd and login.conf files.
945  */
946 static int
947 get_user_info(struct cfjail *j, const char *username,
948     const struct passwd **pwdp, login_cap_t **lcapp)
949 {
950         const struct passwd *pwd;
951
952         errno = 0;
953         *pwdp = pwd = username ? getpwnam(username) : getpwuid(getuid());
954         if (pwd == NULL) {
955                 if (errno)
956                         jail_warnx(j, "getpwnam%s%s: %s", username ? " " : "",
957                             username ? username : "", strerror(errno));
958                 else if (username)
959                         jail_warnx(j, "%s: no such user", username);
960                 else
961                         jail_warnx(j, "unknown uid %d", getuid());
962                 return -1;
963         }
964         *lcapp = login_getpwclass(pwd);
965         if (*lcapp == NULL) {
966                 jail_warnx(j, "getpwclass %s: %s", pwd->pw_name,
967                     strerror(errno));
968                 return -1;
969         }
970         /* Set the groups while the group file is still available */
971         if (initgroups(pwd->pw_name, pwd->pw_gid) < 0) {
972                 jail_warnx(j, "initgroups %s: %s", pwd->pw_name,
973                     strerror(errno));
974                 return -1;
975         }
976         return 0;
977 }
978
979 /*
980  * Make sure a mount or consolelog path is a valid absolute pathname
981  * with no symlinks.
982  */
983 static int
984 check_path(struct cfjail *j, const char *pname, const char *path, int isfile,
985     const char *umount_type)
986 {
987         struct stat st, mpst;
988         struct statfs stfs;
989         char *tpath, *p;
990         const char *jailpath;
991         size_t jplen;
992
993         if (path[0] != '/') {
994                 jail_warnx(j, "%s: %s: not an absolute pathname",
995                     pname, path);
996                 return -1;
997         }
998         /*
999          * Only check for symlinks in components below the jail's path,
1000          * since that's where the security risk lies.
1001          */
1002         jailpath = string_param(j->intparams[KP_PATH]);
1003         if (jailpath == NULL)
1004                 jailpath = "";
1005         jplen = strlen(jailpath);
1006         if (!strncmp(path, jailpath, jplen) && path[jplen] == '/') {
1007                 tpath = alloca(strlen(path) + 1);
1008                 strcpy(tpath, path);
1009                 for (p = tpath + jplen; p != NULL; ) {
1010                         p = strchr(p + 1, '/');
1011                         if (p)
1012                                 *p = '\0';
1013                         if (lstat(tpath, &st) < 0) {
1014                                 if (errno == ENOENT && isfile && !p)
1015                                         break;
1016                                 jail_warnx(j, "%s: %s: %s", pname, tpath,
1017                                     strerror(errno));
1018                                 return -1;
1019                         }
1020                         if (S_ISLNK(st.st_mode)) {
1021                                 jail_warnx(j, "%s: %s is a symbolic link",
1022                                     pname, tpath);
1023                                 return -1;
1024                         }
1025                         if (p)
1026                                 *p = '/';
1027                 }
1028         }
1029         if (umount_type != NULL) {
1030                 if (stat(path, &st) < 0 || statfs(path, &stfs) < 0) {
1031                         jail_warnx(j, "%s: %s: %s", pname, path,
1032                             strerror(errno));
1033                         return -1;
1034                 }
1035                 if (stat(stfs.f_mntonname, &mpst) < 0) {
1036                         jail_warnx(j, "%s: %s: %s", pname, stfs.f_mntonname,
1037                             strerror(errno));
1038                         return -1;
1039                 }
1040                 if (st.st_ino != mpst.st_ino) {
1041                         jail_warnx(j, "%s: %s: not a mount point",
1042                             pname, path);
1043                         return -1;
1044                 }
1045                 if (strcmp(stfs.f_fstypename, umount_type)) {
1046                         jail_warnx(j, "%s: %s: not a %s mount",
1047                             pname, path, umount_type);
1048                         return -1;
1049                 }
1050         }
1051         return 0;
1052 }