]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.sbin/jail/command.c
jail(8): reset to root cpuset before attaching to run commands
[FreeBSD/FreeBSD.git] / usr.sbin / jail / command.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 James Gritton
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/types.h>
33 #include <sys/cpuset.h>
34 #include <sys/event.h>
35 #include <sys/mount.h>
36 #include <sys/stat.h>
37 #include <sys/sysctl.h>
38 #include <sys/user.h>
39 #include <sys/wait.h>
40
41 #include <err.h>
42 #include <errno.h>
43 #include <fcntl.h>
44 #include <kvm.h>
45 #include <login_cap.h>
46 #include <paths.h>
47 #include <pwd.h>
48 #include <signal.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <unistd.h>
53 #include <vis.h>
54
55 #include "jailp.h"
56
#define DEFAULT_STOP_TIMEOUT    10      /* not used in this part of the file */
#define PHASH_SIZE              256     /* number of buckets in phash[] */

/* One tracked child process and the jail it was started on behalf of. */
struct phash {
        LIST_ENTRY(phash)       le;
        struct cfjail           *j;
        pid_t                   pid;
};

LIST_HEAD(phhead, phash);

/*
 * Number of additional commands that may be started.  While this is zero,
 * next_command() parks jails on the runnable queue instead of running them.
 */
int paralimit = -1;

extern char **environ;

static int run_command(struct cfjail *j);
static int term_procs(struct cfjail *j);
static int add_proc(struct cfjail *j, pid_t pid);
static struct cfjail *find_proc(pid_t pid);
static void clear_procs(struct cfjail *j);
static int check_path(struct cfjail *j, const char *pname, const char *path,
    int isfile, const char *umount_type);
static int get_user_info(struct cfjail *j, const char *username,
    const struct passwd **pwdp, login_cap_t **lcapp);

/* Jails waiting on a child process or a timeout. */
static struct cfjails sleeping = TAILQ_HEAD_INITIALIZER(sleeping);
/* Jails deferred because paralimit reached zero. */
static struct cfjails runnable = TAILQ_HEAD_INITIALIZER(runnable);
/*
 * Stand-in command string for parameters that have no string value of
 * their own; its non-zero length keeps next_command() from skipping it.
 */
static struct cfstring dummystring = { .len = 1 };
/* Child processes hashed by pid % PHASH_SIZE. */
static struct phhead phash[PHASH_SIZE];
/* kqueue descriptor; created on first use by add_proc(). */
static int kq;
87
/*
 * Return the id of the root cpuset of the current process, or
 * CPUSET_INVALID if it could not be determined.  The lookup is attempted
 * at most once; both the id and a failure are remembered.
 */
static cpusetid_t
root_cpuset_id(void)
{
        static cpusetid_t cached = CPUSET_INVALID;
        static int rc;

        /* A previous lookup failed; don't retry. */
        if (rc != 0)
                return (CPUSET_INVALID);
        if (cached == CPUSET_INVALID) {
                rc = cpuset_getid(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1, &cached);
                if (rc != 0)
                        return (CPUSET_INVALID);
        }
        return (cached);
}
101
102 /*
103  * Run the next command associated with a jail.
104  */
105 int
106 next_command(struct cfjail *j)
107 {
108         enum intparam comparam;
109         int create_failed, stopping;
110
111         if (paralimit == 0) {
112                 if (j->flags & JF_FROM_RUNQ)
113                         requeue_head(j, &runnable);
114                 else
115                         requeue(j, &runnable);
116                 return 1;
117         }
118         j->flags &= ~JF_FROM_RUNQ;
119         create_failed = (j->flags & (JF_STOP | JF_FAILED)) == JF_FAILED;
120         stopping = (j->flags & JF_STOP) != 0;
121         comparam = *j->comparam;
122         for (;;) {
123                 if (j->comstring == NULL) {
124                         j->comparam += create_failed ? -1 : 1;
125                         switch ((comparam = *j->comparam)) {
126                         case IP__NULL:
127                                 return 0;
128                         case IP_MOUNT_DEVFS:
129                                 if (!bool_param(j->intparams[IP_MOUNT_DEVFS]))
130                                         continue;
131                                 j->comstring = &dummystring;
132                                 break;
133                         case IP_MOUNT_FDESCFS:
134                                 if (!bool_param(j->intparams[IP_MOUNT_FDESCFS]))
135                                         continue;
136                                 j->comstring = &dummystring;
137                                 break;
138                         case IP_MOUNT_PROCFS:
139                                 if (!bool_param(j->intparams[IP_MOUNT_PROCFS]))
140                                         continue;
141                                 j->comstring = &dummystring;
142                                 break;
143                         case IP__OP:
144                         case IP_STOP_TIMEOUT:
145                                 j->comstring = &dummystring;
146                                 break;
147                         default:
148                                 if (j->intparams[comparam] == NULL)
149                                         continue;
150                                 j->comstring = create_failed || (stopping &&
151                                     (j->intparams[comparam]->flags & PF_REV))
152                                     ? TAILQ_LAST(&j->intparams[comparam]->val,
153                                         cfstrings)
154                                     : TAILQ_FIRST(&j->intparams[comparam]->val);
155                         }
156                 } else {
157                         j->comstring = j->comstring == &dummystring ? NULL :
158                             create_failed || (stopping &&
159                             (j->intparams[comparam]->flags & PF_REV))
160                             ? TAILQ_PREV(j->comstring, cfstrings, tq)
161                             : TAILQ_NEXT(j->comstring, tq);
162                 }
163                 if (j->comstring == NULL || j->comstring->len == 0 ||
164                     (create_failed && (comparam == IP_EXEC_PRESTART ||
165                     comparam == IP_EXEC_CREATED || comparam == IP_EXEC_START ||
166                     comparam == IP_COMMAND || comparam == IP_EXEC_POSTSTART ||
167                     comparam == IP_EXEC_PREPARE)))
168                         continue;
169                 switch (run_command(j)) {
170                 case -1:
171                         failed(j);
172                         /* FALLTHROUGH */
173                 case 1:
174                         return 1;
175                 }
176         }
177 }
178
179 /*
180  * Check command exit status
181  */
182 int
183 finish_command(struct cfjail *j)
184 {
185         struct cfjail *rj;
186         int error;
187
188         if (!(j->flags & JF_SLEEPQ))
189                 return 0;
190         j->flags &= ~JF_SLEEPQ;
191         if (*j->comparam == IP_STOP_TIMEOUT) {
192                 j->flags &= ~JF_TIMEOUT;
193                 j->pstatus = 0;
194                 return 0;
195         }
196         paralimit++;
197         if (!TAILQ_EMPTY(&runnable)) {
198                 rj = TAILQ_FIRST(&runnable);
199                 rj->flags |= JF_FROM_RUNQ;
200                 requeue(rj, &ready);
201         }
202         error = 0;
203         if (j->flags & JF_TIMEOUT) {
204                 j->flags &= ~JF_TIMEOUT;
205                 if (*j->comparam != IP_STOP_TIMEOUT) {
206                         jail_warnx(j, "%s: timed out", j->comline);
207                         failed(j);
208                         error = -1;
209                 } else if (verbose > 0)
210                         jail_note(j, "timed out\n");
211         } else if (j->pstatus != 0) {
212                 if (WIFSIGNALED(j->pstatus))
213                         jail_warnx(j, "%s: exited on signal %d",
214                             j->comline, WTERMSIG(j->pstatus));
215                 else
216                         jail_warnx(j, "%s: failed", j->comline);
217                 j->pstatus = 0;
218                 failed(j);
219                 error = -1;
220         }
221         free(j->comline);
222         j->comline = NULL;
223         return error;
224 }
225
/*
 * Check for finished processes or timeouts.
 *
 * If nonblock is non-zero the kqueue is only polled; otherwise this waits
 * until a process exits or the timeout of the first sleeping jail expires.
 *
 * Returns the jail whose process exited (with pstatus set from the event)
 * or whose timeout expired (with JF_TIMEOUT set and its processes
 * cleared).  Returns NULL if no jail is sleeping, or if nothing was ready
 * in non-blocking mode.
 */
struct cfjail *
next_proc(int nonblock)
{
        struct kevent ke;
        struct timespec ts;
        struct timespec *tsp;
        struct cfjail *j;
        int nev;

        if (TAILQ_EMPTY(&sleeping))
                return NULL;

        for (;;) {
                /* Work out how long the head of the queue may still wait. */
                tsp = NULL;
                j = TAILQ_FIRST(&sleeping);
                if (j != NULL && j->timeout.tv_sec != 0) {
                        clock_gettime(CLOCK_REALTIME, &ts);
                        ts.tv_sec = j->timeout.tv_sec - ts.tv_sec;
                        ts.tv_nsec = j->timeout.tv_nsec - ts.tv_nsec;
                        if (ts.tv_nsec < 0) {
                                ts.tv_sec--;
                                ts.tv_nsec += 1000000000;
                        }
                        /* Deadline already reached. */
                        if (ts.tv_sec < 0 ||
                            (ts.tv_sec == 0 && ts.tv_nsec == 0)) {
                                j->flags |= JF_TIMEOUT;
                                clear_procs(j);
                                return j;
                        }
                        tsp = &ts;
                }
                /* Polling overrides any computed wait. */
                if (nonblock) {
                        ts.tv_sec = 0;
                        ts.tv_nsec = 0;
                        tsp = &ts;
                }

                nev = kevent(kq, NULL, 0, &ke, 1, tsp);
                if (nev == -1) {
                        if (errno != EINTR)
                                err(1, "kevent");
                        continue;
                }
                if (nev == 0) {
                        if (nonblock)
                                return NULL;
                        /* The wait ran out: the head jail has timed out. */
                        j = TAILQ_FIRST(&sleeping);
                        j->flags |= JF_TIMEOUT;
                        clear_procs(j);
                        return j;
                }
                if (nev != 1)
                        return NULL;

                /* Reap the child, then report it if it is one we track. */
                (void)waitpid(ke.ident, NULL, WNOHANG);
                j = find_proc(ke.ident);
                if (j != NULL) {
                        j->pstatus = ke.data;
                        return j;
                }
        }
}
285
/*
 * Run a single command for a jail, possibly inside the jail.
 *
 * The command is selected by the jail's current parameter (*j->comparam)
 * and command string (j->comstring).  Jail creation/removal and the stop
 * timeout are handled internally; network and mount parameters are turned
 * into ifconfig/mount/umount invocations; everything else is run either
 * directly or through the shell.
 *
 * Returns -1 on failure, 1 if a child process was started and is now
 * being tracked (the caller must wait for it), and 0 if there is nothing
 * to wait for (internal operation done, empty command, background
 * command, or add_proc() returned 0).  In the forked child this function
 * never returns: it execs the command or exits with status 1.
 */
static int
run_command(struct cfjail *j)
{
        const struct passwd *pwd;
        const struct cfstring *comstring, *s;
        login_cap_t *lcap;
        const char **argv;
        char *acs, *cs, *comcs, *devpath;
        const char *jidstr, *conslog, *path, *ruleset, *term, *username;
        enum intparam comparam;
        size_t comlen;
        pid_t pid;
        cpusetid_t setid;
        int argc, bg, clean, consfd, down, fib, i, injail, sjuser, timeout;
#if defined(INET) || defined(INET6)
        char *addr, *extrap, *p, *val;
#endif

        static char *cleanenv;

        /* Perform some operations that aren't actually commands */
        comparam = *j->comparam;
        down = j->flags & (JF_STOP | JF_FAILED);
        switch (comparam) {
        case IP_STOP_TIMEOUT:
                return term_procs(j);

        case IP__OP:
                if (down) {
                        if (jail_remove(j->jid) < 0 && errno == EPERM) {
                                jail_warnx(j, "jail_remove: %s",
                                           strerror(errno));
                                return -1;
                        }
                        if (verbose > 0 || (verbose == 0 && (j->flags & JF_STOP
                            ? note_remove : j->name != NULL)))
                            jail_note(j, "removed\n");
                        j->jid = -1;
                        if (j->flags & JF_STOP)
                                dep_done(j, DF_LIGHT);
                        else
                                j->flags &= ~JF_PERSIST;
                } else {
                        if (create_jail(j) < 0)
                                return -1;
                        if (iflag)
                                printf("%d\n", j->jid);
                        if (verbose >= 0 && (j->name || verbose > 0))
                                jail_note(j, "created\n");
                        dep_done(j, DF_LIGHT);
                }
                return 0;

        default: ;
        }
        /*
         * Collect exec arguments.  Internal commands for network and
         * mounting build their own argument lists.
         */
        comstring = j->comstring;
        bg = 0;
        switch (comparam) {
#ifdef INET
        case IP__IP4_IFADDR:
                /*
                 * Value is "[iface|]addr[/mask] [extra ifconfig args]".
                 * Split off the extra arguments at the first space and
                 * count them so argv can be sized.
                 */
                argc = 0;
                val = alloca(strlen(comstring->s) + 1);
                strcpy(val, comstring->s);
                cs = val;
                extrap = NULL;
                while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
                        if (extrap == NULL) {
                                *p = '\0';
                                extrap = p + 1;
                        }
                        cs = p + 1;
                        argc++;
                }

                argv = alloca((8 + argc) * sizeof(char *));
                argv[0] = _PATH_IFCONFIG;
                if ((cs = strchr(val, '|'))) {
                        argv[1] = acs = alloca(cs - val + 1);
                        strlcpy(acs, val, cs - val + 1);
                        addr = cs + 1;
                } else {
                        argv[1] = string_param(j->intparams[IP_INTERFACE]);
                        addr = val;
                }
                argv[2] = "inet";
                if (!(cs = strchr(addr, '/'))) {
                        /* No mask given: add as a host address. */
                        argv[3] = addr;
                        argv[4] = "netmask";
                        argv[5] = "255.255.255.255";
                        argc = 6;
                } else if (strchr(cs + 1, '.')) {
                        /* Dotted netmask after the slash. */
                        argv[3] = acs = alloca(cs - addr + 1);
                        strlcpy(acs, addr, cs - addr + 1);
                        argv[4] = "netmask";
                        argv[5] = cs + 1;
                        argc = 6;
                } else {
                        /* Prefix length: pass addr/len through unchanged. */
                        argv[3] = addr;
                        argc = 4;
                }

                if (!down && extrap != NULL) {
                        for (cs = strtok(extrap, " "); cs;
                             cs = strtok(NULL, " ")) {
                                size_t len = strlen(cs) + 1;
                                argv[argc++] = acs = alloca(len);
                                strlcpy(acs, cs, len);
                        }
                }

                argv[argc] = down ? "-alias" : "alias";
                argv[argc + 1] = NULL;
                break;
#endif

#ifdef INET6
        case IP__IP6_IFADDR:
                /* Same layout as the IPv4 case above. */
                argc = 0;
                val = alloca(strlen(comstring->s) + 1);
                strcpy(val, comstring->s);
                cs = val;
                extrap = NULL;
                while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
                        if (extrap == NULL) {
                                *p = '\0';
                                extrap = p + 1;
                        }
                        cs = p + 1;
                        argc++;
                }

                argv = alloca((8 + argc) * sizeof(char *));
                argv[0] = _PATH_IFCONFIG;
                if ((cs = strchr(val, '|'))) {
                        argv[1] = acs = alloca(cs - val + 1);
                        strlcpy(acs, val, cs - val + 1);
                        addr = cs + 1;
                } else {
                        argv[1] = string_param(j->intparams[IP_INTERFACE]);
                        addr = val;
                }
                argv[2] = "inet6";
                argv[3] = addr;
                if (!(cs = strchr(addr, '/'))) {
                        argv[4] = "prefixlen";
                        argv[5] = "128";
                        argc = 6;
                } else
                        argc = 4;

                /*
                 * Only tokenize when extra arguments exist.  Calling
                 * strtok() with a NULL string here would resume from
                 * whatever an earlier, unrelated strtok() left behind.
                 */
                if (!down && extrap != NULL) {
                        for (cs = strtok(extrap, " "); cs;
                             cs = strtok(NULL, " ")) {
                                size_t len = strlen(cs) + 1;
                                argv[argc++] = acs = alloca(len);
                                strlcpy(acs, cs, len);
                        }
                }

                argv[argc] = down ? "-alias" : "alias";
                argv[argc + 1] = NULL;
                break;
#endif

        case IP_VNET_INTERFACE:
                argv = alloca(5 * sizeof(char *));
                argv[0] = _PATH_IFCONFIG;
                argv[1] = comstring->s;
                argv[2] = down ? "-vnet" : "vnet";
                jidstr = string_param(j->intparams[KP_JID]);
                argv[3] = jidstr ? jidstr : string_param(j->intparams[KP_NAME]);
                argv[4] = NULL;
                break;

        case IP_MOUNT:
        case IP__MOUNT_FROM_FSTAB:
                /*
                 * Parse up to four fstab fields (device, mount point,
                 * type, options), then rearrange them in place into a
                 * mount or umount command line.
                 */
                argv = alloca(8 * sizeof(char *));
                comcs = alloca(comstring->len + 1);
                strcpy(comcs, comstring->s);
                argc = 0;
                for (cs = strtok(comcs, " \t\f\v\r\n"); cs && argc < 4;
                     cs = strtok(NULL, " \t\f\v\r\n")) {
                        /* Only the first two fields are vis(3)-decoded. */
                        if (argc <= 1 && strunvis(cs, cs) < 0) {
                                jail_warnx(j, "%s: %s: fstab parse error",
                                    j->intparams[comparam]->name, comstring->s);
                                return -1;
                        }
                        argv[argc++] = cs;
                }
                if (argc == 0)
                        return 0;
                if (argc < 3) {
                        jail_warnx(j, "%s: %s: missing information",
                            j->intparams[comparam]->name, comstring->s);
                        return -1;
                }
                if (check_path(j, j->intparams[comparam]->name, argv[1], 0,
                    down ? argv[2] : NULL) < 0)
                        return -1;
                if (down) {
                        /* umount -t type mountpoint */
                        argv[4] = NULL;
                        argv[3] = argv[1];
                        argv[0] = "/sbin/umount";
                } else {
                        if (argc == 4) {
                                /* mount -t type -o opts device mountpoint */
                                argv[7] = NULL;
                                argv[6] = argv[1];
                                argv[5] = argv[0];
                                argv[4] = argv[3];
                                argv[3] = "-o";
                        } else {
                                /* mount -t type device mountpoint */
                                argv[5] = NULL;
                                argv[4] = argv[1];
                                argv[3] = argv[0];
                        }
                        argv[0] = _PATH_MOUNT;
                }
                argv[1] = "-t";
                break;

        case IP_MOUNT_DEVFS:
                argv = alloca(7 * sizeof(char *));
                path = string_param(j->intparams[KP_PATH]);
                if (path == NULL) {
                        jail_warnx(j, "mount.devfs: no jail root path defined");
                        return -1;
                }
                devpath = alloca(strlen(path) + 5);
                sprintf(devpath, "%s/dev", path);
                if (check_path(j, "mount.devfs", devpath, 0,
                    down ? "devfs" : NULL) < 0)
                        return -1;
                if (down) {
                        argv[0] = "/sbin/umount";
                        argv[1] = devpath;
                        argv[2] = NULL;
                } else {
                        argv[0] = _PATH_MOUNT;
                        argv[1] = "-t";
                        argv[2] = "devfs";
                        ruleset = string_param(j->intparams[KP_DEVFS_RULESET]);
                        if (!ruleset)
                            ruleset = "4";      /* devfsrules_jail */
                        argv[3] = acs = alloca(11 + strlen(ruleset));
                        sprintf(acs, "-oruleset=%s", ruleset);
                        argv[4] = ".";
                        argv[5] = devpath;
                        argv[6] = NULL;
                }
                break;

        case IP_MOUNT_FDESCFS:
                argv = alloca(7 * sizeof(char *));
                path = string_param(j->intparams[KP_PATH]);
                if (path == NULL) {
                        jail_warnx(j, "mount.fdescfs: no jail root path defined");
                        return -1;
                }
                devpath = alloca(strlen(path) + 8);
                sprintf(devpath, "%s/dev/fd", path);
                if (check_path(j, "mount.fdescfs", devpath, 0,
                    down ? "fdescfs" : NULL) < 0)
                        return -1;
                if (down) {
                        argv[0] = "/sbin/umount";
                        argv[1] = devpath;
                        argv[2] = NULL;
                } else {
                        argv[0] = _PATH_MOUNT;
                        argv[1] = "-t";
                        argv[2] = "fdescfs";
                        argv[3] = ".";
                        argv[4] = devpath;
                        argv[5] = NULL;
                }
                break;

        case IP_MOUNT_PROCFS:
                argv = alloca(7 * sizeof(char *));
                path = string_param(j->intparams[KP_PATH]);
                if (path == NULL) {
                        jail_warnx(j, "mount.procfs: no jail root path defined");
                        return -1;
                }
                devpath = alloca(strlen(path) + 6);
                sprintf(devpath, "%s/proc", path);
                if (check_path(j, "mount.procfs", devpath, 0,
                    down ? "procfs" : NULL) < 0)
                        return -1;
                if (down) {
                        argv[0] = "/sbin/umount";
                        argv[1] = devpath;
                        argv[2] = NULL;
                } else {
                        argv[0] = _PATH_MOUNT;
                        argv[1] = "-t";
                        argv[2] = "procfs";
                        argv[3] = ".";
                        argv[4] = devpath;
                        argv[5] = NULL;
                }
                break;

        case IP_COMMAND:
                if (j->name != NULL)
                        goto default_command;
                /*
                 * For an unnamed jail, the whole value list of the
                 * command parameter is used as one argument vector.
                 */
                argc = 0;
                TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
                        argc++;
                argv = alloca((argc + 1) * sizeof(char *));
                argc = 0;
                TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
                        argv[argc++] = s->s;
                argv[argc] = NULL;
                /* All strings consumed at once; don't iterate over them. */
                j->comstring = &dummystring;
                break;

        default:
        default_command:
                /*
                 * Anything containing shell metacharacters goes through
                 * the shell, except for a lone trailing '&', which is
                 * handled here as "run in the background".
                 */
                if ((cs = strpbrk(comstring->s, "!\"$&'()*;<>?[\\]`{|}~")) &&
                    !(cs[0] == '&' && cs[1] == '\0')) {
                        argv = alloca(4 * sizeof(char *));
                        argv[0] = _PATH_BSHELL;
                        argv[1] = "-c";
                        argv[2] = comstring->s;
                        argv[3] = NULL;
                } else {
                        if (cs) {
                                *cs = 0;
                                bg = 1;
                        }
                        /* First pass counts the words, second stores them. */
                        comcs = alloca(comstring->len + 1);
                        strcpy(comcs, comstring->s);
                        argc = 0;
                        for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
                             cs = strtok(NULL, " \t\f\v\r\n"))
                                argc++;
                        argv = alloca((argc + 1) * sizeof(char *));
                        strcpy(comcs, comstring->s);
                        argc = 0;
                        for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
                             cs = strtok(NULL, " \t\f\v\r\n"))
                                argv[argc++] = cs;
                        argv[argc] = NULL;
                }
        }
        if (argv[0] == NULL)
                return 0;

        /* Record the absolute deadline, or zero for no timeout. */
        if (int_param(j->intparams[IP_EXEC_TIMEOUT], &timeout) &&
            timeout != 0) {
                clock_gettime(CLOCK_REALTIME, &j->timeout);
                j->timeout.tv_sec += timeout;
        } else
                j->timeout.tv_sec = 0;

        injail = comparam == IP_EXEC_START || comparam == IP_COMMAND ||
            comparam == IP_EXEC_STOP;
        if (injail)
                setid = root_cpuset_id();
        else
                setid = CPUSET_INVALID;
        clean = bool_param(j->intparams[IP_EXEC_CLEAN]);
        username = string_param(j->intparams[injail
            ? IP_EXEC_JAIL_USER : IP_EXEC_SYSTEM_USER]);
        sjuser = bool_param(j->intparams[IP_EXEC_SYSTEM_JAIL_USER]);

        /* consfd == 0 means "no console log"; it is never a real log fd. */
        consfd = 0;
        if (injail &&
            (conslog = string_param(j->intparams[IP_EXEC_CONSOLELOG]))) {
                if (check_path(j, "exec.consolelog", conslog, 1, NULL) < 0)
                        return -1;
                consfd =
                    open(conslog, O_WRONLY | O_CREAT | O_APPEND, DEFFILEMODE);
                if (consfd < 0) {
                        jail_warnx(j, "open %s: %s", conslog, strerror(errno));
                        return -1;
                }
        }

        /* Join argv into a single line for messages. */
        comlen = 0;
        for (i = 0; argv[i]; i++)
                comlen += strlen(argv[i]) + 1;
        j->comline = cs = emalloc(comlen);
        for (i = 0; argv[i]; i++) {
                strcpy(cs, argv[i]);
                if (argv[i + 1]) {
                        cs += strlen(argv[i]) + 1;
                        cs[-1] = ' ';
                }
        }
        if (verbose > 0)
                jail_note(j, "run command%s%s%s: %s\n",
                    injail ? " in jail" : "", username ? " as " : "",
                    username ? username : "", j->comline);

        pid = fork();
        if (pid < 0)
                err(1, "fork");
        if (pid > 0) {
                /*
                 * Parent.  The console log descriptor is only needed by
                 * the child; close our copy so it isn't leaked once per
                 * command.
                 */
                if (consfd != 0)
                        (void)close(consfd);
                if (bg || !add_proc(j, pid)) {
                        free(j->comline);
                        j->comline = NULL;
                        return 0;
                } else {
                        paralimit--;
                        return 1;
                }
        }
        /* Child from here on. */
        if (bg)
                setsid();

        /* Set up the environment and run the command */
        pwd = NULL;
        lcap = NULL;
        /*
         * With exec.system_jail_user the user is looked up before
         * attaching to the jail; otherwise it is looked up afterwards,
         * below.
         */
        if ((clean || username) && injail && sjuser &&
            get_user_info(j, username, &pwd, &lcap) < 0)
                exit(1);
        if (injail) {
                /* jail_attach won't chdir along with its chroot. */
                path = string_param(j->intparams[KP_PATH]);
                if (path && chdir(path) < 0) {
                        jail_warnx(j, "chdir %s: %s", path, strerror(errno));
                        exit(1);
                }
                if (int_param(j->intparams[IP_EXEC_FIB], &fib) &&
                    setfib(fib) < 0) {
                        jail_warnx(j, "setfib: %s", strerror(errno));
                        exit(1);
                }

                /*
                 * We wouldn't have specialized our affinity, so just setid to
                 * root.  We do this prior to attaching to avoid the kernel
                 * having to create a transient cpuset that we'll promptly
                 * free up with a reset to the jail's cpuset.
                 *
                 * This is just a best-effort to use as wide of mask as
                 * possible.
                 */
                if (setid != CPUSET_INVALID)
                        (void)cpuset_setid(CPU_WHICH_PID, -1, setid);

                if (jail_attach(j->jid) < 0) {
                        jail_warnx(j, "jail_attach: %s", strerror(errno));
                        exit(1);
                }
        }
        if (clean || username) {
                if (!(injail && sjuser) &&
                    get_user_info(j, username, &pwd, &lcap) < 0)
                        exit(1);
                if (clean) {
                        /* Start from an empty environment, keeping TERM. */
                        term = getenv("TERM");
                        environ = &cleanenv;
                        setenv("PATH", "/bin:/usr/bin", 0);
                        if (term != NULL)
                                setenv("TERM", term, 1);
                }
                if (setgid(pwd->pw_gid) < 0) {
                        jail_warnx(j, "setgid %d: %s", pwd->pw_gid,
                            strerror(errno));
                        exit(1);
                }
                if (setusercontext(lcap, pwd, pwd->pw_uid, username
                    ? LOGIN_SETALL & ~LOGIN_SETGROUP & ~LOGIN_SETLOGIN
                    : LOGIN_SETPATH | LOGIN_SETENV) < 0) {
                        jail_warnx(j, "setusercontext %s: %s", pwd->pw_name,
                            strerror(errno));
                        exit(1);
                }
                login_close(lcap);
                setenv("USER", pwd->pw_name, 1);
                setenv("HOME", pwd->pw_dir, 1);
                setenv("SHELL",
                    *pwd->pw_shell ? pwd->pw_shell : _PATH_BSHELL, 1);
                if (clean && chdir(pwd->pw_dir) < 0) {
                        jail_warnx(j, "chdir %s: %s",
                            pwd->pw_dir, strerror(errno));
                        exit(1);
                }
                endpwent();
        }

        /* Send stdout and stderr to the console log, if one was opened. */
        if (consfd != 0 && (dup2(consfd, 1) < 0 || dup2(consfd, 2) < 0)) {
                jail_warnx(j, "exec.consolelog: %s", strerror(errno));
                exit(1);
        }
        closefrom(3);
        execvp(argv[0], __DECONST(char *const*, argv));
        jail_warnx(j, "exec %s: %s", argv[0], strerror(errno));
        exit(1);
}
786
/*
 * Begin watching a process for exit and record it in the process hash as
 * belonging to a jail.
 *
 * The kqueue is created the first time it is needed.  If the process has
 * already gone away (kevent fails with ESRCH), nothing is recorded and 0 is
 * returned.  Otherwise the process is counted against the jail, the jail is
 * flagged JF_SLEEPQ and filed in the sleeping queue, and 1 is returned.
 * Failure to create the kqueue or to register the event is fatal.
 */
static int
add_proc(struct cfjail *j, pid_t pid)
{
        struct kevent ev;
        struct cfjail *cur;
        struct phash *ent;

        if (!kq) {
                kq = kqueue();
                if (kq < 0)
                        err(1, "kqueue");
        }

        /* Ask to be told when the process exits. */
        EV_SET(&ev, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
        if (kevent(kq, &ev, 1, NULL, 0, NULL) < 0) {
                if (errno != ESRCH)
                        err(1, "kevent");
                return 0;
        }

        ent = emalloc(sizeof(*ent));
        ent->j = j;
        ent->pid = pid;
        LIST_INSERT_HEAD(&phash[pid % PHASH_SIZE], ent, le);
        j->nprocs++;
        j->flags |= JF_SLEEPQ;

        /* A jail without a timeout needs no particular position. */
        if (j->timeout.tv_sec == 0) {
                requeue(j, &sleeping);
                return 1;
        }

        /*
         * File the jail in the sleep queue according to its timeout: it
         * goes ahead of the first entry that either has no timeout or
         * whose timeout is not earlier than this jail's.
         */
        TAILQ_REMOVE(j->queue, j, tq);
        TAILQ_FOREACH(cur, &sleeping, tq) {
                if (!cur->timeout.tv_sec)
                        break;
                if (j->timeout.tv_sec < cur->timeout.tv_sec)
                        break;
                if (j->timeout.tv_sec == cur->timeout.tv_sec &&
                    j->timeout.tv_nsec <= cur->timeout.tv_nsec)
                        break;
        }
        if (cur != NULL)
                TAILQ_INSERT_BEFORE(cur, j, tq);
        else
                TAILQ_INSERT_TAIL(&sleeping, j, tq);
        j->queue = &sleeping;
        return 1;
}
831
/*
 * Forget every tracked process that belongs to a jail: reset the jail's
 * process count, then walk each hash bucket, cancelling the exit
 * notification for each matching entry and freeing it.
 */
static void
clear_procs(struct cfjail *j)
{
        struct kevent ev;
        struct phash *ent, *next;
        int bucket;

        j->nprocs = 0;
        for (bucket = 0; bucket < PHASH_SIZE; bucket++) {
                LIST_FOREACH_SAFE(ent, &phash[bucket], le, next) {
                        if (ent->j != j)
                                continue;
                        /* The result of the delete is deliberately ignored. */
                        EV_SET(&ev, ent->pid, EVFILT_PROC, EV_DELETE,
                            NOTE_EXIT, 0, NULL);
                        (void)kevent(kq, &ev, 1, NULL, 0, NULL);
                        LIST_REMOVE(ent, le);
                        free(ent);
                }
        }
}
853
854 /*
855  * Find the jail that corresponds to an exited process.
856  */
857 static struct cfjail *
858 find_proc(pid_t pid)
859 {
860         struct cfjail *j;
861         struct phash *ph;
862
863         LIST_FOREACH(ph, &phash[pid % PHASH_SIZE], le)
864                 if (ph->pid == pid) {
865                         j = ph->j;
866                         LIST_REMOVE(ph, le);
867                         free(ph);
868                         return --j->nprocs ? NULL : j;
869                 }
870         return NULL;
871 }
872
/*
 * Send SIGTERM to all processes in a jail and arrange to wait for them.
 *
 * Every process successfully signalled is handed to add_proc().  If any
 * processes are then being tracked for the jail, the jail's timeout is set
 * to the current time plus the stop timeout and 1 is returned.  Returns 0
 * when there is nothing to wait for: the stop timeout is configured as
 * zero, the kvm interface is unavailable, or no process is being tracked.
 */
static int
term_procs(struct cfjail *j)
{
        /* The kvm handle is opened on first use and kept for later calls. */
        static kvm_t *kd;

        struct kinfo_proc *procs, *kp;
        int announced, count, idx, timeout;

        if (int_param(j->intparams[IP_STOP_TIMEOUT], &timeout)) {
                if (timeout == 0)
                        return 0;
        } else
                timeout = DEFAULT_STOP_TIMEOUT;

        if (kd == NULL &&
            (kd = kvm_open(NULL, NULL, NULL, O_RDONLY, NULL)) == NULL)
                return 0;

        procs = kvm_getprocs(kd, KERN_PROC_PROC, 0, &count);
        if (procs == NULL)
                return 0;

        announced = 0;
        for (idx = 0; idx < count; idx++) {
                kp = &procs[idx];
                if (kp->ki_jid != j->jid || kill(kp->ki_pid, SIGTERM) != 0)
                        continue;
                (void)add_proc(j, kp->ki_pid);
                if (verbose <= 0)
                        continue;
                /* Print the heading once, before the first pid. */
                if (!announced) {
                        announced = 1;
                        jail_note(j, "sent SIGTERM to:");
                }
                printf(" %d", kp->ki_pid);
        }
        if (announced)
                printf("\n");

        if (j->nprocs <= 0)
                return 0;
        clock_gettime(CLOCK_REALTIME, &j->timeout);
        j->timeout.tv_sec += timeout;
        return 1;
}
920
921 /*
922  * Look up a user in the passwd and login.conf files.
923  */
924 static int
925 get_user_info(struct cfjail *j, const char *username,
926     const struct passwd **pwdp, login_cap_t **lcapp)
927 {
928         const struct passwd *pwd;
929
930         errno = 0;
931         *pwdp = pwd = username ? getpwnam(username) : getpwuid(getuid());
932         if (pwd == NULL) {
933                 if (errno)
934                         jail_warnx(j, "getpwnam%s%s: %s", username ? " " : "",
935                             username ? username : "", strerror(errno));
936                 else if (username)
937                         jail_warnx(j, "%s: no such user", username);
938                 else
939                         jail_warnx(j, "unknown uid %d", getuid());
940                 return -1;
941         }
942         *lcapp = login_getpwclass(pwd);
943         if (*lcapp == NULL) {
944                 jail_warnx(j, "getpwclass %s: %s", pwd->pw_name,
945                     strerror(errno));
946                 return -1;
947         }
948         /* Set the groups while the group file is still available */
949         if (initgroups(pwd->pw_name, pwd->pw_gid) < 0) {
950                 jail_warnx(j, "initgroups %s: %s", pwd->pw_name,
951                     strerror(errno));
952                 return -1;
953         }
954         return 0;
955 }
956
957 /*
958  * Make sure a mount or consolelog path is a valid absolute pathname
959  * with no symlinks.
960  */
961 static int
962 check_path(struct cfjail *j, const char *pname, const char *path, int isfile,
963     const char *umount_type)
964 {
965         struct stat st, mpst;
966         struct statfs stfs;
967         char *tpath, *p;
968         const char *jailpath;
969         size_t jplen;
970
971         if (path[0] != '/') {
972                 jail_warnx(j, "%s: %s: not an absolute pathname",
973                     pname, path);
974                 return -1;
975         }
976         /*
977          * Only check for symlinks in components below the jail's path,
978          * since that's where the security risk lies.
979          */
980         jailpath = string_param(j->intparams[KP_PATH]);
981         if (jailpath == NULL)
982                 jailpath = "";
983         jplen = strlen(jailpath);
984         if (!strncmp(path, jailpath, jplen) && path[jplen] == '/') {
985                 tpath = alloca(strlen(path) + 1);
986                 strcpy(tpath, path);
987                 for (p = tpath + jplen; p != NULL; ) {
988                         p = strchr(p + 1, '/');
989                         if (p)
990                                 *p = '\0';
991                         if (lstat(tpath, &st) < 0) {
992                                 if (errno == ENOENT && isfile && !p)
993                                         break;
994                                 jail_warnx(j, "%s: %s: %s", pname, tpath,
995                                     strerror(errno));
996                                 return -1;
997                         }
998                         if (S_ISLNK(st.st_mode)) {
999                                 jail_warnx(j, "%s: %s is a symbolic link",
1000                                     pname, tpath);
1001                                 return -1;
1002                         }
1003                         if (p)
1004                                 *p = '/';
1005                 }
1006         }
1007         if (umount_type != NULL) {
1008                 if (stat(path, &st) < 0 || statfs(path, &stfs) < 0) {
1009                         jail_warnx(j, "%s: %s: %s", pname, path,
1010                             strerror(errno));
1011                         return -1;
1012                 }
1013                 if (stat(stfs.f_mntonname, &mpst) < 0) {
1014                         jail_warnx(j, "%s: %s: %s", pname, stfs.f_mntonname,
1015                             strerror(errno));
1016                         return -1;
1017                 }
1018                 if (st.st_ino != mpst.st_ino) {
1019                         jail_warnx(j, "%s: %s: not a mount point",
1020                             pname, path);
1021                         return -1;
1022                 }
1023                 if (strcmp(stfs.f_fstypename, umount_type)) {
1024                         jail_warnx(j, "%s: %s: not a %s mount",
1025                             pname, path, umount_type);
1026                         return -1;
1027                 }
1028         }
1029         return 0;
1030 }