2 * Copyright (c) 1999 Poul-Henning Kamp.
3 * Copyright (c) 2008 Bjoern A. Zeeb.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
33 #include "opt_inet6.h"
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/kernel.h>
39 #include <sys/systm.h>
40 #include <sys/errno.h>
41 #include <sys/sysproto.h>
42 #include <sys/malloc.h>
45 #include <sys/taskqueue.h>
46 #include <sys/fcntl.h>
49 #include <sys/mutex.h>
51 #include <sys/namei.h>
52 #include <sys/mount.h>
53 #include <sys/queue.h>
54 #include <sys/socket.h>
55 #include <sys/syscallsubr.h>
56 #include <sys/sysctl.h>
57 #include <sys/vnode.h>
58 #include <sys/vimage.h>
61 #include <netinet/in.h>
65 #include <netinet6/in6_var.h>
69 #include <security/mac/mac_framework.h>
71 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
73 SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
76 int jail_set_hostname_allowed = 1;
77 SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
78 &jail_set_hostname_allowed, 0,
79 "Processes in jail can set their hostnames");
81 int jail_socket_unixiproute_only = 1;
82 SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
83 &jail_socket_unixiproute_only, 0,
84 "Processes in jail are limited to creating UNIX/IP/route sockets only");
86 int jail_sysvipc_allowed = 0;
87 SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
88 &jail_sysvipc_allowed, 0,
89 "Processes in jail can use System V IPC primitives");
91 static int jail_enforce_statfs = 2;
92 SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
93 &jail_enforce_statfs, 0,
94 "Processes in jail cannot see all mounted file systems");
96 int jail_allow_raw_sockets = 0;
97 SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
98 &jail_allow_raw_sockets, 0,
99 "Prison root can create raw sockets");
101 int jail_chflags_allowed = 0;
102 SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
103 &jail_chflags_allowed, 0,
104 "Processes in jail can alter system file flags");
106 int jail_mount_allowed = 0;
107 SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
108 &jail_mount_allowed, 0,
109 "Processes in jail can mount/unmount jail-friendly file systems");
111 int jail_max_af_ips = 255;
112 SYSCTL_INT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW,
114 "Number of IP addresses a jail may have at most per address family");
116 /* allprison, lastprid, and prisoncount are protected by allprison_lock. */
117 struct prisonlist allprison;
118 struct sx allprison_lock;
122 static void init_prison(void *);
123 static void prison_complete(void *context, int pending);
124 static int sysctl_jail_list(SYSCTL_HANDLER_ARGS);
126 static int _prison_check_ip4(struct prison *pr, struct in_addr *ia);
129 static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6);
133 init_prison(void *data __unused)
136 sx_init(&allprison_lock, "allprison");
137 LIST_INIT(&allprison);
140 SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
144 qcmp_v4(const void *ip1, const void *ip2)
149 * We need to compare in HBO here to get the list sorted as expected
150 * by the result of the code. Sorting NBO addresses gives you
151 * interesting results. If you do not understand, do not try.
153 iaa = ntohl(((const struct in_addr *)ip1)->s_addr);
154 iab = ntohl(((const struct in_addr *)ip2)->s_addr);
157 * Do not simply return the difference of the two numbers, the int is
171 qcmp_v6(const void *ip1, const void *ip2)
173 const struct in6_addr *ia6a, *ia6b;
176 ia6a = (const struct in6_addr *)ip1;
177 ia6b = (const struct in6_addr *)ip2;
180 for (i = 0; rc == 0 && i < sizeof(struct in6_addr); i++) {
181 if (ia6a->s6_addr[i] > ia6b->s6_addr[i])
183 else if (ia6a->s6_addr[i] < ia6b->s6_addr[i])
190 #if defined(INET) || defined(INET6)
192 prison_check_conflicting_ips(struct prison *p)
197 sx_assert(&allprison_lock, SX_LOCKED);
199 if (p->pr_ip4s == 0 && p->pr_ip6s == 0)
202 LIST_FOREACH(pr, &allprison, pr_list) {
204 * Skip 'dying' prisons to avoid problems when
205 * restarting multi-IP jails.
207 if (pr->pr_state == PRISON_STATE_DYING)
211 * We permit conflicting IPs if there is no
212 * more than 1 IP on each jail.
213 * In case there is one duplicate on a jail with
214 * more than one IP stop checking and return error.
217 if ((p->pr_ip4s >= 1 && pr->pr_ip4s > 1) ||
218 (p->pr_ip4s > 1 && pr->pr_ip4s >= 1)) {
219 for (i = 0; i < p->pr_ip4s; i++) {
220 if (_prison_check_ip4(pr, &p->pr_ip4[i]) == 0)
226 if ((p->pr_ip6s >= 1 && pr->pr_ip6s > 1) ||
227 (p->pr_ip6s > 1 && pr->pr_ip6s >= 1)) {
228 for (i = 0; i < p->pr_ip6s; i++) {
229 if (_prison_check_ip6(pr, &p->pr_ip6[i]) == 0)
240 jail_copyin_ips(struct jail *j)
246 struct in6_addr *ip6;
251 * Copy in addresses, check for duplicate addresses and do some
252 * simple 0 and broadcast checks. If users give other bogus addresses
253 * it is their problem.
255 * IP addresses are all sorted but ip[0] to preserve the primary IP
256 * address as given from userland. This special IP is used for
257 * unbound outgoing connections as well as for "loopback" traffic.
267 ip4 = (struct in_addr *)malloc(j->ip4s * sizeof(struct in_addr),
268 M_PRISON, M_WAITOK | M_ZERO);
269 error = copyin(j->ip4, ip4, j->ip4s * sizeof(struct in_addr));
272 /* Sort all but the first IPv4 address. */
274 qsort((ip4 + 1), j->ip4s - 1,
275 sizeof(struct in_addr), qcmp_v4);
278 * We do not have to care about byte order for these checks
279 * so we will do them in NBO.
281 for (i=0; i<j->ip4s; i++) {
282 if (ip4[i].s_addr == htonl(INADDR_ANY) ||
283 ip4[i].s_addr == htonl(INADDR_BROADCAST)) {
287 if ((i+1) < j->ip4s &&
288 (ip4[0].s_addr == ip4[i+1].s_addr ||
289 ip4[i].s_addr == ip4[i+1].s_addr)) {
301 ip6 = (struct in6_addr *)malloc(j->ip6s * sizeof(struct in6_addr),
302 M_PRISON, M_WAITOK | M_ZERO);
303 error = copyin(j->ip6, ip6, j->ip6s * sizeof(struct in6_addr));
306 /* Sort all but the first IPv6 address. */
308 qsort((ip6 + 1), j->ip6s - 1,
309 sizeof(struct in6_addr), qcmp_v6);
310 for (i=0; i<j->ip6s; i++) {
311 if (IN6_IS_ADDR_UNSPECIFIED(&ip6[i])) {
315 if ((i+1) < j->ip6s &&
316 (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[i+1]) ||
317 IN6_ARE_ADDR_EQUAL(&ip6[i], &ip6[i+1]))) {
338 #endif /* INET || INET6 */
341 jail_handle_ips(struct jail *j)
343 #if defined(INET) || defined(INET6)
348 * Finish conversion for older versions, copyin and setup IPs.
350 switch (j->version) {
354 /* FreeBSD single IPv4 jails. */
357 if (j->ip4s == INADDR_ANY || j->ip4s == INADDR_BROADCAST)
359 ip4 = (struct in_addr *)malloc(sizeof(struct in_addr),
360 M_PRISON, M_WAITOK | M_ZERO);
363 * Jail version 0 still used HBO for the IPv4 address.
365 ip4->s_addr = htonl(j->ip4s);
376 * Version 1 was used by multi-IPv4 jail implementations
377 * that never made it into the official kernel.
378 * We should never hit this here; jail() should catch it.
382 case 2: /* JAIL_API_VERSION */
383 /* FreeBSD multi-IPv4/IPv6,noIP jails. */
384 #if defined(INET) || defined(INET6)
386 if (j->ip4s > jail_max_af_ips)
393 if (j->ip6s > jail_max_af_ips)
399 error = jail_copyin_ips(j);
406 /* Sci-Fi jails are not supported, sorry. */
420 jail(struct thread *td, struct jail_args *uap)
426 error = copyin(uap->jail, &version, sizeof(uint32_t));
432 /* FreeBSD single IPv4 jails. */
436 bzero(&j, sizeof(struct jail));
437 error = copyin(uap->jail, &j0, sizeof(struct jail_v0));
440 j.version = j0.version;
442 j.hostname = j0.hostname;
443 j.ip4s = j0.ip_number;
449 * Version 1 was used by multi-IPv4 jail implementations
450 * that never made it into the official kernel.
454 case 2: /* JAIL_API_VERSION */
455 /* FreeBSD multi-IPv4/IPv6,noIP jails. */
456 error = copyin(uap->jail, &j, sizeof(struct jail));
462 /* Sci-Fi jails are not supported, sorry. */
465 return (kern_jail(td, &j));
469 kern_jail(struct thread *td, struct jail *j)
472 struct prison *pr, *tpr;
473 struct jail_attach_args jaa;
474 int vfslocked, error, tryprid;
476 KASSERT(j != NULL, ("%s: j is NULL", __func__));
478 /* Handle addresses - convert old structs, copyin, check IPs. */
479 error = jail_handle_ips(j);
483 /* Allocate struct prison and fill it with life. */
484 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
485 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
487 error = copyinstr(j->path, &pr->pr_path, sizeof(pr->pr_path), NULL);
490 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
495 vfslocked = NDHASGIANT(&nd);
496 pr->pr_root = nd.ni_vp;
497 VOP_UNLOCK(nd.ni_vp, 0);
498 NDFREE(&nd, NDF_ONLY_PNBUF);
499 VFS_UNLOCK_GIANT(vfslocked);
500 error = copyinstr(j->hostname, &pr->pr_host, sizeof(pr->pr_host), NULL);
503 if (j->jailname != NULL) {
504 error = copyinstr(j->jailname, &pr->pr_name,
505 sizeof(pr->pr_name), NULL);
511 pr->pr_ip4s = j->ip4s;
516 pr->pr_ip6s = j->ip6s;
520 pr->pr_securelevel = securelevel;
521 bzero(&pr->pr_osd, sizeof(pr->pr_osd));
524 * Pre-set prison state to ALIVE upon creation. This is needed so we
525 * can later attach the process to it, etc (avoiding another extra
526 * state for the process of creation, complicating things).
528 pr->pr_state = PRISON_STATE_ALIVE;
530 /* Allocate a dedicated cpuset for each jail. */
531 error = cpuset_create_root(td, &pr->pr_cpuset);
535 sx_xlock(&allprison_lock);
536 /* Make sure we cannot run into problems with ambiguous bind()ings. */
537 #if defined(INET) || defined(INET6)
538 error = prison_check_conflicting_ips(pr);
540 sx_xunlock(&allprison_lock);
545 /* Determine next pr_id and add prison to allprison list. */
546 tryprid = lastprid + 1;
547 if (tryprid == JAIL_MAX)
550 LIST_FOREACH(tpr, &allprison, pr_list) {
551 if (tpr->pr_id == tryprid) {
553 if (tryprid == JAIL_MAX) {
554 sx_xunlock(&allprison_lock);
561 pr->pr_id = jaa.jid = lastprid = tryprid;
562 LIST_INSERT_HEAD(&allprison, pr, pr_list);
564 sx_xunlock(&allprison_lock);
566 error = jail_attach(td, &jaa);
569 mtx_lock(&pr->pr_mtx);
571 mtx_unlock(&pr->pr_mtx);
572 td->td_retval[0] = jaa.jid;
575 sx_xlock(&allprison_lock);
576 LIST_REMOVE(pr, pr_list);
578 sx_xunlock(&allprison_lock);
580 cpuset_rel(pr->pr_cpuset);
582 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
584 VFS_UNLOCK_GIANT(vfslocked);
586 mtx_destroy(&pr->pr_mtx);
589 free(j->ip6, M_PRISON);
592 free(j->ip4, M_PRISON);
599 * struct jail_attach_args {
604 jail_attach(struct thread *td, struct jail_attach_args *uap)
607 struct ucred *newcred, *oldcred;
609 int vfslocked, error;
612 * XXX: Note that there is a slight race here if two threads
613 * in the same privileged process attempt to attach to two
614 * different jails at the same time. It is important for
615 * user processes not to do this, or they might end up with
616 * a process root from one prison, but attached to the jail
619 error = priv_check(td, PRIV_JAIL_ATTACH);
624 sx_slock(&allprison_lock);
625 pr = prison_find(uap->jid);
627 sx_sunlock(&allprison_lock);
632 * Do not allow a process to attach to a prison that is not
633 * considered to be "ALIVE".
635 if (pr->pr_state != PRISON_STATE_ALIVE) {
636 mtx_unlock(&pr->pr_mtx);
637 sx_sunlock(&allprison_lock);
641 mtx_unlock(&pr->pr_mtx);
642 sx_sunlock(&allprison_lock);
645 * Reparent the newly attached process to this jail.
647 error = cpuset_setproc_update_set(p, pr->pr_cpuset);
651 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
652 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
653 if ((error = change_dir(pr->pr_root, td)) != 0)
656 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root)))
659 VOP_UNLOCK(pr->pr_root, 0);
660 change_root(pr->pr_root, td);
661 VFS_UNLOCK_GIANT(vfslocked);
665 oldcred = p->p_ucred;
667 crcopy(newcred, oldcred);
668 newcred->cr_prison = pr;
669 p->p_ucred = newcred;
670 prison_proc_hold(pr);
675 VOP_UNLOCK(pr->pr_root, 0);
676 VFS_UNLOCK_GIANT(vfslocked);
678 mtx_lock(&pr->pr_mtx);
680 mtx_unlock(&pr->pr_mtx);
685 * Returns a locked prison instance, or NULL on failure.
688 prison_find(int prid)
692 sx_assert(&allprison_lock, SX_LOCKED);
693 LIST_FOREACH(pr, &allprison, pr_list) {
694 if (pr->pr_id == prid) {
695 mtx_lock(&pr->pr_mtx);
696 if (pr->pr_ref == 0) {
697 mtx_unlock(&pr->pr_mtx);
707 prison_free_locked(struct prison *pr)
710 mtx_assert(&pr->pr_mtx, MA_OWNED);
712 if (pr->pr_ref == 0) {
713 mtx_unlock(&pr->pr_mtx);
714 TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
715 taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
718 mtx_unlock(&pr->pr_mtx);
722 prison_free(struct prison *pr)
725 mtx_lock(&pr->pr_mtx);
726 prison_free_locked(pr);
730 prison_complete(void *context, int pending)
735 pr = (struct prison *)context;
737 sx_xlock(&allprison_lock);
738 LIST_REMOVE(pr, pr_list);
740 sx_xunlock(&allprison_lock);
742 cpuset_rel(pr->pr_cpuset);
744 /* Free all OSD associated to this jail. */
747 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
749 VFS_UNLOCK_GIANT(vfslocked);
751 mtx_destroy(&pr->pr_mtx);
752 free(pr->pr_linux, M_PRISON);
754 free(pr->pr_ip6, M_PRISON);
757 free(pr->pr_ip4, M_PRISON);
763 prison_hold_locked(struct prison *pr)
766 mtx_assert(&pr->pr_mtx, MA_OWNED);
767 KASSERT(pr->pr_ref > 0,
768 ("Trying to hold dead prison (id=%d).", pr->pr_id));
773 prison_hold(struct prison *pr)
776 mtx_lock(&pr->pr_mtx);
777 prison_hold_locked(pr);
778 mtx_unlock(&pr->pr_mtx);
782 prison_proc_hold(struct prison *pr)
785 mtx_lock(&pr->pr_mtx);
786 KASSERT(pr->pr_state == PRISON_STATE_ALIVE,
787 ("Cannot add a process to a non-alive prison (id=%d).", pr->pr_id));
789 mtx_unlock(&pr->pr_mtx);
793 prison_proc_free(struct prison *pr)
796 mtx_lock(&pr->pr_mtx);
797 KASSERT(pr->pr_state == PRISON_STATE_ALIVE && pr->pr_nprocs > 0,
798 ("Trying to kill a process in a dead prison (id=%d).", pr->pr_id));
800 if (pr->pr_nprocs == 0)
801 pr->pr_state = PRISON_STATE_DYING;
802 mtx_unlock(&pr->pr_mtx);
808 * Pass back primary IPv4 address of this jail.
810 * If not jailed return success but do not alter the address. Caller has to
811 * make sure to initialize it correctly (e.g. INADDR_ANY).
813 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
814 * Address returned in NBO.
817 prison_get_ip4(struct ucred *cred, struct in_addr *ia)
820 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
821 KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
824 /* Do not change address passed in. */
826 if (cred->cr_prison->pr_ip4 == NULL)
827 return (EAFNOSUPPORT);
829 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
834 * Make sure our (source) address is set to something meaningful to this
837 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
838 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv4.
839 * Address passed in in NBO and returned in NBO.
842 prison_local_ip4(struct ucred *cred, struct in_addr *ia)
846 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
847 KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
851 if (cred->cr_prison->pr_ip4 == NULL)
852 return (EAFNOSUPPORT);
854 ia0.s_addr = ntohl(ia->s_addr);
855 if (ia0.s_addr == INADDR_LOOPBACK) {
856 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
860 if (ia0.s_addr == INADDR_ANY) {
862 * In case there is only 1 IPv4 address, bind directly.
864 if (cred->cr_prison->pr_ip4s == 1)
865 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
869 return (_prison_check_ip4(cred->cr_prison, ia));
873 * Rewrite destination address in case we will connect to loopback address.
875 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
876 * Address passed in in NBO and returned in NBO.
879 prison_remote_ip4(struct ucred *cred, struct in_addr *ia)
882 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
883 KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
887 if (cred->cr_prison->pr_ip4 == NULL)
888 return (EAFNOSUPPORT);
890 if (ntohl(ia->s_addr) == INADDR_LOOPBACK) {
891 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
896 * Return success because nothing had to be changed.
902 * Check if given address belongs to the jail referenced by cred/prison.
904 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
905 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv4.
906 * Address passed in in NBO.
909 _prison_check_ip4(struct prison *pr, struct in_addr *ia)
914 * Check the primary IP.
916 if (pr->pr_ip4[0].s_addr == ia->s_addr)
920 * All the other IPs are sorted so we can do a binary search.
926 d = qcmp_v4(&pr->pr_ip4[i+1], ia);
935 return (EADDRNOTAVAIL);
939 prison_check_ip4(struct ucred *cred, struct in_addr *ia)
942 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
943 KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
947 if (cred->cr_prison->pr_ip4 == NULL)
948 return (EAFNOSUPPORT);
950 return (_prison_check_ip4(cred->cr_prison, ia));
956 * Pass back primary IPv6 address for this jail.
958 * If not jailed return success but do not alter the address. Caller has to
959 * make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT).
961 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
964 prison_get_ip6(struct ucred *cred, struct in6_addr *ia6)
967 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
968 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
972 if (cred->cr_prison->pr_ip6 == NULL)
973 return (EAFNOSUPPORT);
975 bcopy(&cred->cr_prison->pr_ip6[0], ia6, sizeof(struct in6_addr));
980 * Make sure our (source) address is set to something meaningful to this jail.
982 * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0)
983 * when needed while binding.
985 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
986 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv6.
989 prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only)
992 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
993 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
997 if (cred->cr_prison->pr_ip6 == NULL)
998 return (EAFNOSUPPORT);
1000 if (IN6_IS_ADDR_LOOPBACK(ia6)) {
1001 bcopy(&cred->cr_prison->pr_ip6[0], ia6,
1002 sizeof(struct in6_addr));
1006 if (IN6_IS_ADDR_UNSPECIFIED(ia6)) {
1008 * In case there is only 1 IPv6 address, and v6only is true,
1009 * then bind directly.
1011 if (v6only != 0 && cred->cr_prison->pr_ip6s == 1)
1012 bcopy(&cred->cr_prison->pr_ip6[0], ia6,
1013 sizeof(struct in6_addr));
1017 return (_prison_check_ip6(cred->cr_prison, ia6));
1021 * Rewrite destination address in case we will connect to loopback address.
1023 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
1026 prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6)
1029 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1030 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
1034 if (cred->cr_prison->pr_ip6 == NULL)
1035 return (EAFNOSUPPORT);
1037 if (IN6_IS_ADDR_LOOPBACK(ia6)) {
1038 bcopy(&cred->cr_prison->pr_ip6[0], ia6,
1039 sizeof(struct in6_addr));
1044 * Return success because nothing had to be changed.
1050 * Check if given address belongs to the jail referenced by cred/prison.
1052 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
1053 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv6.
1056 _prison_check_ip6(struct prison *pr, struct in6_addr *ia6)
1061 * Check the primary IP.
1063 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6))
1067 * All the other IPs are sorted so we can do a binary search.
1070 z = pr->pr_ip6s - 2;
1073 d = qcmp_v6(&pr->pr_ip6[i+1], ia6);
1082 return (EADDRNOTAVAIL);
1086 prison_check_ip6(struct ucred *cred, struct in6_addr *ia6)
1089 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1090 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
1094 if (cred->cr_prison->pr_ip6 == NULL)
1095 return (EAFNOSUPPORT);
1097 return (_prison_check_ip6(cred->cr_prison, ia6));
1102 * Check if a jail supports the given address family.
1104 * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT
1108 prison_check_af(struct ucred *cred, int af)
1112 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1123 if (cred->cr_prison->pr_ip4 == NULL)
1124 error = EAFNOSUPPORT;
1129 if (cred->cr_prison->pr_ip6 == NULL)
1130 error = EAFNOSUPPORT;
1137 if (jail_socket_unixiproute_only)
1138 error = EAFNOSUPPORT;
1144 * Check if given address belongs to the jail referenced by cred (wrapper to
1145 * prison_check_ip[46]).
1147 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
1148 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow
1149 * the address family. IPv4 Address passed in in NBO.
1152 prison_if(struct ucred *cred, struct sockaddr *sa)
1155 struct sockaddr_in *sai;
1158 struct sockaddr_in6 *sai6;
1162 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1163 KASSERT(sa != NULL, ("%s: sa is NULL", __func__));
1166 switch (sa->sa_family)
1170 sai = (struct sockaddr_in *)sa;
1171 error = prison_check_ip4(cred, &sai->sin_addr);
1176 sai6 = (struct sockaddr_in6 *)sa;
1177 error = prison_check_ip6(cred, &sai6->sin6_addr);
1181 if (jailed(cred) && jail_socket_unixiproute_only)
1182 error = EAFNOSUPPORT;
1188 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
1191 prison_check(struct ucred *cred1, struct ucred *cred2)
1194 if (jailed(cred1)) {
1197 if (cred2->cr_prison != cred1->cr_prison)
1205 * Return 1 if the passed credential is in a jail, otherwise 0.
1208 jailed(struct ucred *cred)
1211 return (cred->cr_prison != NULL);
1215 * Return the correct hostname for the passed credential.
1218 getcredhostname(struct ucred *cred, char *buf, size_t size)
1220 INIT_VPROCG(cred->cr_vimage->v_procg);
1223 mtx_lock(&cred->cr_prison->pr_mtx);
1224 strlcpy(buf, cred->cr_prison->pr_host, size);
1225 mtx_unlock(&cred->cr_prison->pr_mtx);
1227 mtx_lock(&hostname_mtx);
1228 strlcpy(buf, V_hostname, size);
1229 mtx_unlock(&hostname_mtx);
1234 * Determine whether the subject represented by cred can "see"
1235 * status of a mount point.
1236 * Returns: 0 for permitted, ENOENT otherwise.
1237 * XXX: This function should be called cr_canseemount() and should be
1238 * placed in kern_prot.c.
1241 prison_canseemount(struct ucred *cred, struct mount *mp)
1247 if (!jailed(cred) || jail_enforce_statfs == 0)
1249 pr = cred->cr_prison;
1250 if (pr->pr_root->v_mount == mp)
1252 if (jail_enforce_statfs == 2)
1255 * If jail's chroot directory is set to "/" we should be able to see
1256 * all mount-points from inside a jail.
1257 * This is an ugly check, but this is the only situation when jail's
1258 * directory ends with '/'.
1260 if (strcmp(pr->pr_path, "/") == 0)
1262 len = strlen(pr->pr_path);
1264 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
1267 * Be sure that we don't have a situation where jail's root directory
1268 * is "/some/path" and mount point is "/some/pathpath".
1270 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
1276 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
1278 char jpath[MAXPATHLEN];
1282 if (!jailed(cred) || jail_enforce_statfs == 0)
1284 pr = cred->cr_prison;
1285 if (prison_canseemount(cred, mp) != 0) {
1286 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1287 strlcpy(sp->f_mntonname, "[restricted]",
1288 sizeof(sp->f_mntonname));
1291 if (pr->pr_root->v_mount == mp) {
1293 * Clear current buffer data, so we are sure nothing from
1294 * the valid path is left there.
1296 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1297 *sp->f_mntonname = '/';
1301 * If jail's chroot directory is set to "/" we should be able to see
1302 * all mount-points from inside a jail.
1304 if (strcmp(pr->pr_path, "/") == 0)
1306 len = strlen(pr->pr_path);
1307 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
1309 * Clear current buffer data, so we are sure nothing from
1310 * the valid path is left there.
1312 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1313 if (*jpath == '\0') {
1314 /* Should never happen. */
1315 *sp->f_mntonname = '/';
1317 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
1322 * Check whether permission for a specific privilege is granted within jail. We
1323 * have a specific list of accepted privileges; the rest are denied.
1326 prison_priv_check(struct ucred *cred, int priv)
1335 * Allow ktrace privileges for root in jail.
1341 * Allow jailed processes to configure audit identity and
1342 * submit audit records (login, etc). In the future we may
1343 * want to further refine the relationship between audit and
1346 case PRIV_AUDIT_GETAUDIT:
1347 case PRIV_AUDIT_SETAUDIT:
1348 case PRIV_AUDIT_SUBMIT:
1352 * Allow jailed processes to manipulate process UNIX
1353 * credentials in any way they see fit.
1355 case PRIV_CRED_SETUID:
1356 case PRIV_CRED_SETEUID:
1357 case PRIV_CRED_SETGID:
1358 case PRIV_CRED_SETEGID:
1359 case PRIV_CRED_SETGROUPS:
1360 case PRIV_CRED_SETREUID:
1361 case PRIV_CRED_SETREGID:
1362 case PRIV_CRED_SETRESUID:
1363 case PRIV_CRED_SETRESGID:
1366 * Jail implements visibility constraints already, so allow
1367 * jailed root to override uid/gid-based constraints.
1369 case PRIV_SEEOTHERGIDS:
1370 case PRIV_SEEOTHERUIDS:
1373 * Jail implements inter-process debugging limits already, so
1374 * allow jailed root various debugging privileges.
1376 case PRIV_DEBUG_DIFFCRED:
1377 case PRIV_DEBUG_SUGID:
1378 case PRIV_DEBUG_UNPRIV:
1381 * Allow jail to set various resource limits and login
1382 * properties, and for now, exceed process resource limits.
1384 case PRIV_PROC_LIMIT:
1385 case PRIV_PROC_SETLOGIN:
1386 case PRIV_PROC_SETRLIMIT:
1389 * System V and POSIX IPC privileges are granted in jail.
1392 case PRIV_IPC_WRITE:
1393 case PRIV_IPC_ADMIN:
1394 case PRIV_IPC_MSGSIZE:
1398 * Jail implements its own inter-process limits, so allow
1399 * root processes in jail to change scheduling on other
1400 * processes in the same jail. Likewise for signalling.
1402 case PRIV_SCHED_DIFFCRED:
1403 case PRIV_SCHED_CPUSET:
1404 case PRIV_SIGNAL_DIFFCRED:
1405 case PRIV_SIGNAL_SUGID:
1408 * Allow jailed processes to write to sysctls marked as jail
1411 case PRIV_SYSCTL_WRITEJAIL:
1414 * Allow root in jail to manage a variety of quota
1415 * properties. These should likely be conditional on a
1416 * configuration option.
1418 case PRIV_VFS_GETQUOTA:
1419 case PRIV_VFS_SETQUOTA:
1422 * Since Jail relies on chroot() to implement file system
1423 * protections, grant many VFS privileges to root in jail.
1424 * Be careful to exclude mount-related and NFS-related
1428 case PRIV_VFS_WRITE:
1429 case PRIV_VFS_ADMIN:
1431 case PRIV_VFS_LOOKUP:
1432 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */
1433 case PRIV_VFS_CHFLAGS_DEV:
1434 case PRIV_VFS_CHOWN:
1435 case PRIV_VFS_CHROOT:
1436 case PRIV_VFS_RETAINSUGID:
1437 case PRIV_VFS_FCHROOT:
1439 case PRIV_VFS_SETGID:
1441 case PRIV_VFS_STICKYFILE:
1445 * Depending on the global setting, allow privilege of
1446 * setting system flags.
1448 case PRIV_VFS_SYSFLAGS:
1449 if (jail_chflags_allowed)
1455 * Depending on the global setting, allow privilege of
1456 * mounting/unmounting file systems.
1458 case PRIV_VFS_MOUNT:
1459 case PRIV_VFS_UNMOUNT:
1460 case PRIV_VFS_MOUNT_NONUSER:
1461 case PRIV_VFS_MOUNT_OWNER:
1462 if (jail_mount_allowed)
1468 * Allow jailed root to bind reserved ports and reuse in-use
1471 case PRIV_NETINET_RESERVEDPORT:
1472 case PRIV_NETINET_REUSEPORT:
1476 * Allow jailed root to set certain IPv4/6 (option) headers.
1478 case PRIV_NETINET_SETHDROPTS:
1482 * Conditionally allow creating raw sockets in jail.
1484 case PRIV_NETINET_RAW:
1485 if (jail_allow_raw_sockets)
1491 * Since jail implements its own visibility limits on netstat
1492 * sysctls, allow getcred. This allows identd to work in
1495 case PRIV_NETINET_GETCRED:
1500 * In all remaining cases, deny the privilege request. This
1501 * includes almost all network privileges, many system
1502 * configuration privileges.
1509 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
1511 struct xprison *xp, *sxp;
1517 if (jailed(req->td->td_ucred))
1520 sx_slock(&allprison_lock);
1521 if ((count = prisoncount) == 0) {
1522 sx_sunlock(&allprison_lock);
1526 len = sizeof(*xp) * count;
1527 LIST_FOREACH(pr, &allprison, pr_list) {
1529 len += pr->pr_ip4s * sizeof(struct in_addr);
1532 len += pr->pr_ip6s * sizeof(struct in6_addr);
1536 sxp = xp = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
1538 LIST_FOREACH(pr, &allprison, pr_list) {
1539 xp->pr_version = XPRISON_VERSION;
1540 xp->pr_id = pr->pr_id;
1541 xp->pr_state = pr->pr_state;
1542 xp->pr_cpusetid = pr->pr_cpuset->cs_id;
1543 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
1544 mtx_lock(&pr->pr_mtx);
1545 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
1546 strlcpy(xp->pr_name, pr->pr_name, sizeof(xp->pr_name));
1547 mtx_unlock(&pr->pr_mtx);
1549 xp->pr_ip4s = pr->pr_ip4s;
1552 xp->pr_ip6s = pr->pr_ip6s;
1554 p = (char *)(xp + 1);
1556 if (pr->pr_ip4s > 0) {
1557 bcopy(pr->pr_ip4, (struct in_addr *)p,
1558 pr->pr_ip4s * sizeof(struct in_addr));
1559 p += (pr->pr_ip4s * sizeof(struct in_addr));
1563 if (pr->pr_ip6s > 0) {
1564 bcopy(pr->pr_ip6, (struct in6_addr *)p,
1565 pr->pr_ip6s * sizeof(struct in6_addr));
1566 p += (pr->pr_ip6s * sizeof(struct in6_addr));
1569 xp = (struct xprison *)p;
1571 sx_sunlock(&allprison_lock);
1573 error = SYSCTL_OUT(req, sxp, len);
1578 SYSCTL_OID(_security_jail, OID_AUTO, list,
1579 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
1580 sysctl_jail_list, "S", "List of active jails");
1583 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
1587 injail = jailed(req->td->td_ucred);
1588 error = SYSCTL_OUT(req, &injail, sizeof(injail));
1592 SYSCTL_PROC(_security_jail, OID_AUTO, jailed,
1593 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
1594 sysctl_jail_jailed, "I", "Process in jail?");
1597 DB_SHOW_COMMAND(jails, db_show_jails)
1604 char ip6buf[INET6_ADDRSTRLEN];
1607 #if defined(INET) || defined(INET6)
1612 " JID pr_ref pr_nprocs pr_ip4s pr_ip6s\n");
1614 " Hostname Path\n");
1620 " IP Address(es)\n");
1621 LIST_FOREACH(pr, &allprison, pr_list) {
1622 db_printf("%6d %6d %9d %7d %7d\n",
1623 pr->pr_id, pr->pr_ref, pr->pr_nprocs,
1624 pr->pr_ip4s, pr->pr_ip6s);
1625 db_printf("%6s %-29.29s %.74s\n",
1626 "", pr->pr_host, pr->pr_path);
1627 if (pr->pr_state < 0 || pr->pr_state >= (int)((sizeof(
1628 prison_states) / sizeof(struct prison_state))))
1631 state = prison_states[pr->pr_state].state_name;
1632 db_printf("%6s %-29.29s %.74s\n",
1633 "", (pr->pr_name[0] != '\0') ? pr->pr_name : "", state);
1634 db_printf("%6s %-6d\n",
1635 "", pr->pr_cpuset->cs_id);
1637 for (i=0; i < pr->pr_ip4s; i++) {
1638 ia.s_addr = pr->pr_ip4[i].s_addr;
1639 db_printf("%6s %s\n", "", inet_ntoa(ia));
1643 for (i=0; i < pr->pr_ip6s; i++)
1644 db_printf("%6s %s\n",
1645 "", ip6_sprintf(ip6buf, &pr->pr_ip6[i]));