From 34f8675c0e53a620591c663817300d58240d508a Mon Sep 17 00:00:00 2001 From: mjg Date: Wed, 15 Oct 2014 16:54:18 +0000 Subject: [PATCH] MFC r273109: fget_unlocked currently reads 'fde' which is a structure consisting of serveral fields. In effect the read is inatomic and may result in obtaining file pointer with stale or incorrect capabilities. Example race is with dup2. Side effect is that capability checks can be circumvented. Fix the problem with introduction of sequence counters. MFC r269023,r272503,r272505,r272523,r272567,r272569,r272574: Prepare fget_unlocked for reading fd table only once. Some capsicum functions accept fdp + fd and lookup fde based on that. Add variants which accept fde. =============================== Add sequence counters with memory barriers. Current implementation is somewhat simplistic and hackish, will be improved later after possible memory barrier overhaul. =============================== Plug capability races. fp and appropriate capability lookups were not atomic, which could result in improper capabilities being checked. This could result either in protection bypass or in a spurious ENOTCAPABLE. Make fp + capability check atomic with the help of sequence counters. =============================== Put and #ifdef _KERNEL around the #include for opt_capsicum.h to hopefully allow the build to finish after r272505. =============================== filedesc: fix up breakage introduced in 272505 Include sequence counter supports incoditionally [1]. This fixes reprted build problems with e.g. nvidia driver due to missing opt_capsicum.h. Replace fishy looking sizeof with offsetof. Make fde_seq the last member in order to simplify calculations. =============================== Keep struct filedescent comments within 80-char limit. =============================== seq_t needs to be visible to userspace Approved by: re (kib) git-svn-id: svn://svn.freebsd.org/base/releng/10.1@273137 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- sys/kern/kern_descrip.c | 71 +++++++++++++++--- sys/kern/sys_capability.c | 27 +++++-- sys/sys/capability.h | 3 + sys/sys/filedesc.h | 10 ++- sys/sys/seq.h | 146 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 239 insertions(+), 18 deletions(-) create mode 100644 sys/sys/seq.h diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 14a2fcfc0..dcef6531a 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -304,11 +304,18 @@ _fdfree(struct filedesc *fdp, int fd, int last) struct filedescent *fde; fde = &fdp->fd_ofiles[fd]; +#ifdef CAPABILITIES + if (!last) + seq_write_begin(&fde->fde_seq); +#endif filecaps_free(&fde->fde_caps); if (last) return; - bzero(fde, sizeof(*fde)); + bzero(fde, fde_change_size); fdunused(fdp, fd); +#ifdef CAPABILITIES + seq_write_end(&fde->fde_seq); +#endif } static inline void @@ -899,13 +906,19 @@ do_dup(struct thread *td, int flags, int old, int new, /* * Duplicate the source descriptor. */ +#ifdef CAPABILITIES + seq_write_begin(&newfde->fde_seq); +#endif filecaps_free(&newfde->fde_caps); - *newfde = *oldfde; + memcpy(newfde, oldfde, fde_change_size); filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps); if ((flags & DUP_CLOEXEC) != 0) newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE; else newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE; +#ifdef CAPABILITIES + seq_write_end(&newfde->fde_seq); +#endif *retval = new; if (delfp != NULL) { @@ -1804,6 +1817,9 @@ finstall(struct thread *td, struct file *fp, int *fd, int flags, } fhold(fp); fde = &fdp->fd_ofiles[*fd]; +#ifdef CAPABILITIES + seq_write_begin(&fde->fde_seq); +#endif fde->fde_file = fp; if ((flags & O_CLOEXEC) != 0) fde->fde_flags |= UF_EXCLOSE; @@ -1811,6 +1827,9 @@ finstall(struct thread *td, struct file *fp, int *fd, int flags, filecaps_move(fcaps, &fde->fde_caps); else filecaps_fill(&fde->fde_caps); +#ifdef CAPABILITIES + seq_write_end(&fde->fde_seq); +#endif FILEDESC_XUNLOCK(fdp); return (0); } @@ -2336,9 +2355,13 @@ int fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp, int needfcntl, struct file **fpp, cap_rights_t *haverightsp) { +#ifdef CAPABILITIES + struct filedescent fde; +#endif struct file *fp; u_int count; #ifdef CAPABILITIES + seq_t seq; cap_rights_t haverights; int error; #endif @@ -2358,17 +2381,27 @@ fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp, * due to preemption. */ for (;;) { +#ifdef CAPABILITIES + seq = seq_read(fd_seq(fdp, fd)); + fde = fdp->fd_ofiles[fd]; + if (!seq_consistent(fd_seq(fdp, fd), seq)) { + cpu_spinwait(); + continue; + } + fp = fde.fde_file; +#else fp = fdp->fd_ofiles[fd].fde_file; +#endif if (fp == NULL) return (EBADF); #ifdef CAPABILITIES - haverights = *cap_rights(fdp, fd); + haverights = *cap_rights_fde(&fde); if (needrightsp != NULL) { error = cap_check(&haverights, needrightsp); if (error != 0) return (error); if (cap_rights_is_set(needrightsp, CAP_FCNTL)) { - error = cap_fcntl_check(fdp, fd, needfcntl); + error = cap_fcntl_check_fde(&fde, needfcntl); if (error != 0) return (error); } @@ -2383,7 +2416,11 @@ fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp, */ if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1) continue; +#ifdef CAPABILITIES + if (seq_consistent_nomb(fd_seq(fdp, fd), seq)) +#else if (fp == fdp->fd_ofiles[fd].fde_file) +#endif break; fdrop(fp, curthread); } @@ -2740,6 +2777,7 @@ int dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp) { + struct filedescent *newfde, *oldfde; struct file *fp; int error, indx; @@ -2783,17 +2821,32 @@ dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, return (EACCES); } fhold(fp); - fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; - filecaps_copy(&fdp->fd_ofiles[dfd].fde_caps, - &fdp->fd_ofiles[indx].fde_caps); + newfde = &fdp->fd_ofiles[indx]; + oldfde = &fdp->fd_ofiles[dfd]; +#ifdef CAPABILITIES + seq_write_begin(&newfde->fde_seq); +#endif + memcpy(newfde, oldfde, fde_change_size); + filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps); +#ifdef CAPABILITIES + seq_write_end(&newfde->fde_seq); +#endif break; case ENXIO: /* * Steal away the file pointer from dfd and stuff it into indx. */ - fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; - bzero(&fdp->fd_ofiles[dfd], sizeof(fdp->fd_ofiles[dfd])); + newfde = &fdp->fd_ofiles[indx]; + oldfde = &fdp->fd_ofiles[dfd]; +#ifdef CAPABILITIES + seq_write_begin(&newfde->fde_seq); +#endif + memcpy(newfde, oldfde, fde_change_size); + bzero(oldfde, fde_change_size); fdunused(fdp, dfd); +#ifdef CAPABILITIES + seq_write_end(&newfde->fde_seq); +#endif break; } FILEDESC_XUNLOCK(fdp); diff --git a/sys/kern/sys_capability.c b/sys/kern/sys_capability.c index 7a8201796..44a195a9e 100644 --- a/sys/kern/sys_capability.c +++ b/sys/kern/sys_capability.c @@ -199,11 +199,19 @@ cap_rights_to_vmprot(cap_rights_t *havep) * any other way, as we want to keep all capability permission evaluation in * this one file. */ + +cap_rights_t * +cap_rights_fde(struct filedescent *fde) +{ + + return (&fde->fde_rights); +} + cap_rights_t * cap_rights(struct filedesc *fdp, int fd) { - return (&fdp->fd_ofiles[fd].fde_rights); + return (cap_rights_fde(&fdp->fd_ofiles[fd])); } /* @@ -486,23 +494,30 @@ out: * Test whether a capability grants the given fcntl command. */ int -cap_fcntl_check(struct filedesc *fdp, int fd, int cmd) +cap_fcntl_check_fde(struct filedescent *fde, int cmd) { uint32_t fcntlcap; - KASSERT(fd >= 0 && fd < fdp->fd_nfiles, - ("%s: invalid fd=%d", __func__, fd)); - fcntlcap = (1 << cmd); KASSERT((CAP_FCNTL_ALL & fcntlcap) != 0, ("Unsupported fcntl=%d.", cmd)); - if ((fdp->fd_ofiles[fd].fde_fcntls & fcntlcap) != 0) + if ((fde->fde_fcntls & fcntlcap) != 0) return (0); return (ENOTCAPABLE); } +int +cap_fcntl_check(struct filedesc *fdp, int fd, int cmd) +{ + + KASSERT(fd >= 0 && fd < fdp->fd_nfiles, + ("%s: invalid fd=%d", __func__, fd)); + + return (cap_fcntl_check_fde(&fdp->fd_ofiles[fd], cmd)); +} + int sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) { diff --git a/sys/sys/capability.h b/sys/sys/capability.h index fcde6c165..d982f8f1a 100644 --- a/sys/sys/capability.h +++ b/sys/sys/capability.h @@ -343,6 +343,7 @@ bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little); #define IN_CAPABILITY_MODE(td) ((td->td_ucred->cr_flags & CRED_FLAG_CAPMODE) != 0) struct filedesc; +struct filedescent; /* * Test whether a capability grants the requested rights. @@ -357,9 +358,11 @@ u_char cap_rights_to_vmprot(cap_rights_t *havep); * For the purposes of procstat(1) and similar tools, allow kern_descrip.c to * extract the rights from a capability. */ +cap_rights_t *cap_rights_fde(struct filedescent *fde); cap_rights_t *cap_rights(struct filedesc *fdp, int fd); int cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd); +int cap_fcntl_check_fde(struct filedescent *fde, int cmd); int cap_fcntl_check(struct filedesc *fdp, int fd, int cmd); #else /* !_KERNEL */ diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index e1d236306..ca980ef08 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -50,14 +51,16 @@ struct filecaps { }; struct filedescent { - struct file *fde_file; /* file structure for open file */ - struct filecaps fde_caps; /* per-descriptor rights */ - uint8_t fde_flags; /* per-process open file flags */ + struct file *fde_file; /* file structure for open file */ + struct filecaps fde_caps; /* per-descriptor rights */ + uint8_t fde_flags; /* per-process open file flags */ + seq_t fde_seq; /* keep file and caps in sync */ }; #define fde_rights fde_caps.fc_rights #define fde_fcntls fde_caps.fc_fcntls #define fde_ioctls fde_caps.fc_ioctls #define fde_nioctls fde_caps.fc_nioctls +#define fde_change_size (offsetof(struct filedescent, fde_seq)) /* * This structure is used for the management of descriptors. It may be @@ -82,6 +85,7 @@ struct filedesc { int fd_holdleaderscount; /* block fdfree() for shared close() */ int fd_holdleaderswakeup; /* fdfree() needs wakeup */ }; +#define fd_seq(fdp, fd) (&(fdp)->fd_ofiles[(fd)].fde_seq) /* * Structure to keep track of (process leader, struct fildedesc) tuples. diff --git a/sys/sys/seq.h b/sys/sys/seq.h new file mode 100644 index 000000000..799b3a994 --- /dev/null +++ b/sys/sys/seq.h @@ -0,0 +1,146 @@ +/*- + * Copyright (c) 2014 Mateusz Guzik + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS_SEQ_H_ +#define _SYS_SEQ_H_ + +#ifdef _KERNEL +#include +#endif +#include + +/* + * seq_t may be included in structs visible to userspace + */ +typedef uint32_t seq_t; + +#ifdef _KERNEL + +/* + * Typical usage: + * + * writers: + * lock_exclusive(&obj->lock); + * seq_write_begin(&obj->seq); + * ..... + * seq_write_end(&obj->seq); + * unlock_exclusive(&obj->unlock); + * + * readers: + * obj_t lobj; + * seq_t seq; + * + * for (;;) { + * seq = seq_read(&gobj->seq); + * lobj = gobj; + * if (seq_consistent(&gobj->seq, seq)) + * break; + * cpu_spinwait(); + * } + * foo(lobj); + */ + +/* A hack to get MPASS macro */ +#include + +#include + +/* + * This is a temporary hack until memory barriers are cleaned up. + * + * atomic_load_acq_int at least on amd64 provides a full memory barrier, + * in a way which affects perforance. + * + * Hack below covers all architectures and avoids most of the penalty at least + * on amd64. + */ +static __inline int +atomic_load_acq_rmb_int(volatile u_int *p) +{ + volatile u_int v; + + v = *p; + atomic_load_acq_int(&v); + return (v); +} + +static __inline bool +seq_in_modify(seq_t seqp) +{ + + return (seqp & 1); +} + +static __inline void +seq_write_begin(seq_t *seqp) +{ + + MPASS(!seq_in_modify(*seqp)); + atomic_add_acq_int(seqp, 1); +} + +static __inline void +seq_write_end(seq_t *seqp) +{ + + atomic_add_rel_int(seqp, 1); + MPASS(!seq_in_modify(*seqp)); +} + +static __inline seq_t +seq_read(seq_t *seqp) +{ + seq_t ret; + + for (;;) { + ret = atomic_load_acq_rmb_int(seqp); + if (seq_in_modify(ret)) { + cpu_spinwait(); + continue; + } + break; + } + + return (ret); +} + +static __inline seq_t +seq_consistent(seq_t *seqp, seq_t oldseq) +{ + + return (atomic_load_acq_rmb_int(seqp) == oldseq); +} + +static __inline seq_t +seq_consistent_nomb(seq_t *seqp, seq_t oldseq) +{ + + return (*seqp == oldseq); +} + +#endif /* _KERNEL */ +#endif /* _SYS_SEQ_H_ */ -- 2.45.0