From c105662cf0c42f1daddcec11ea9f8549f2408178 Mon Sep 17 00:00:00 2001 From: jhb Date: Thu, 30 May 2013 19:24:29 +0000 Subject: [PATCH] MFC 246417,247116,248584: Rework the handling of stop signals in the NFS client. The changes in 195702, 195703, and 195821 prevented a thread from suspending while holding locks inside of NFS by forcing the thread to fail sleeps with EINTR or ERESTART but defer the thread suspension to the user boundary. However, this had the effect that stopping a process during an NFS request could abort the request and trigger EINTR errors that were visible to userland processes (previously the thread would have suspended and completed the request once it was resumed). This change instead effectively masks stop signals while in the NFS client. It uses the existing TDF_SBDRY flag to effect this since SIGSTOP cannot be masked directly. Instead of setting PBDRY on individual sleeps, change the VFS_*() and VOP_*() methods to defer stop signals for filesystems which request this behavior via a new VFCF_SBDRY flag. Note that this has to be a VFC flag rather than a MNTK flag so that it works properly with VFS_MOUNT() when the mount is not yet fully constructed. For now, only the NFS clients set this new flag in VFS_SET(). A few other related changes: - Add an assertion to ensure that TDF_SBDRY doesn't leak to userland. - When a lookup request uses VOP_READLINK() to follow a symlink, mark the request as being on behalf of the thread performing the lookup (cnp_thread) rather than using a NULL thread pointer. This causes NFS to properly handle signals during this VOP on an interruptible mount. - Ignore thread suspend requests due to SIGSTOP if stop signals are currently deferred. This can occur if a process is stopped via SIGSTOP while a thread is running or runnable but before it has set TDF_SBDRY. git-svn-id: svn://svn.freebsd.org/base/stable/8@251148 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- sys/fs/nfs/nfs_commonkrpc.c | 6 +- sys/fs/nfsclient/nfs_clvfsops.c | 2 +- sys/gnu/fs/ext2fs/ext2_alloc.c | 1 + sys/kern/kern_sig.c | 60 +++++++++++--- sys/kern/kern_thread.c | 11 +++ sys/kern/subr_sleepqueue.c | 6 +- sys/kern/subr_trap.c | 2 + sys/kern/vfs_export.c | 1 + sys/kern/vfs_lookup.c | 2 +- sys/nfsclient/nfs_krpc.c | 6 +- sys/nfsclient/nfs_vfsops.c | 2 +- sys/sys/mount.h | 139 +++++++++++++++++++++++++++----- sys/sys/signalvar.h | 2 + sys/tools/vnode_if.awk | 3 + 14 files changed, 197 insertions(+), 46 deletions(-) diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c index 3c6e9b403..d17517f2f 100644 --- a/sys/fs/nfs/nfs_commonkrpc.c +++ b/sys/fs/nfs/nfs_commonkrpc.c @@ -885,7 +885,6 @@ int newnfs_sig_set[] = { SIGTERM, SIGHUP, SIGKILL, - SIGSTOP, SIGQUIT }; @@ -906,7 +905,7 @@ nfs_sig_pending(sigset_t set) /* * The set/restore sigmask functions are used to (temporarily) overwrite - * the process p_sigmask during an RPC call (for example). These are also + * the thread td_sigmask during an RPC call (for example). These are also * used in other places in the NFS client that might tsleep(). */ void @@ -935,8 +934,9 @@ newnfs_set_sigmask(struct thread *td, sigset_t *oldset) SIGDELSET(newset, newnfs_sig_set[i]); } mtx_unlock(&p->p_sigacts->ps_mtx); + kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, + SIGPROCMASK_PROC_LOCKED); PROC_UNLOCK(p); - kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0); } void diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index d6b1d7c54..a14bde0ac 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -130,7 +130,7 @@ static struct vfsops nfs_vfsops = { .vfs_unmount = nfs_unmount, .vfs_sysctl = nfs_sysctl, }; -VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK); +VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK | VFCF_SBDRY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(newnfs, 1); diff --git a/sys/gnu/fs/ext2fs/ext2_alloc.c b/sys/gnu/fs/ext2fs/ext2_alloc.c index 190a47264..d3faf5cac 100644 --- a/sys/gnu/fs/ext2fs/ext2_alloc.c +++ b/sys/gnu/fs/ext2fs/ext2_alloc.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 7533ad6d5..7de35908e 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -2350,6 +2350,13 @@ tdsigwakeup(struct thread *td, int sig, sig_t action, int intrval) return; } + /* + * Don't awaken a sleeping thread for SIGSTOP if the + * STOP signal is deferred. + */ + if ((prop & SA_STOP) && (td->td_flags & TDF_SBDRY)) + goto out; + /* * Give low priority threads a better chance to run. */ @@ -2391,12 +2398,13 @@ sig_suspend_threads(struct thread *td, struct proc *p, int sending) if ((TD_IS_SLEEPING(td2) || TD_IS_SWAPPED(td2)) && (td2->td_flags & TDF_SINTR)) { if (td2->td_flags & TDF_SBDRY) { - if (TD_IS_SUSPENDED(td2)) - wakeup_swapper |= - thread_unsuspend_one(td2); - if (TD_ON_SLEEPQ(td2)) - wakeup_swapper |= - sleepq_abort(td2, ERESTART); + /* + * Once a thread is asleep with + * TDF_SBDRY set, it should never + * become suspended due to this check. + */ + KASSERT(!TD_IS_SUSPENDED(td2), + ("thread with deferred stops suspended")); } else if (!TD_IS_SUSPENDED(td2)) { thread_suspend_one(td2); } @@ -2518,6 +2526,40 @@ tdsigcleanup(struct thread *td) } +/* + * Defer the delivery of SIGSTOP for the current thread. Returns true + * if stops were deferred and false if they were already deferred. + */ +int +sigdeferstop(void) +{ + struct thread *td; + + td = curthread; + if (td->td_flags & TDF_SBDRY) + return (0); + thread_lock(td); + td->td_flags |= TDF_SBDRY; + thread_unlock(td); + return (1); +} + +/* + * Permit the delivery of SIGSTOP for the current thread. This does + * not immediately suspend if a stop was posted. Instead, the thread + * will suspend either via ast() or a subsequent interruptible sleep. + */ +void +sigallowstop() +{ + struct thread *td; + + td = curthread; + thread_lock(td); + td->td_flags &= ~TDF_SBDRY; + thread_unlock(td); +} + /* * If the current process has received a signal (should be caught or cause * termination, should interrupt current syscall), return the signal number. @@ -2550,7 +2592,7 @@ issignal(struct thread *td, int stop_allowed) SIGSETOR(sigpending, p->p_sigqueue.sq_signals); SIGSETNAND(sigpending, td->td_sigmask); - if (p->p_flag & P_PPWAIT) + if (p->p_flag & P_PPWAIT || td->td_flags & TDF_SBDRY) SIG_STOPSIGMASK(sigpending); if (SIGISEMPTY(sigpending)) /* no signal to send */ return (0); @@ -2665,10 +2707,6 @@ issignal(struct thread *td, int stop_allowed) (p->p_pgrp->pg_jobc == 0 && prop & SA_TTYSTOP)) break; /* == ignore */ - - /* Ignore, but do not drop the stop signal. */ - if (stop_allowed != SIG_STOP_ALLOWED) - return (sig); mtx_unlock(&ps->ps_mtx); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &p->p_mtx.lock_object, "Catching SIGSTOP"); diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index 5cb8276b8..b367f2091 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -741,6 +741,17 @@ thread_suspend_check(int return_instead) (p->p_flag & P_SINGLE_BOUNDARY) && return_instead) return (ERESTART); + /* + * Ignore suspend requests for stop signals if they + * are deferred. + */ + if (P_SHOULDSTOP(p) == P_STOPPED_SIG && + td->td_flags & TDF_SBDRY) { + KASSERT(return_instead, + ("TDF_SBDRY set for unsafe thread_suspend_check")); + return (0); + } + /* If thread will exit, flush its pending signals */ if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) sigqueue_flush(&td->td_sigqueue); diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c index f70f75ea9..ae38e6eed 100644 --- a/sys/kern/subr_sleepqueue.c +++ b/sys/kern/subr_sleepqueue.c @@ -351,8 +351,6 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, if (flags & SLEEPQ_INTERRUPTIBLE) { td->td_flags |= TDF_SINTR; td->td_flags &= ~TDF_SLEEPABORT; - if (flags & SLEEPQ_STOP_ON_BDRY) - td->td_flags |= TDF_SBDRY; } thread_unlock(td); } @@ -591,7 +589,7 @@ sleepq_check_signals(void) /* We are no longer in an interruptible sleep. */ if (td->td_flags & TDF_SINTR) - td->td_flags &= ~(TDF_SINTR | TDF_SBDRY); + td->td_flags &= ~TDF_SINTR; if (td->td_flags & TDF_SLEEPABORT) { td->td_flags &= ~TDF_SLEEPABORT; @@ -738,7 +736,7 @@ sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri) td->td_wmesg = NULL; td->td_wchan = NULL; - td->td_flags &= ~(TDF_SINTR | TDF_SBDRY); + td->td_flags &= ~TDF_SINTR; CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, td->td_name); diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 63b072c68..fc77baa6c 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -134,6 +134,8 @@ userret(struct thread *td, struct trapframe *frame) ("userret: Returning with %d locks held.", td->td_locks)); KASSERT(td->td_vp_reserv == 0, ("userret: Returning while holding vnode reservation")); + KASSERT((td->td_flags & TDF_SBDRY) == 0, + ("userret: Returning with stop signals deferred")); #ifdef VIMAGE /* Unfortunately td_vnet_lpush needs VNET_DEBUG. */ VNET_ASSERT(curvnet == NULL, diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c index 114c23ed5..6a3f2916c 100644 --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 5797e7a16..48b28e667 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -323,7 +323,7 @@ namei(struct nameidata *ndp) auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; - auio.uio_td = (struct thread *)0; + auio.uio_td = td; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { diff --git a/sys/nfsclient/nfs_krpc.c b/sys/nfsclient/nfs_krpc.c index 51b7b237f..73455382e 100644 --- a/sys/nfsclient/nfs_krpc.c +++ b/sys/nfsclient/nfs_krpc.c @@ -699,7 +699,6 @@ int nfs_sig_set[] = { SIGTERM, SIGHUP, SIGKILL, - SIGSTOP, SIGQUIT }; @@ -720,7 +719,7 @@ nfs_sig_pending(sigset_t set) /* * The set/restore sigmask functions are used to (temporarily) overwrite - * the process p_sigmask during an RPC call (for example). These are also + * the thread td_sigmask during an RPC call (for example). These are also * used in other places in the NFS client that might tsleep(). */ void @@ -749,8 +748,9 @@ nfs_set_sigmask(struct thread *td, sigset_t *oldset) SIGDELSET(newset, nfs_sig_set[i]); } mtx_unlock(&p->p_sigacts->ps_mtx); + kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, + SIGPROCMASK_PROC_LOCKED); PROC_UNLOCK(p); - kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0); } void diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c index b9dc8a13c..5f50dae10 100644 --- a/sys/nfsclient/nfs_vfsops.c +++ b/sys/nfsclient/nfs_vfsops.c @@ -144,7 +144,7 @@ static struct vfsops nfs_vfsops = { .vfs_unmount = nfs_unmount, .vfs_sysctl = nfs_sysctl, }; -VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK); +VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfs, 1); diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 62dd7595d..0d79eba02 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -465,6 +465,7 @@ struct ovfsconf { #define VFCF_UNICODE 0x00200000 /* stores file names as Unicode */ #define VFCF_JAIL 0x00400000 /* can be mounted from within a jail */ #define VFCF_DELEGADMIN 0x00800000 /* supports delegated administration */ +#define VFCF_SBDRY 0x01000000 /* defer stop requests */ typedef uint32_t fsctlop_t; @@ -598,28 +599,6 @@ struct vfsops { vfs_statfs_t __vfs_statfs; -#define VFS_MOUNT(MP) (*(MP)->mnt_op->vfs_mount)(MP) -#define VFS_UNMOUNT(MP, FORCE) (*(MP)->mnt_op->vfs_unmount)(MP, FORCE) -#define VFS_ROOT(MP, FLAGS, VPP) \ - (*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP) -#define VFS_QUOTACTL(MP, C, U, A) \ - (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A) -#define VFS_STATFS(MP, SBP) __vfs_statfs((MP), (SBP)) -#define VFS_SYNC(MP, WAIT) (*(MP)->mnt_op->vfs_sync)(MP, WAIT) -#define VFS_VGET(MP, INO, FLAGS, VPP) \ - (*(MP)->mnt_op->vfs_vget)(MP, INO, FLAGS, VPP) -#define VFS_FHTOVP(MP, FIDP, VPP) \ - (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, VPP) -#define VFS_CHECKEXP(MP, NAM, EXFLG, CRED, NUMSEC, SEC) \ - (*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED, NUMSEC, SEC) -#define VFS_EXTATTRCTL(MP, C, FN, NS, N) \ - (*(MP)->mnt_op->vfs_extattrctl)(MP, C, FN, NS, N) -#define VFS_SYSCTL(MP, OP, REQ) \ - (*(MP)->mnt_op->vfs_sysctl)(MP, OP, REQ) -#define VFS_SUSP_CLEAN(MP) \ - ({if (*(MP)->mnt_op->vfs_susp_clean != NULL) \ - (*(MP)->mnt_op->vfs_susp_clean)(MP); }) - #define VFS_NEEDSGIANT_(MP) \ ((MP) != NULL && ((MP)->mnt_kern_flag & MNTK_MPSAFE) == 0) @@ -651,6 +630,122 @@ vfs_statfs_t __vfs_statfs; mtx_assert(&Giant, MA_OWNED); \ } while (0) +#define VFS_PROLOGUE(MP) do { \ + int _enable_stops; \ + \ + _enable_stops = ((MP) != NULL && \ + ((MP)->mnt_vfc->vfc_flags & VFCF_SBDRY) && sigdeferstop()) + +#define VFS_EPILOGUE(MP) \ + if (_enable_stops) \ + sigallowstop(); \ +} while (0) + +#define VFS_MOUNT(MP) ({ \ + int _rc; \ + \ + VFS_PROLOGUE(MP); \ + _rc = (*(MP)->mnt_op->vfs_mount)(MP); \ + VFS_EPILOGUE(MP); \ + _rc; }) + +#define VFS_UNMOUNT(MP, FORCE) ({ \ + int _rc; \ + \ + VFS_PROLOGUE(MP); \ + _rc = (*(MP)->mnt_op->vfs_unmount)(MP, FORCE); \ + VFS_EPILOGUE(MP); \ + _rc; }) + +#define VFS_ROOT(MP, FLAGS, VPP) ({ \ + int _rc; \ + \ + VFS_PROLOGUE(MP); \ + _rc = (*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP); \ + VFS_EPILOGUE(MP); \ + _rc; }) + +#define VFS_QUOTACTL(MP, C, U, A) ({ \ + int _rc; \ + \ + VFS_PROLOGUE(MP); \ + _rc = (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A); \ + VFS_EPILOGUE(MP); \ + _rc; }) + +#define VFS_STATFS(MP, SBP) ({ \ + int _rc; \ + \ + VFS_PROLOGUE(MP); \ + _rc = __vfs_statfs((MP), (SBP)); \ + VFS_EPILOGUE(MP); \ + _rc; }) + +#define VFS_SYNC(MP, WAIT) ({ \ + int _rc; \ + \ + VFS_PROLOGUE(MP); \ + _rc = (*(MP)->mnt_op->vfs_sync)(MP, WAIT); \ + VFS_EPILOGUE(MP); \ + _rc; }) + +#define VFS_VGET(MP, INO, FLAGS, VPP) ({ \ + int _rc; \ + \ + VFS_PROLOGUE(MP); \ + _rc = (*(MP)->mnt_op->vfs_vget)(MP, INO, FLAGS, VPP); \ + VFS_EPILOGUE(MP); \ + _rc; }) + +#define VFS_FHTOVP(MP, FIDP, VPP) ({ \ + int _rc; \ + \ + VFS_PROLOGUE(MP); \ + _rc = (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, VPP); \ + VFS_EPILOGUE(MP); \ + _rc; }) + +#define VFS_CHECKEXP(MP, NAM, EXFLG, CRED, NUMSEC, SEC) ({ \ + int _rc; \ + \ + VFS_PROLOGUE(MP); \ + _rc = (*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED, NUMSEC,\ + SEC); \ + VFS_EPILOGUE(MP); \ + _rc; }) + +#define VFS_EXTATTRCTL(MP, C, FN, NS, N) ({ \ + int _rc; \ + \ + VFS_PROLOGUE(MP); \ + _rc = (*(MP)->mnt_op->vfs_extattrctl)(MP, C, FN, NS, N); \ + VFS_EPILOGUE(MP); \ + _rc; }) + +#define VFS_SYSCTL(MP, OP, REQ) ({ \ + int _rc; \ + \ + VFS_PROLOGUE(MP); \ + _rc = (*(MP)->mnt_op->vfs_sysctl)(MP, OP, REQ); \ + VFS_EPILOGUE(MP); \ + _rc; }) + +#define VFS_SUSP_CLEAN(MP) do { \ + if (*(MP)->mnt_op->vfs_susp_clean != NULL) { \ + VFS_PROLOGUE(MP); \ + (*(MP)->mnt_op->vfs_susp_clean)(MP); \ + VFS_EPILOGUE(MP); \ + } \ +} while (0) + +#define VFS_RECLAIM_LOWERVP(MP, VP) do { \ + if (*(MP)->mnt_op->vfs_reclaim_lowervp != NULL) { \ + VFS_PROLOGUE(MP); \ + (*(MP)->mnt_op->vfs_reclaim_lowervp)((MP), (VP)); \ + VFS_EPILOGUE(MP); \ + } \ +} while (0) + #define VFS_KNOTE_LOCKED(vp, hint) do \ { \ if (((vp)->v_vflag & VV_NOKNOTE) == 0) \ diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h index d29890a36..4535e0d1c 100644 --- a/sys/sys/signalvar.h +++ b/sys/sys/signalvar.h @@ -327,6 +327,8 @@ extern int kern_logsigexit; /* Sysctl variable kern.logsigexit */ * Machine-independent functions: */ int cursig(struct thread *td, int stop_allowed); +int sigdeferstop(void); +void sigallowstop(void); void execsigs(struct proc *p); void gsignal(int pgid, int sig, ksiginfo_t *ksi); void killproc(struct proc *p, char *why); diff --git a/sys/tools/vnode_if.awk b/sys/tools/vnode_if.awk index 3fdea0a24..eb2d868cd 100644 --- a/sys/tools/vnode_if.awk +++ b/sys/tools/vnode_if.awk @@ -172,6 +172,7 @@ if (cfile) { "#include \n" \ "#include \n" \ "#include \n" \ + "#include \n" \ "#include \n" \ "#include \n" \ "\n" \ @@ -378,10 +379,12 @@ while ((getline < srcfile) > 0) { for (i = 0; i < numargs; ++i) add_debug_code(name, args[i], "Entry", "\t"); add_pre(name); + printc("\tVFS_PROLOGUE(a->a_" args[0]"->v_mount);") printc("\tif (vop->"name" != NULL)") printc("\t\trc = vop->"name"(a);") printc("\telse") printc("\t\trc = vop->vop_bypass(&a->a_gen);") + printc("\tVFS_EPILOGUE(a->a_" args[0]"->v_mount);") printc(ctrstr); printc("\tSDT_PROBE(vfs, vop, " name ", return, a->a_" args[0] ", a, rc, 0, 0);\n"); printc("\tif (rc == 0) {"); -- 2.45.0