/*-
 * Copyright (c) 2014 John Baldwin
 * Copyright (c) 2014, 2016 The FreeBSD Foundation
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/_unrhdr.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>
#include <sys/taskqueue.h>
#include <sys/wait.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

static int
protect_setchild(struct thread *td, struct proc *p, int flags)
{

        PROC_LOCK_ASSERT(p, MA_OWNED);
        if (p->p_flag & P_SYSTEM || p_cansched(td, p) != 0)
                return (0);
        if (flags & PPROT_SET) {
                p->p_flag |= P_PROTECTED;
                if (flags & PPROT_INHERIT)
                        p->p_flag2 |= P2_INHERIT_PROTECTED;
        } else {
                p->p_flag &= ~P_PROTECTED;
                p->p_flag2 &= ~P2_INHERIT_PROTECTED;
        }
        return (1);
}

static int
protect_setchildren(struct thread *td, struct proc *top, int flags)
{
        struct proc *p;
        int ret;

        p = top;
        ret = 0;
        sx_assert(&proctree_lock, SX_LOCKED);
        for (;;) {
                ret |= protect_setchild(td, p, flags);
                PROC_UNLOCK(p);
                /*
                 * If this process has children, descend to them next,
                 * otherwise do any siblings, and if done with this level,
                 * follow back up the tree (but not past top).
                 */
                if (!LIST_EMPTY(&p->p_children))
                        p = LIST_FIRST(&p->p_children);
                else for (;;) {
                        if (p == top) {
                                PROC_LOCK(p);
                                return (ret);
                        }
                        if (LIST_NEXT(p, p_sibling)) {
                                p = LIST_NEXT(p, p_sibling);
                                break;
                        }
                        p = p->p_pptr;
                }
                PROC_LOCK(p);
        }
}

static int
protect_set(struct thread *td, struct proc *p, void *data)
{
        int error, flags, ret;

        flags = *(int *)data;
        switch (PPROT_OP(flags)) {
        case PPROT_SET:
        case PPROT_CLEAR:
                break;
        default:
                return (EINVAL);
        }

        if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0)
                return (EINVAL);

        error = priv_check(td, PRIV_VM_MADV_PROTECT);
        if (error)
                return (error);

        if (flags & PPROT_DESCEND)
                ret = protect_setchildren(td, p, flags);
        else
                ret = protect_setchild(td, p, flags);
        if (ret == 0)
                return (EPERM);
        return (0);
}
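
/*
 * Illustrative userland sketch (editor's note, not part of the original
 * source): protect_set() above backs the PROC_SPROTECT command, so a
 * suitably privileged process could exempt itself and its future
 * children from OOM-kill selection roughly like this (error handling
 * trimmed):
 *
 *      #include <sys/procctl.h>
 *      #include <unistd.h>
 *      #include <err.h>
 *
 *      int flags = PPROT_SET | PPROT_INHERIT;
 *      if (procctl(P_PID, getpid(), PROC_SPROTECT, &flags) != 0)
 *              warn("PROC_SPROTECT");
 *
 * PPROT_DESCEND instead of PPROT_INHERIT applies the change to the
 * existing descendants via protect_setchildren() above.
 */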

static int
reap_acquire(struct thread *td, struct proc *p, void *data __unused)
{

        sx_assert(&proctree_lock, SX_XLOCKED);
        if (p != td->td_proc)
                return (EPERM);
        if ((p->p_treeflag & P_TREE_REAPER) != 0)
                return (EBUSY);
        p->p_treeflag |= P_TREE_REAPER;
        /*
         * We do not reattach existing children and the whole tree
         * under them to us, since p->p_reaper has already seen them.
         */
        return (0);
}

static int
reap_release(struct thread *td, struct proc *p, void *data __unused)
{

        sx_assert(&proctree_lock, SX_XLOCKED);
        if (p != td->td_proc)
                return (EPERM);
        if (p == initproc)
                return (EINVAL);
        if ((p->p_treeflag & P_TREE_REAPER) == 0)
                return (EINVAL);
        reaper_abandon_children(p, false);
        return (0);
}
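
/*
 * Illustrative userland sketch (editor's note, not part of the original
 * source): reap_acquire() and reap_release() back PROC_REAP_ACQUIRE and
 * PROC_REAP_RELEASE, which act only on the calling process and take no
 * argument.  A process supervisor would typically do:
 *
 *      #include <sys/procctl.h>
 *      #include <unistd.h>
 *      #include <err.h>
 *
 *      if (procctl(P_PID, getpid(), PROC_REAP_ACQUIRE, NULL) != 0)
 *              err(1, "PROC_REAP_ACQUIRE");
 *      ... spawn and supervise descendants ...
 *      (void)procctl(P_PID, getpid(), PROC_REAP_RELEASE, NULL);
 *
 * Passing a non-NULL data pointer for these commands is rejected with
 * EINVAL by the no_nonnull_data check in sys_procctl() below.
 */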

static int
reap_status(struct thread *td, struct proc *p, void *data)
{
        struct proc *reap, *p2, *first_p;
        struct procctl_reaper_status *rs;

        rs = data;
        sx_assert(&proctree_lock, SX_LOCKED);
        if ((p->p_treeflag & P_TREE_REAPER) == 0) {
                reap = p->p_reaper;
        } else {
                reap = p;
                rs->rs_flags |= REAPER_STATUS_OWNED;
        }
        if (reap == initproc)
                rs->rs_flags |= REAPER_STATUS_REALINIT;
        rs->rs_reaper = reap->p_pid;
        rs->rs_descendants = 0;
        rs->rs_children = 0;
        if (!LIST_EMPTY(&reap->p_reaplist)) {
                first_p = LIST_FIRST(&reap->p_children);
                if (first_p == NULL)
                        first_p = LIST_FIRST(&reap->p_reaplist);
                rs->rs_pid = first_p->p_pid;
                LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
                        if (proc_realparent(p2) == reap)
                                rs->rs_children++;
                        rs->rs_descendants++;
                }
        } else {
                rs->rs_pid = -1;
        }
        return (0);
}
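
/*
 * Illustrative userland sketch (editor's note, not part of the original
 * source): reading the reaper status filled in by reap_status() above
 * might look like this (stdio and error handling omitted for brevity):
 *
 *      struct procctl_reaper_status rs;
 *
 *      if (procctl(P_PID, getpid(), PROC_REAP_STATUS, &rs) == 0) {
 *              if (rs.rs_flags & REAPER_STATUS_OWNED)
 *                      printf("reaper for %u descendants\n",
 *                          rs.rs_descendants);
 *              else
 *                      printf("reaped by pid %d\n", (int)rs.rs_reaper);
 *      }
 */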

static int
reap_getpids(struct thread *td, struct proc *p, void *data)
{
        struct proc *reap, *p2;
        struct procctl_reaper_pidinfo *pi, *pip;
        struct procctl_reaper_pids *rp;
        u_int i, n;
        int error;

        rp = data;
        sx_assert(&proctree_lock, SX_LOCKED);
        PROC_UNLOCK(p);
        reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
        n = i = 0;
        error = 0;
        LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling)
                n++;
        sx_unlock(&proctree_lock);
        if (rp->rp_count < n)
                n = rp->rp_count;
        pi = malloc(n * sizeof(*pi), M_TEMP, M_WAITOK);
        sx_slock(&proctree_lock);
        LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
                if (i == n)
                        break;
                pip = &pi[i];
                bzero(pip, sizeof(*pip));
                pip->pi_pid = p2->p_pid;
                pip->pi_subtree = p2->p_reapsubtree;
                pip->pi_flags = REAPER_PIDINFO_VALID;
                if (proc_realparent(p2) == reap)
                        pip->pi_flags |= REAPER_PIDINFO_CHILD;
                if ((p2->p_treeflag & P_TREE_REAPER) != 0)
                        pip->pi_flags |= REAPER_PIDINFO_REAPER;
                i++;
        }
        sx_sunlock(&proctree_lock);
        error = copyout(pi, rp->rp_pids, i * sizeof(*pi));
        free(pi, M_TEMP);
        sx_slock(&proctree_lock);
        PROC_LOCK(p);
        return (error);
}
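
/*
 * Illustrative userland sketch (editor's note, not part of the original
 * source): PROC_REAP_GETPIDS copies out at most rp_count entries, so a
 * caller typically sizes the array from rs_descendants obtained with
 * PROC_REAP_STATUS (see the sketch above) and relies on the
 * REAPER_PIDINFO_VALID flag in zero-initialized entries:
 *
 *      struct procctl_reaper_pids rp;
 *      struct procctl_reaper_pidinfo *pi;
 *
 *      pi = calloc(rs.rs_descendants, sizeof(*pi));
 *      rp.rp_count = rs.rs_descendants;
 *      rp.rp_pids = pi;
 *      if (procctl(P_PID, getpid(), PROC_REAP_GETPIDS, &rp) == 0) {
 *              for (u_int i = 0; i < rp.rp_count; i++)
 *                      if (pi[i].pi_flags & REAPER_PIDINFO_VALID)
 *                              printf("%d\n", (int)pi[i].pi_pid);
 *      }
 *
 * The snapshot may already be stale when the call returns, since the
 * proctree_lock is dropped around the copyout above.
 */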

struct reap_kill_proc_work {
        struct ucred *cr;
        struct proc *target;
        ksiginfo_t *ksi;
        struct procctl_reaper_kill *rk;
        int *error;
        struct task t;
};

static void
reap_kill_proc_locked(struct reap_kill_proc_work *w)
{
        int error1;
        bool need_stop;

        PROC_LOCK_ASSERT(w->target, MA_OWNED);
        PROC_ASSERT_HELD(w->target);

        error1 = cr_cansignal(w->cr, w->target, w->rk->rk_sig);
        if (error1 != 0) {
                if (*w->error == ESRCH) {
                        w->rk->rk_fpid = w->target->p_pid;
                        *w->error = error1;
                }
                return;
        }

        /*
         * need_stop indicates whether the target process needs to be
         * suspended before being signalled.  This is needed when we
         * guarantee that all processes in the subtree are signalled,
         * avoiding the race where a process that is not yet fully
         * linked into all structures during fork is skipped by the
         * iterator and escapes signalling.
         *
         * The thread cannot usefully stop itself anyway, and if
         * another thread of the current process forks while the
         * current thread signals the whole subtree, it is an
         * application race.
         */
        if ((w->target->p_flag & (P_KPROC | P_SYSTEM | P_STOPPED)) == 0)
                need_stop = thread_single(w->target, SINGLE_ALLPROC) == 0;
        else
                need_stop = false;

        (void)pksignal(w->target, w->rk->rk_sig, w->ksi);
        w->rk->rk_killed++;
        *w->error = error1;

        if (need_stop)
                thread_single_end(w->target, SINGLE_ALLPROC);
}

static void
reap_kill_proc_work(void *arg, int pending __unused)
{
        struct reap_kill_proc_work *w;

        w = arg;
        PROC_LOCK(w->target);
        if ((w->target->p_flag2 & P2_WEXIT) == 0)
                reap_kill_proc_locked(w);
        PROC_UNLOCK(w->target);

        sx_xlock(&proctree_lock);
        w->target = NULL;
        wakeup(&w->target);
        sx_xunlock(&proctree_lock);
}

struct reap_kill_tracker {
        struct proc *parent;
        TAILQ_ENTRY(reap_kill_tracker) link;
};

TAILQ_HEAD(reap_kill_tracker_head, reap_kill_tracker);

static void
reap_kill_sched(struct reap_kill_tracker_head *tracker, struct proc *p2)
{
        struct reap_kill_tracker *t;

        PROC_LOCK(p2);
        if ((p2->p_flag2 & P2_WEXIT) != 0) {
                PROC_UNLOCK(p2);
                return;
        }
        _PHOLD_LITE(p2);
        PROC_UNLOCK(p2);
        t = malloc(sizeof(struct reap_kill_tracker), M_TEMP, M_WAITOK);
        t->parent = p2;
        TAILQ_INSERT_TAIL(tracker, t, link);
}

static void
reap_kill_sched_free(struct reap_kill_tracker *t)
{
        PRELE(t->parent);
        free(t, M_TEMP);
}

static void
reap_kill_children(struct thread *td, struct proc *reaper,
    struct procctl_reaper_kill *rk, ksiginfo_t *ksi, int *error)
{
        struct proc *p2;
        int error1;

        LIST_FOREACH(p2, &reaper->p_children, p_sibling) {
                PROC_LOCK(p2);
                if ((p2->p_flag2 & P2_WEXIT) == 0) {
                        error1 = p_cansignal(td, p2, rk->rk_sig);
                        if (error1 != 0) {
                                if (*error == ESRCH) {
                                        rk->rk_fpid = p2->p_pid;
                                        *error = error1;
                                }

                                /*
                                 * Do not end the loop on error,
                                 * signal everything we can.
                                 */
                        } else {
                                (void)pksignal(p2, rk->rk_sig, ksi);
                                rk->rk_killed++;
                        }
                }
                PROC_UNLOCK(p2);
        }
}

static bool
reap_kill_subtree_once(struct thread *td, struct proc *p, struct proc *reaper,
    struct unrhdr *pids, struct reap_kill_proc_work *w)
{
        struct reap_kill_tracker_head tracker;
        struct reap_kill_tracker *t;
        struct proc *p2;
        int r, xlocked;
        bool res, st;

        res = false;
        TAILQ_INIT(&tracker);
        reap_kill_sched(&tracker, reaper);
        while ((t = TAILQ_FIRST(&tracker)) != NULL) {
                TAILQ_REMOVE(&tracker, t, link);

                /*
                 * Since reap_kill_proc() drops the proctree_lock sx, it
                 * is possible that the tracked reaper is no longer a
                 * reaper.  In this case the subtree is reparented to
                 * the new reaper, which should handle it.
                 */
                if ((t->parent->p_treeflag & P_TREE_REAPER) == 0) {
                        reap_kill_sched_free(t);
                        res = true;
                        continue;
                }

                LIST_FOREACH(p2, &t->parent->p_reaplist, p_reapsibling) {
                        if (t->parent == reaper &&
                            (w->rk->rk_flags & REAPER_KILL_SUBTREE) != 0 &&
                            p2->p_reapsubtree != w->rk->rk_subtree)
                                continue;
                        if ((p2->p_treeflag & P_TREE_REAPER) != 0)
                                reap_kill_sched(&tracker, p2);
                        if (alloc_unr_specific(pids, p2->p_pid) != p2->p_pid)
                                continue;
                        if (p2 == td->td_proc) {
                                if ((p2->p_flag & P_HADTHREADS) != 0 &&
                                    (p2->p_flag2 & P2_WEXIT) == 0) {
                                        xlocked = sx_xlocked(&proctree_lock);
                                        sx_unlock(&proctree_lock);
                                        st = true;
                                } else {
                                        st = false;
                                }
                                PROC_LOCK(p2);
                                if (st)
                                        r = thread_single(p2, SINGLE_NO_EXIT);
                                (void)pksignal(p2, w->rk->rk_sig, w->ksi);
                                w->rk->rk_killed++;
                                if (st && r == 0)
                                        thread_single_end(p2, SINGLE_NO_EXIT);
                                PROC_UNLOCK(p2);
                                if (st) {
                                        if (xlocked)
                                                sx_xlock(&proctree_lock);
                                        else
                                                sx_slock(&proctree_lock);
                                }
                        } else {
                                PROC_LOCK(p2);
                                if ((p2->p_flag2 & P2_WEXIT) == 0) {
                                        _PHOLD_LITE(p2);
                                        PROC_UNLOCK(p2);
                                        w->target = p2;
                                        taskqueue_enqueue(taskqueue_thread,
                                            &w->t);
                                        while (w->target != NULL) {
                                                sx_sleep(&w->target,
                                                    &proctree_lock, PWAIT,
                                                    "reapst", 0);
                                        }
                                        PROC_LOCK(p2);
                                        _PRELE(p2);
                                }
                                PROC_UNLOCK(p2);
                        }
                        res = true;
                }
                reap_kill_sched_free(t);
        }
        return (res);
}

static void
reap_kill_subtree(struct thread *td, struct proc *p, struct proc *reaper,
    struct reap_kill_proc_work *w)
{
        struct unrhdr pids;

        /*
         * pids records processes which were already signalled, to
         * avoid signalling them twice if the iteration needs to be
         * repeated.
         */
        init_unrhdr(&pids, 1, PID_MAX, UNR_NO_MTX);
        PROC_LOCK(td->td_proc);
        if ((td->td_proc->p_flag2 & P2_WEXIT) != 0) {
                PROC_UNLOCK(td->td_proc);
                goto out;
        }
        PROC_UNLOCK(td->td_proc);
        while (reap_kill_subtree_once(td, p, reaper, &pids, w))
               ;
out:
        clean_unrhdr(&pids);
        clear_unrhdr(&pids);
}

static bool
reap_kill_sapblk(struct thread *td __unused, void *data)
{
        struct procctl_reaper_kill *rk;

        rk = data;
        return ((rk->rk_flags & REAPER_KILL_CHILDREN) == 0);
}

static int
reap_kill(struct thread *td, struct proc *p, void *data)
{
        struct reap_kill_proc_work w;
        struct proc *reaper;
        ksiginfo_t ksi;
        struct procctl_reaper_kill *rk;
        int error;

        rk = data;
        sx_assert(&proctree_lock, SX_LOCKED);
        if (IN_CAPABILITY_MODE(td))
                return (ECAPMODE);
        if (rk->rk_sig <= 0 || rk->rk_sig > _SIG_MAXSIG ||
            (rk->rk_flags & ~(REAPER_KILL_CHILDREN |
            REAPER_KILL_SUBTREE)) != 0 || (rk->rk_flags &
            (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE)) ==
            (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE))
                return (EINVAL);
        PROC_UNLOCK(p);
        reaper = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
        ksiginfo_init(&ksi);
        ksi.ksi_signo = rk->rk_sig;
        ksi.ksi_code = SI_USER;
        ksi.ksi_pid = td->td_proc->p_pid;
        ksi.ksi_uid = td->td_ucred->cr_ruid;
        error = ESRCH;
        rk->rk_killed = 0;
        rk->rk_fpid = -1;
        if ((rk->rk_flags & REAPER_KILL_CHILDREN) != 0) {
                reap_kill_children(td, reaper, rk, &ksi, &error);
        } else {
                w.cr = crhold(td->td_ucred);
                w.ksi = &ksi;
                w.rk = rk;
                w.error = &error;
                TASK_INIT(&w.t, 0, reap_kill_proc_work, &w);

                /*
                 * Prevent swapout, since w, ksi, and possibly rk, are
                 * allocated on the stack.  We sleep in
                 * reap_kill_subtree_once() waiting for the task to
                 * complete single-threading.
                 */
                PHOLD(td->td_proc);

                reap_kill_subtree(td, p, reaper, &w);
                PRELE(td->td_proc);
                crfree(w.cr);
        }
        PROC_LOCK(p);
        return (error);
}
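
/*
 * Illustrative userland sketch (editor's note, not part of the original
 * source): delivering SIGTERM to every descendant of the calling
 * reaper, as implemented by reap_kill() above:
 *
 *      struct procctl_reaper_kill rk;
 *
 *      memset(&rk, 0, sizeof(rk));
 *      rk.rk_sig = SIGTERM;
 *      rk.rk_flags = 0;
 *      if (procctl(P_PID, getpid(), PROC_REAP_KILL, &rk) == 0)
 *              printf("signalled %u processes\n", rk.rk_killed);
 *
 * REAPER_KILL_CHILDREN limits delivery to direct children, while
 * REAPER_KILL_SUBTREE together with rk_subtree limits it to one reaper
 * subtree.  On failure rk_fpid identifies the first process the signal
 * could not be sent to, which is why this command copies the structure
 * back out even on error (copyout_on_error below).
 */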

static int
trace_ctl(struct thread *td, struct proc *p, void *data)
{
        int state;

        PROC_LOCK_ASSERT(p, MA_OWNED);
        state = *(int *)data;

        /*
         * Ktrace changes p_traceflag from or to zero under the
         * process lock, so the test does not need to acquire the
         * ktrace mutex.
         */
        if ((p->p_flag & P_TRACED) != 0 || p->p_traceflag != 0)
                return (EBUSY);

        switch (state) {
        case PROC_TRACE_CTL_ENABLE:
                if (td->td_proc != p)
                        return (EPERM);
                p->p_flag2 &= ~(P2_NOTRACE | P2_NOTRACE_EXEC);
                break;
        case PROC_TRACE_CTL_DISABLE_EXEC:
                p->p_flag2 |= P2_NOTRACE_EXEC | P2_NOTRACE;
                break;
        case PROC_TRACE_CTL_DISABLE:
                if ((p->p_flag2 & P2_NOTRACE_EXEC) != 0) {
                        KASSERT((p->p_flag2 & P2_NOTRACE) != 0,
                            ("dangling P2_NOTRACE_EXEC"));
                        if (td->td_proc != p)
                                return (EPERM);
                        p->p_flag2 &= ~P2_NOTRACE_EXEC;
                } else {
                        p->p_flag2 |= P2_NOTRACE;
                }
                break;
        default:
                return (EINVAL);
        }
        return (0);
}

static int
trace_status(struct thread *td, struct proc *p, void *data)
{
        int *status;

        status = data;
        if ((p->p_flag2 & P2_NOTRACE) != 0) {
                KASSERT((p->p_flag & P_TRACED) == 0,
                    ("%d traced but tracing disabled", p->p_pid));
                *status = -1;
        } else if ((p->p_flag & P_TRACED) != 0) {
                *status = p->p_pptr->p_pid;
        } else {
                *status = 0;
        }
        return (0);
}
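
/*
 * Illustrative userland sketch (editor's note, not part of the original
 * source): a process that wants to make itself un-debuggable before
 * handling secrets could use the trace_ctl() handler above via:
 *
 *      int arg = PROC_TRACE_CTL_DISABLE;
 *      if (procctl(P_PID, getpid(), PROC_TRACE_CTL, &arg) != 0)
 *              err(1, "PROC_TRACE_CTL");
 *
 * PROC_TRACE_STATUS then reports -1 when tracing is disabled, 0 when
 * the process is not being traced, or the pid of the tracing process.
 */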

static int
trapcap_ctl(struct thread *td, struct proc *p, void *data)
{
        int state;

        PROC_LOCK_ASSERT(p, MA_OWNED);
        state = *(int *)data;

        switch (state) {
        case PROC_TRAPCAP_CTL_ENABLE:
                p->p_flag2 |= P2_TRAPCAP;
                break;
        case PROC_TRAPCAP_CTL_DISABLE:
                p->p_flag2 &= ~P2_TRAPCAP;
                break;
        default:
                return (EINVAL);
        }
        return (0);
}

static int
trapcap_status(struct thread *td, struct proc *p, void *data)
{
        int *status;

        status = data;
        *status = (p->p_flag2 & P2_TRAPCAP) != 0 ? PROC_TRAPCAP_CTL_ENABLE :
            PROC_TRAPCAP_CTL_DISABLE;
        return (0);
}

static int
no_new_privs_ctl(struct thread *td, struct proc *p, void *data)
{
        int state;

        PROC_LOCK_ASSERT(p, MA_OWNED);
        state = *(int *)data;

        if (state != PROC_NO_NEW_PRIVS_ENABLE)
                return (EINVAL);
        p->p_flag2 |= P2_NO_NEW_PRIVS;
        return (0);
}

static int
no_new_privs_status(struct thread *td, struct proc *p, void *data)
{

        *(int *)data = (p->p_flag2 & P2_NO_NEW_PRIVS) != 0 ?
            PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
        return (0);
}
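
/*
 * Illustrative userland sketch (editor's note, not part of the original
 * source): P2_NO_NEW_PRIVS is one-way, so the control handler above
 * accepts only the enable state:
 *
 *      int arg = PROC_NO_NEW_PRIVS_ENABLE;
 *      (void)procctl(P_PID, getpid(), PROC_NO_NEW_PRIVS_CTL, &arg);
 *
 * After this, set-user-ID and set-group-ID bits no longer grant
 * privileges on execve() for the process; PROC_NO_NEW_PRIVS_STATUS
 * reports PROC_NO_NEW_PRIVS_ENABLE or PROC_NO_NEW_PRIVS_DISABLE.
 */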

static int
protmax_ctl(struct thread *td, struct proc *p, void *data)
{
        int state;

        PROC_LOCK_ASSERT(p, MA_OWNED);
        state = *(int *)data;

        switch (state) {
        case PROC_PROTMAX_FORCE_ENABLE:
                p->p_flag2 &= ~P2_PROTMAX_DISABLE;
                p->p_flag2 |= P2_PROTMAX_ENABLE;
                break;
        case PROC_PROTMAX_FORCE_DISABLE:
                p->p_flag2 |= P2_PROTMAX_DISABLE;
                p->p_flag2 &= ~P2_PROTMAX_ENABLE;
                break;
        case PROC_PROTMAX_NOFORCE:
                p->p_flag2 &= ~(P2_PROTMAX_ENABLE | P2_PROTMAX_DISABLE);
                break;
        default:
                return (EINVAL);
        }
        return (0);
}

static int
protmax_status(struct thread *td, struct proc *p, void *data)
{
        int d;

        switch (p->p_flag2 & (P2_PROTMAX_ENABLE | P2_PROTMAX_DISABLE)) {
        case 0:
                d = PROC_PROTMAX_NOFORCE;
                break;
        case P2_PROTMAX_ENABLE:
                d = PROC_PROTMAX_FORCE_ENABLE;
                break;
        case P2_PROTMAX_DISABLE:
                d = PROC_PROTMAX_FORCE_DISABLE;
                break;
        }
        if (kern_mmap_maxprot(p, PROT_READ) == PROT_READ)
                d |= PROC_PROTMAX_ACTIVE;
        *(int *)data = d;
        return (0);
}

static int
aslr_ctl(struct thread *td, struct proc *p, void *data)
{
        int state;

        PROC_LOCK_ASSERT(p, MA_OWNED);
        state = *(int *)data;

        switch (state) {
        case PROC_ASLR_FORCE_ENABLE:
                p->p_flag2 &= ~P2_ASLR_DISABLE;
                p->p_flag2 |= P2_ASLR_ENABLE;
                break;
        case PROC_ASLR_FORCE_DISABLE:
                p->p_flag2 |= P2_ASLR_DISABLE;
                p->p_flag2 &= ~P2_ASLR_ENABLE;
                break;
        case PROC_ASLR_NOFORCE:
                p->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE);
                break;
        default:
                return (EINVAL);
        }
        return (0);
}

static int
aslr_status(struct thread *td, struct proc *p, void *data)
{
        struct vmspace *vm;
        int d;

        switch (p->p_flag2 & (P2_ASLR_ENABLE | P2_ASLR_DISABLE)) {
        case 0:
                d = PROC_ASLR_NOFORCE;
                break;
        case P2_ASLR_ENABLE:
                d = PROC_ASLR_FORCE_ENABLE;
                break;
        case P2_ASLR_DISABLE:
                d = PROC_ASLR_FORCE_DISABLE;
                break;
        }
        if ((p->p_flag & P_WEXIT) == 0) {
                _PHOLD(p);
                PROC_UNLOCK(p);
                vm = vmspace_acquire_ref(p);
                if (vm != NULL) {
                        if ((vm->vm_map.flags & MAP_ASLR) != 0)
                                d |= PROC_ASLR_ACTIVE;
                        vmspace_free(vm);
                }
                PROC_LOCK(p);
                _PRELE(p);
        }
        *(int *)data = d;
        return (0);
}
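
/*
 * Illustrative userland sketch (editor's note, not part of the original
 * source): forcing ASLR off for a process that will be debugged, using
 * the aslr_ctl() handler above.  The policy bits only take effect at
 * the next execve(), while PROC_ASLR_STATUS reports both the policy and
 * whether the current address space was randomized (PROC_ASLR_ACTIVE).
 * Here child_pid is a placeholder for a pid the caller may debug:
 *
 *      int arg = PROC_ASLR_FORCE_DISABLE;
 *      if (procctl(P_PID, child_pid, PROC_ASLR_CTL, &arg) != 0)
 *              err(1, "PROC_ASLR_CTL");
 */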

static int
stackgap_ctl(struct thread *td, struct proc *p, void *data)
{
        int state;

        PROC_LOCK_ASSERT(p, MA_OWNED);
        state = *(int *)data;

        if ((state & ~(PROC_STACKGAP_ENABLE | PROC_STACKGAP_DISABLE |
            PROC_STACKGAP_ENABLE_EXEC | PROC_STACKGAP_DISABLE_EXEC)) != 0)
                return (EINVAL);
        switch (state & (PROC_STACKGAP_ENABLE | PROC_STACKGAP_DISABLE)) {
        case PROC_STACKGAP_ENABLE:
                if ((p->p_flag2 & P2_STKGAP_DISABLE) != 0)
                        return (EINVAL);
                break;
        case PROC_STACKGAP_DISABLE:
                p->p_flag2 |= P2_STKGAP_DISABLE;
                break;
        case 0:
                break;
        default:
                return (EINVAL);
        }
        switch (state & (PROC_STACKGAP_ENABLE_EXEC |
            PROC_STACKGAP_DISABLE_EXEC)) {
        case PROC_STACKGAP_ENABLE_EXEC:
                p->p_flag2 &= ~P2_STKGAP_DISABLE_EXEC;
                break;
        case PROC_STACKGAP_DISABLE_EXEC:
                p->p_flag2 |= P2_STKGAP_DISABLE_EXEC;
                break;
        case 0:
                break;
        default:
                return (EINVAL);
        }
        return (0);
}

static int
stackgap_status(struct thread *td, struct proc *p, void *data)
{
        int d;

        PROC_LOCK_ASSERT(p, MA_OWNED);

        d = (p->p_flag2 & P2_STKGAP_DISABLE) != 0 ? PROC_STACKGAP_DISABLE :
            PROC_STACKGAP_ENABLE;
        d |= (p->p_flag2 & P2_STKGAP_DISABLE_EXEC) != 0 ?
            PROC_STACKGAP_DISABLE_EXEC : PROC_STACKGAP_ENABLE_EXEC;
        *(int *)data = d;
        return (0);
}

static int
wxmap_ctl(struct thread *td, struct proc *p, void *data)
{
        struct vmspace *vm;
        vm_map_t map;
        int state;

        PROC_LOCK_ASSERT(p, MA_OWNED);
        if ((p->p_flag & P_WEXIT) != 0)
                return (ESRCH);
        state = *(int *)data;

        switch (state) {
        case PROC_WX_MAPPINGS_PERMIT:
                p->p_flag2 |= P2_WXORX_DISABLE;
                _PHOLD(p);
                PROC_UNLOCK(p);
                vm = vmspace_acquire_ref(p);
                if (vm != NULL) {
                        map = &vm->vm_map;
                        vm_map_lock(map);
                        map->flags &= ~MAP_WXORX;
                        vm_map_unlock(map);
                        vmspace_free(vm);
                }
                PROC_LOCK(p);
                _PRELE(p);
                break;
        case PROC_WX_MAPPINGS_DISALLOW_EXEC:
                p->p_flag2 |= P2_WXORX_ENABLE_EXEC;
                break;
        default:
                return (EINVAL);
        }

        return (0);
}

static int
wxmap_status(struct thread *td, struct proc *p, void *data)
{
        struct vmspace *vm;
        int d;

        PROC_LOCK_ASSERT(p, MA_OWNED);
        if ((p->p_flag & P_WEXIT) != 0)
                return (ESRCH);

        d = 0;
        if ((p->p_flag2 & P2_WXORX_DISABLE) != 0)
                d |= PROC_WX_MAPPINGS_PERMIT;
        if ((p->p_flag2 & P2_WXORX_ENABLE_EXEC) != 0)
                d |= PROC_WX_MAPPINGS_DISALLOW_EXEC;
        _PHOLD(p);
        PROC_UNLOCK(p);
        vm = vmspace_acquire_ref(p);
        if (vm != NULL) {
                if ((vm->vm_map.flags & MAP_WXORX) != 0)
                        d |= PROC_WXORX_ENFORCE;
                vmspace_free(vm);
        }
        PROC_LOCK(p);
        _PRELE(p);
        *(int *)data = d;
        return (0);
}
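
/*
 * Illustrative userland sketch (editor's note, not part of the original
 * source): a JIT that needs writable-and-executable mappings can opt
 * out of W^X enforcement for itself via the wxmap_ctl() handler above:
 *
 *      int arg = PROC_WX_MAPPINGS_PERMIT;
 *      if (procctl(P_PID, getpid(), PROC_WXMAP_CTL, &arg) != 0)
 *              err(1, "PROC_WXMAP_CTL");
 *
 * PROC_WXMAP_STATUS returns the two policy bits plus
 * PROC_WXORX_ENFORCE when the current vmspace still has MAP_WXORX set.
 */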

static int
pdeathsig_ctl(struct thread *td, struct proc *p, void *data)
{
        int signum;

        signum = *(int *)data;
        if (p != td->td_proc || (signum != 0 && !_SIG_VALID(signum)))
                return (EINVAL);
        p->p_pdeathsig = signum;
        return (0);
}

static int
pdeathsig_status(struct thread *td, struct proc *p, void *data)
{
        if (p != td->td_proc)
                return (EINVAL);
        *(int *)data = p->p_pdeathsig;
        return (0);
}
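
/*
 * Illustrative userland sketch (editor's note, not part of the original
 * source): a helper process can ask to be signalled when its parent
 * exits, using the pdeathsig_ctl() handler above.  The request applies
 * only to the calling process:
 *
 *      int sig = SIGKILL;
 *      if (procctl(P_PID, getpid(), PROC_PDEATHSIG_CTL, &sig) != 0)
 *              err(1, "PROC_PDEATHSIG_CTL");
 *
 * Passing 0 clears the request; PROC_PDEATHSIG_STATUS reads the
 * currently armed signal number back.
 */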

enum {
        PCTL_SLOCKED,
        PCTL_XLOCKED,
        PCTL_UNLOCKED,
};

struct procctl_cmd_info {
        int lock_tree;
        bool one_proc : 1;
        bool esrch_is_einval : 1;
        bool copyout_on_error : 1;
        bool no_nonnull_data : 1;
        bool need_candebug : 1;
        int copyin_sz;
        int copyout_sz;
        int (*exec)(struct thread *, struct proc *, void *);
        bool (*sapblk)(struct thread *, void *);
};
static const struct procctl_cmd_info procctl_cmds_info[] = {
        [PROC_SPROTECT] =
            { .lock_tree = PCTL_SLOCKED, .one_proc = false,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = false,
              .copyin_sz = sizeof(int), .copyout_sz = 0,
              .exec = protect_set, .copyout_on_error = false, },
        [PROC_REAP_ACQUIRE] =
            { .lock_tree = PCTL_XLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = true,
              .need_candebug = false,
              .copyin_sz = 0, .copyout_sz = 0,
              .exec = reap_acquire, .copyout_on_error = false, },
        [PROC_REAP_RELEASE] =
            { .lock_tree = PCTL_XLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = true,
              .need_candebug = false,
              .copyin_sz = 0, .copyout_sz = 0,
              .exec = reap_release, .copyout_on_error = false, },
        [PROC_REAP_STATUS] =
            { .lock_tree = PCTL_SLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = false,
              .copyin_sz = 0,
              .copyout_sz = sizeof(struct procctl_reaper_status),
              .exec = reap_status, .copyout_on_error = false, },
        [PROC_REAP_GETPIDS] =
            { .lock_tree = PCTL_SLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = false,
              .copyin_sz = sizeof(struct procctl_reaper_pids),
              .copyout_sz = 0,
              .exec = reap_getpids, .copyout_on_error = false, },
        [PROC_REAP_KILL] =
            { .lock_tree = PCTL_SLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = false,
              .copyin_sz = sizeof(struct procctl_reaper_kill),
              .copyout_sz = sizeof(struct procctl_reaper_kill),
              .exec = reap_kill, .copyout_on_error = true,
              .sapblk = reap_kill_sapblk, },
        [PROC_TRACE_CTL] =
            { .lock_tree = PCTL_SLOCKED, .one_proc = false,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = true,
              .copyin_sz = sizeof(int), .copyout_sz = 0,
              .exec = trace_ctl, .copyout_on_error = false, },
        [PROC_TRACE_STATUS] =
            { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = false,
              .copyin_sz = 0, .copyout_sz = sizeof(int),
              .exec = trace_status, .copyout_on_error = false, },
        [PROC_TRAPCAP_CTL] =
            { .lock_tree = PCTL_SLOCKED, .one_proc = false,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = true,
              .copyin_sz = sizeof(int), .copyout_sz = 0,
              .exec = trapcap_ctl, .copyout_on_error = false, },
        [PROC_TRAPCAP_STATUS] =
            { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = false,
              .copyin_sz = 0, .copyout_sz = sizeof(int),
              .exec = trapcap_status, .copyout_on_error = false, },
        [PROC_PDEATHSIG_CTL] =
            { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
              .esrch_is_einval = true, .no_nonnull_data = false,
              .need_candebug = false,
              .copyin_sz = sizeof(int), .copyout_sz = 0,
              .exec = pdeathsig_ctl, .copyout_on_error = false, },
        [PROC_PDEATHSIG_STATUS] =
            { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
              .esrch_is_einval = true, .no_nonnull_data = false,
              .need_candebug = false,
              .copyin_sz = 0, .copyout_sz = sizeof(int),
              .exec = pdeathsig_status, .copyout_on_error = false, },
        [PROC_ASLR_CTL] =
            { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = true,
              .copyin_sz = sizeof(int), .copyout_sz = 0,
              .exec = aslr_ctl, .copyout_on_error = false, },
        [PROC_ASLR_STATUS] =
            { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = false,
              .copyin_sz = 0, .copyout_sz = sizeof(int),
              .exec = aslr_status, .copyout_on_error = false, },
        [PROC_PROTMAX_CTL] =
            { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = true,
              .copyin_sz = sizeof(int), .copyout_sz = 0,
              .exec = protmax_ctl, .copyout_on_error = false, },
        [PROC_PROTMAX_STATUS] =
            { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = false,
              .copyin_sz = 0, .copyout_sz = sizeof(int),
              .exec = protmax_status, .copyout_on_error = false, },
        [PROC_STACKGAP_CTL] =
            { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = true,
              .copyin_sz = sizeof(int), .copyout_sz = 0,
              .exec = stackgap_ctl, .copyout_on_error = false, },
        [PROC_STACKGAP_STATUS] =
            { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = false,
              .copyin_sz = 0, .copyout_sz = sizeof(int),
              .exec = stackgap_status, .copyout_on_error = false, },
        [PROC_NO_NEW_PRIVS_CTL] =
            { .lock_tree = PCTL_SLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = true,
              .copyin_sz = sizeof(int), .copyout_sz = 0,
              .exec = no_new_privs_ctl, .copyout_on_error = false, },
        [PROC_NO_NEW_PRIVS_STATUS] =
            { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = false,
              .copyin_sz = 0, .copyout_sz = sizeof(int),
              .exec = no_new_privs_status, .copyout_on_error = false, },
        [PROC_WXMAP_CTL] =
            { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = true,
              .copyin_sz = sizeof(int), .copyout_sz = 0,
              .exec = wxmap_ctl, .copyout_on_error = false, },
        [PROC_WXMAP_STATUS] =
            { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
              .esrch_is_einval = false, .no_nonnull_data = false,
              .need_candebug = false,
              .copyin_sz = 0, .copyout_sz = sizeof(int),
              .exec = wxmap_status, .copyout_on_error = false, },
};

int
sys_procctl(struct thread *td, struct procctl_args *uap)
{
        union {
                struct procctl_reaper_status rs;
                struct procctl_reaper_pids rp;
                struct procctl_reaper_kill rk;
                int flags;
        } x;
        const struct procctl_cmd_info *cmd_info;
        int error, error1;

        if (uap->com >= PROC_PROCCTL_MD_MIN)
                return (cpu_procctl(td, uap->idtype, uap->id,
                    uap->com, uap->data));
        if (uap->com == 0 || uap->com >= nitems(procctl_cmds_info))
                return (EINVAL);
        cmd_info = &procctl_cmds_info[uap->com];
        bzero(&x, sizeof(x));

        if (cmd_info->copyin_sz > 0) {
                error = copyin(uap->data, &x, cmd_info->copyin_sz);
                if (error != 0)
                        return (error);
        } else if (cmd_info->no_nonnull_data && uap->data != NULL) {
                return (EINVAL);
        }

        error = kern_procctl(td, uap->idtype, uap->id, uap->com, &x);

        if (cmd_info->copyout_sz > 0 && (error == 0 ||
            cmd_info->copyout_on_error)) {
                error1 = copyout(&x, uap->data, cmd_info->copyout_sz);
                if (error == 0)
                        error = error1;
        }
        return (error);
}

static int
kern_procctl_single(struct thread *td, struct proc *p, int com, void *data)
{

        PROC_LOCK_ASSERT(p, MA_OWNED);
        return (procctl_cmds_info[com].exec(td, p, data));
}

int
kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
{
        struct pgrp *pg;
        struct proc *p;
        const struct procctl_cmd_info *cmd_info;
        int error, first_error, ok;
        bool sapblk;

        MPASS(com > 0 && com < nitems(procctl_cmds_info));
        cmd_info = &procctl_cmds_info[com];
        if (idtype != P_PID && cmd_info->one_proc)
                return (EINVAL);

        sapblk = false;
        if (cmd_info->sapblk != NULL) {
                sapblk = cmd_info->sapblk(td, data);
                if (sapblk && !stop_all_proc_block())
                        return (ERESTART);
        }

        switch (cmd_info->lock_tree) {
        case PCTL_XLOCKED:
                sx_xlock(&proctree_lock);
                break;
        case PCTL_SLOCKED:
                sx_slock(&proctree_lock);
                break;
        default:
                break;
        }

        switch (idtype) {
        case P_PID:
                if (id == 0) {
                        p = td->td_proc;
                        error = 0;
                        PROC_LOCK(p);
                } else {
                        p = pfind(id);
                        if (p == NULL) {
                                error = cmd_info->esrch_is_einval ?
                                    EINVAL : ESRCH;
                                break;
                        }
                        error = cmd_info->need_candebug ? p_candebug(td, p) :
                            p_cansee(td, p);
                }
                if (error == 0)
                        error = kern_procctl_single(td, p, com, data);
                PROC_UNLOCK(p);
                break;
        case P_PGID:
                /*
                 * Attempt to apply the operation to all members of the
                 * group.  Ignore processes in the group that can't be
                 * seen.  Ignore errors so long as at least one process is
                 * able to complete the request successfully.
                 */
                pg = pgfind(id);
                if (pg == NULL) {
                        error = ESRCH;
                        break;
                }
                PGRP_UNLOCK(pg);
                ok = 0;
                first_error = 0;
                LIST_FOREACH(p, &pg->pg_members, p_pglist) {
                        PROC_LOCK(p);
                        if (p->p_state == PRS_NEW ||
                            p->p_state == PRS_ZOMBIE ||
                            (cmd_info->need_candebug ? p_candebug(td, p) :
                            p_cansee(td, p)) != 0) {
                                PROC_UNLOCK(p);
                                continue;
                        }
                        error = kern_procctl_single(td, p, com, data);
                        PROC_UNLOCK(p);
                        if (error == 0)
                                ok = 1;
                        else if (first_error == 0)
                                first_error = error;
                }
                if (ok)
                        error = 0;
                else if (first_error != 0)
                        error = first_error;
                else
                        /*
                         * Was not able to see any processes in the
                         * process group.
                         */
                        error = ESRCH;
                break;
        default:
                error = EINVAL;
                break;
        }

        switch (cmd_info->lock_tree) {
        case PCTL_XLOCKED:
                sx_xunlock(&proctree_lock);
                break;
        case PCTL_SLOCKED:
                sx_sunlock(&proctree_lock);
                break;
        default:
                break;
        }
        if (sapblk)
                stop_all_proc_unblock();
        return (error);
}