FreeBSD/FreeBSD.git: sys/kern/kern_procctl.c
1 /*-
2  * Copyright (c) 2014 John Baldwin
3  * Copyright (c) 2014, 2016 The FreeBSD Foundation
4  *
5  * Portions of this software were developed by Konstantin Belousov
6  * under sponsorship from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/param.h>
34 #include <sys/_unrhdr.h>
35 #include <sys/systm.h>
36 #include <sys/capsicum.h>
37 #include <sys/lock.h>
38 #include <sys/mman.h>
39 #include <sys/mutex.h>
40 #include <sys/priv.h>
41 #include <sys/proc.h>
42 #include <sys/procctl.h>
43 #include <sys/sx.h>
44 #include <sys/syscallsubr.h>
45 #include <sys/sysproto.h>
46 #include <sys/taskqueue.h>
47 #include <sys/wait.h>
48
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_map.h>
52 #include <vm/vm_extern.h>
53
54 static int
55 protect_setchild(struct thread *td, struct proc *p, int flags)
56 {
57
58         PROC_LOCK_ASSERT(p, MA_OWNED);
59         if (p->p_flag & P_SYSTEM || p_cansched(td, p) != 0)
60                 return (0);
61         if (flags & PPROT_SET) {
62                 p->p_flag |= P_PROTECTED;
63                 if (flags & PPROT_INHERIT)
64                         p->p_flag2 |= P2_INHERIT_PROTECTED;
65         } else {
66                 p->p_flag &= ~P_PROTECTED;
67                 p->p_flag2 &= ~P2_INHERIT_PROTECTED;
68         }
69         return (1);
70 }
71
72 static int
73 protect_setchildren(struct thread *td, struct proc *top, int flags)
74 {
75         struct proc *p;
76         int ret;
77
78         p = top;
79         ret = 0;
80         sx_assert(&proctree_lock, SX_LOCKED);
81         for (;;) {
82                 ret |= protect_setchild(td, p, flags);
83                 PROC_UNLOCK(p);
84                 /*
85                  * If this process has children, descend to them next,
86                  * otherwise do any siblings, and if done with this level,
87                  * follow back up the tree (but not past top).
88                  */
89                 if (!LIST_EMPTY(&p->p_children))
90                         p = LIST_FIRST(&p->p_children);
91                 else for (;;) {
92                         if (p == top) {
93                                 PROC_LOCK(p);
94                                 return (ret);
95                         }
96                         if (LIST_NEXT(p, p_sibling)) {
97                                 p = LIST_NEXT(p, p_sibling);
98                                 break;
99                         }
100                         p = p->p_pptr;
101                 }
102                 PROC_LOCK(p);
103         }
104 }
105
106 static int
107 protect_set(struct thread *td, struct proc *p, void *data)
108 {
109         int error, flags, ret;
110
111         flags = *(int *)data;
112         switch (PPROT_OP(flags)) {
113         case PPROT_SET:
114         case PPROT_CLEAR:
115                 break;
116         default:
117                 return (EINVAL);
118         }
119
120         if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0)
121                 return (EINVAL);
122
123         error = priv_check(td, PRIV_VM_MADV_PROTECT);
124         if (error)
125                 return (error);
126
127         if (flags & PPROT_DESCEND)
128                 ret = protect_setchildren(td, p, flags);
129         else
130                 ret = protect_setchild(td, p, flags);
131         if (ret == 0)
132                 return (EPERM);
133         return (0);
134 }
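
A minimal userspace sketch of driving protect_set() through procctl(2); PROC_SPROTECT and the PPROT_* flags are the interface implemented above, while the program itself is illustrative. The caller needs the PRIV_VM_MADV_PROTECT privilege, and the effect is to mark the process as protected from being killed when the system runs out of memory.

        #include <sys/procctl.h>
        #include <err.h>

        int
        main(void)
        {
                /* Protect this process; PPROT_INHERIT extends it to future children. */
                int flags = PPROT_SET | PPROT_INHERIT;

                if (procctl(P_PID, 0, PROC_SPROTECT, &flags) == -1)
                        err(1, "procctl(PROC_SPROTECT)");
                return (0);
        }
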
135
136 static int
137 reap_acquire(struct thread *td, struct proc *p, void *data __unused)
138 {
139
140         sx_assert(&proctree_lock, SX_XLOCKED);
141         if (p != td->td_proc)
142                 return (EPERM);
143         if ((p->p_treeflag & P_TREE_REAPER) != 0)
144                 return (EBUSY);
145         p->p_treeflag |= P_TREE_REAPER;
146         /*
147          * We do not reattach existing children and the whole tree
148          * under them to us, since p->p_reaper has already seen them.
149          */
150         return (0);
151 }
152
153 static int
154 reap_release(struct thread *td, struct proc *p, void *data __unused)
155 {
156
157         sx_assert(&proctree_lock, SX_XLOCKED);
158         if (p != td->td_proc)
159                 return (EPERM);
160         if (p == initproc)
161                 return (EINVAL);
162         if ((p->p_treeflag & P_TREE_REAPER) == 0)
163                 return (EINVAL);
164         reaper_abandon_children(p, false);
165         return (0);
166 }
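
A sketch of the userspace side of reap_acquire()/reap_release(): a service manager makes itself the reaper for everything it will fork, and can later hand the role back. The data argument must be NULL for both commands and only the calling process may be the target, per the checks above; the helper names are illustrative.

        #include <sys/procctl.h>
        #include <err.h>

        static void
        become_reaper(void)
        {
                /* Fails with EBUSY if this process is already a reaper. */
                if (procctl(P_PID, 0, PROC_REAP_ACQUIRE, NULL) == -1)
                        err(1, "PROC_REAP_ACQUIRE");
        }

        static void
        resign_reaper(void)
        {
                /* Current descendants are abandoned to the previous reaper. */
                if (procctl(P_PID, 0, PROC_REAP_RELEASE, NULL) == -1)
                        err(1, "PROC_REAP_RELEASE");
        }
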
167
168 static int
169 reap_status(struct thread *td, struct proc *p, void *data)
170 {
171         struct proc *reap, *p2, *first_p;
172         struct procctl_reaper_status *rs;
173
174         rs = data;
175         sx_assert(&proctree_lock, SX_LOCKED);
176         if ((p->p_treeflag & P_TREE_REAPER) == 0) {
177                 reap = p->p_reaper;
178         } else {
179                 reap = p;
180                 rs->rs_flags |= REAPER_STATUS_OWNED;
181         }
182         if (reap == initproc)
183                 rs->rs_flags |= REAPER_STATUS_REALINIT;
184         rs->rs_reaper = reap->p_pid;
185         rs->rs_descendants = 0;
186         rs->rs_children = 0;
187         if (!LIST_EMPTY(&reap->p_reaplist)) {
188                 first_p = LIST_FIRST(&reap->p_children);
189                 if (first_p == NULL)
190                         first_p = LIST_FIRST(&reap->p_reaplist);
191                 rs->rs_pid = first_p->p_pid;
192                 LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
193                         if (proc_realparent(p2) == reap)
194                                 rs->rs_children++;
195                         rs->rs_descendants++;
196                 }
197         } else {
198                 rs->rs_pid = -1;
199         }
200         return (0);
201 }
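
A sketch of reading back the structure that reap_status() fills in, for the current process:

        #include <sys/procctl.h>
        #include <err.h>
        #include <stdio.h>

        int
        main(void)
        {
                struct procctl_reaper_status rs;

                if (procctl(P_PID, 0, PROC_REAP_STATUS, &rs) == -1)
                        err(1, "PROC_REAP_STATUS");
                printf("reaper pid %d, %u children, %u descendants%s\n",
                    (int)rs.rs_reaper, rs.rs_children, rs.rs_descendants,
                    (rs.rs_flags & REAPER_STATUS_OWNED) != 0 ?
                    " (this process is a reaper)" : "");
                return (0);
        }
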
202
203 static int
204 reap_getpids(struct thread *td, struct proc *p, void *data)
205 {
206         struct proc *reap, *p2;
207         struct procctl_reaper_pidinfo *pi, *pip;
208         struct procctl_reaper_pids *rp;
209         u_int i, n;
210         int error;
211
212         rp = data;
213         sx_assert(&proctree_lock, SX_LOCKED);
214         PROC_UNLOCK(p);
215         reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
216         n = i = 0;
217         error = 0;
218         LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling)
219                 n++;
220         sx_unlock(&proctree_lock);
221         if (rp->rp_count < n)
222                 n = rp->rp_count;
223         pi = malloc(n * sizeof(*pi), M_TEMP, M_WAITOK);
224         sx_slock(&proctree_lock);
225         LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
226                 if (i == n)
227                         break;
228                 pip = &pi[i];
229                 bzero(pip, sizeof(*pip));
230                 pip->pi_pid = p2->p_pid;
231                 pip->pi_subtree = p2->p_reapsubtree;
232                 pip->pi_flags = REAPER_PIDINFO_VALID;
233                 if (proc_realparent(p2) == reap)
234                         pip->pi_flags |= REAPER_PIDINFO_CHILD;
235                 if ((p2->p_treeflag & P_TREE_REAPER) != 0)
236                         pip->pi_flags |= REAPER_PIDINFO_REAPER;
237                 if ((p2->p_flag & P_STOPPED) != 0)
238                         pip->pi_flags |= REAPER_PIDINFO_STOPPED;
239                 if (p2->p_state == PRS_ZOMBIE)
240                         pip->pi_flags |= REAPER_PIDINFO_ZOMBIE;
241                 else if ((p2->p_flag & P_WEXIT) != 0)
242                         pip->pi_flags |= REAPER_PIDINFO_EXITING;
243                 i++;
244         }
245         sx_sunlock(&proctree_lock);
246         error = copyout(pi, rp->rp_pids, i * sizeof(*pi));
247         free(pi, M_TEMP);
248         sx_slock(&proctree_lock);
249         PROC_LOCK(p);
250         return (error);
251 }
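
A sketch of the matching userspace call for reap_getpids(). The descendant count from PROC_REAP_STATUS sizes the array; because the count can change between the two calls, entries that were not filled in (no REAPER_PIDINFO_VALID) are skipped. The helper name is illustrative.

        #include <sys/procctl.h>
        #include <err.h>
        #include <stdio.h>
        #include <stdlib.h>

        static void
        list_reap_subtree(void)
        {
                struct procctl_reaper_status rs;
                struct procctl_reaper_pids rp;
                struct procctl_reaper_pidinfo *pi;
                unsigned int i;

                if (procctl(P_PID, 0, PROC_REAP_STATUS, &rs) == -1)
                        err(1, "PROC_REAP_STATUS");
                if (rs.rs_descendants == 0)
                        return;
                pi = calloc(rs.rs_descendants, sizeof(*pi));
                if (pi == NULL)
                        err(1, "calloc");
                rp.rp_count = rs.rs_descendants;
                rp.rp_pids = pi;
                if (procctl(P_PID, 0, PROC_REAP_GETPIDS, &rp) == -1)
                        err(1, "PROC_REAP_GETPIDS");
                for (i = 0; i < rs.rs_descendants; i++) {
                        if ((pi[i].pi_flags & REAPER_PIDINFO_VALID) == 0)
                                continue;
                        printf("pid %d subtree %u%s\n", (int)pi[i].pi_pid,
                            pi[i].pi_subtree,
                            (pi[i].pi_flags & REAPER_PIDINFO_CHILD) != 0 ?
                            " (direct child)" : "");
                }
                free(pi);
        }
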
252
253 struct reap_kill_proc_work {
254         struct ucred *cr;
255         struct proc *target;
256         ksiginfo_t *ksi;
257         struct procctl_reaper_kill *rk;
258         int *error;
259         struct task t;
260 };
261
262 static void
263 reap_kill_proc_locked(struct reap_kill_proc_work *w)
264 {
265         int error1;
266         bool need_stop;
267
268         PROC_LOCK_ASSERT(w->target, MA_OWNED);
269         PROC_ASSERT_HELD(w->target);
270
271         error1 = cr_cansignal(w->cr, w->target, w->rk->rk_sig);
272         if (error1 != 0) {
273                 if (*w->error == ESRCH) {
274                         w->rk->rk_fpid = w->target->p_pid;
275                         *w->error = error1;
276                 }
277                 return;
278         }
279
280         /*
281          * The need_stop flag indicates whether the target process
282          * needs to be suspended before being signalled.  This is
283          * needed when we guarantee that all processes in the subtree
284          * are signalled, avoiding the race where a process not yet
285          * fully linked into all structures during fork is skipped by
286          * the iterator and then escapes signalling.
287          *
288          * The thread cannot usefully stop itself anyway, and if another
289          * thread of the current process forks while the current
290          * thread signals the whole subtree, it is an application
291          * race.
292          */
293         if ((w->target->p_flag & (P_KPROC | P_SYSTEM | P_STOPPED)) == 0)
294                 need_stop = thread_single(w->target, SINGLE_ALLPROC) == 0;
295         else
296                 need_stop = false;
297
298         (void)pksignal(w->target, w->rk->rk_sig, w->ksi);
299         w->rk->rk_killed++;
300         *w->error = error1;
301
302         if (need_stop)
303                 thread_single_end(w->target, SINGLE_ALLPROC);
304 }
305
306 static void
307 reap_kill_proc_work(void *arg, int pending __unused)
308 {
309         struct reap_kill_proc_work *w;
310
311         w = arg;
312         PROC_LOCK(w->target);
313         if ((w->target->p_flag2 & P2_WEXIT) == 0)
314                 reap_kill_proc_locked(w);
315         PROC_UNLOCK(w->target);
316
317         sx_xlock(&proctree_lock);
318         w->target = NULL;
319         wakeup(&w->target);
320         sx_xunlock(&proctree_lock);
321 }
322
323 struct reap_kill_tracker {
324         struct proc *parent;
325         TAILQ_ENTRY(reap_kill_tracker) link;
326 };
327
328 TAILQ_HEAD(reap_kill_tracker_head, reap_kill_tracker);
329
330 static void
331 reap_kill_sched(struct reap_kill_tracker_head *tracker, struct proc *p2)
332 {
333         struct reap_kill_tracker *t;
334
335         PROC_LOCK(p2);
336         if ((p2->p_flag2 & P2_WEXIT) != 0) {
337                 PROC_UNLOCK(p2);
338                 return;
339         }
340         _PHOLD_LITE(p2);
341         PROC_UNLOCK(p2);
342         t = malloc(sizeof(struct reap_kill_tracker), M_TEMP, M_WAITOK);
343         t->parent = p2;
344         TAILQ_INSERT_TAIL(tracker, t, link);
345 }
346
347 static void
348 reap_kill_sched_free(struct reap_kill_tracker *t)
349 {
350         PRELE(t->parent);
351         free(t, M_TEMP);
352 }
353
354 static void
355 reap_kill_children(struct thread *td, struct proc *reaper,
356     struct procctl_reaper_kill *rk, ksiginfo_t *ksi, int *error)
357 {
358         struct proc *p2;
359         int error1;
360
361         LIST_FOREACH(p2, &reaper->p_children, p_sibling) {
362                 PROC_LOCK(p2);
363                 if ((p2->p_flag2 & P2_WEXIT) == 0) {
364                         error1 = p_cansignal(td, p2, rk->rk_sig);
365                         if (error1 != 0) {
366                                 if (*error == ESRCH) {
367                                         rk->rk_fpid = p2->p_pid;
368                                         *error = error1;
369                                 }
370
371                                 /*
372                                  * Do not end the loop on error;
373                                  * signal everything we can.
374                                  */
375                         } else {
376                                 (void)pksignal(p2, rk->rk_sig, ksi);
377                                 rk->rk_killed++;
378                         }
379                 }
380                 PROC_UNLOCK(p2);
381         }
382 }
383
384 static bool
385 reap_kill_subtree_once(struct thread *td, struct proc *p, struct proc *reaper,
386     struct unrhdr *pids, struct reap_kill_proc_work *w)
387 {
388         struct reap_kill_tracker_head tracker;
389         struct reap_kill_tracker *t;
390         struct proc *p2;
391         int r, xlocked;
392         bool res, st;
393
394         res = false;
395         TAILQ_INIT(&tracker);
396         reap_kill_sched(&tracker, reaper);
397         while ((t = TAILQ_FIRST(&tracker)) != NULL) {
398                 TAILQ_REMOVE(&tracker, t, link);
399
400                 /*
401                  * Since reap_kill_proc() drops proctree_lock sx, it
402                  * is possible that the tracked reaper is no longer a reaper.
403                  * In this case the subtree is reparented to the new
404                  * reaper, which should handle it.
405                  */
406                 if ((t->parent->p_treeflag & P_TREE_REAPER) == 0) {
407                         reap_kill_sched_free(t);
408                         res = true;
409                         continue;
410                 }
411
412                 LIST_FOREACH(p2, &t->parent->p_reaplist, p_reapsibling) {
413                         if (t->parent == reaper &&
414                             (w->rk->rk_flags & REAPER_KILL_SUBTREE) != 0 &&
415                             p2->p_reapsubtree != w->rk->rk_subtree)
416                                 continue;
417                         if ((p2->p_treeflag & P_TREE_REAPER) != 0)
418                                 reap_kill_sched(&tracker, p2);
419
420                         /*
421                          * Handle possible pid reuse.  If we recorded
422                          * p2 as killed but its p_flag2 does not
423                          * confirm it, that means that the process
424                          * terminated and its id was reused by another
425                          * process in the reaper subtree.
426                          *
427                          * Unlocked read of p2->p_flag2 is fine, it is
428                          * our thread that set the tested flag.
429                          */
430                         if (alloc_unr_specific(pids, p2->p_pid) != p2->p_pid &&
431                             (atomic_load_int(&p2->p_flag2) &
432                             (P2_REAPKILLED | P2_WEXIT)) != 0)
433                                 continue;
434
435                         if (p2 == td->td_proc) {
436                                 if ((p2->p_flag & P_HADTHREADS) != 0 &&
437                                     (p2->p_flag2 & P2_WEXIT) == 0) {
438                                         xlocked = sx_xlocked(&proctree_lock);
439                                         sx_unlock(&proctree_lock);
440                                         st = true;
441                                 } else {
442                                         st = false;
443                                 }
444                                 PROC_LOCK(p2);
445                                 /*
446                                  * sapblk ensures that only one thread
447                                  * in the system sets this flag.
448                                  */
449                                 p2->p_flag2 |= P2_REAPKILLED;
450                                 if (st)
451                                         r = thread_single(p2, SINGLE_NO_EXIT);
452                                 (void)pksignal(p2, w->rk->rk_sig, w->ksi);
453                                 w->rk->rk_killed++;
454                                 if (st && r == 0)
455                                         thread_single_end(p2, SINGLE_NO_EXIT);
456                                 PROC_UNLOCK(p2);
457                                 if (st) {
458                                         if (xlocked)
459                                                 sx_xlock(&proctree_lock);
460                                         else
461                                                 sx_slock(&proctree_lock);
462                                 }
463                         } else {
464                                 PROC_LOCK(p2);
465                                 if ((p2->p_flag2 & P2_WEXIT) == 0) {
466                                         _PHOLD_LITE(p2);
467                                         p2->p_flag2 |= P2_REAPKILLED;
468                                         PROC_UNLOCK(p2);
469                                         w->target = p2;
470                                         taskqueue_enqueue(taskqueue_thread,
471                                             &w->t);
472                                         while (w->target != NULL) {
473                                                 sx_sleep(&w->target,
474                                                     &proctree_lock, PWAIT,
475                                                     "reapst", 0);
476                                         }
477                                         PROC_LOCK(p2);
478                                         _PRELE(p2);
479                                 }
480                                 PROC_UNLOCK(p2);
481                         }
482                         res = true;
483                 }
484                 reap_kill_sched_free(t);
485         }
486         return (res);
487 }
488
489 static void
490 reap_kill_subtree(struct thread *td, struct proc *p, struct proc *reaper,
491     struct reap_kill_proc_work *w)
492 {
493         struct unrhdr pids;
494         void *ihandle;
495         struct proc *p2;
496         int pid;
497
498         /*
499          * pids records processes which were already signalled, to
500          * avoid signalling them twice if the iteration needs to be
501          * repeated.
502          */
503         init_unrhdr(&pids, 1, PID_MAX, UNR_NO_MTX);
504         PROC_LOCK(td->td_proc);
505         if ((td->td_proc->p_flag2 & P2_WEXIT) != 0) {
506                 PROC_UNLOCK(td->td_proc);
507                 goto out;
508         }
509         PROC_UNLOCK(td->td_proc);
510         while (reap_kill_subtree_once(td, p, reaper, &pids, w))
511                ;
512
513         ihandle = create_iter_unr(&pids);
514         while ((pid = next_iter_unr(ihandle)) != -1) {
515                 p2 = pfind(pid);
516                 if (p2 != NULL) {
517                         p2->p_flag2 &= ~P2_REAPKILLED;
518                         PROC_UNLOCK(p2);
519                 }
520         }
521         free_iter_unr(ihandle);
522
523 out:
524         clean_unrhdr(&pids);
525         clear_unrhdr(&pids);
526 }
527
528 static bool
529 reap_kill_sapblk(struct thread *td __unused, void *data)
530 {
531         struct procctl_reaper_kill *rk;
532
533         rk = data;
534         return ((rk->rk_flags & REAPER_KILL_CHILDREN) == 0);
535 }
536
537 static int
538 reap_kill(struct thread *td, struct proc *p, void *data)
539 {
540         struct reap_kill_proc_work w;
541         struct proc *reaper;
542         ksiginfo_t ksi;
543         struct procctl_reaper_kill *rk;
544         int error;
545
546         rk = data;
547         sx_assert(&proctree_lock, SX_LOCKED);
548         if (IN_CAPABILITY_MODE(td))
549                 return (ECAPMODE);
550         if (rk->rk_sig <= 0 || rk->rk_sig > _SIG_MAXSIG ||
551             (rk->rk_flags & ~(REAPER_KILL_CHILDREN |
552             REAPER_KILL_SUBTREE)) != 0 || (rk->rk_flags &
553             (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE)) ==
554             (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE))
555                 return (EINVAL);
556         PROC_UNLOCK(p);
557         reaper = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
558         ksiginfo_init(&ksi);
559         ksi.ksi_signo = rk->rk_sig;
560         ksi.ksi_code = SI_USER;
561         ksi.ksi_pid = td->td_proc->p_pid;
562         ksi.ksi_uid = td->td_ucred->cr_ruid;
563         error = ESRCH;
564         rk->rk_killed = 0;
565         rk->rk_fpid = -1;
566         if ((rk->rk_flags & REAPER_KILL_CHILDREN) != 0) {
567                 reap_kill_children(td, reaper, rk, &ksi, &error);
568         } else {
569                 w.cr = crhold(td->td_ucred);
570                 w.ksi = &ksi;
571                 w.rk = rk;
572                 w.error = &error;
573                 TASK_INIT(&w.t, 0, reap_kill_proc_work, &w);
574
575                 /*
576                  * Prevent swapout, since w, ksi, and possibly rk, are
577          * Prevent swapout, since w, ksi, and possibly rk are
578          * allocated on the stack.  We sleep in
579          * reap_kill_subtree_once() waiting for the task to
580                  */
581                 PHOLD(td->td_proc);
582
583                 reap_kill_subtree(td, p, reaper, &w);
584                 PRELE(td->td_proc);
585                 crfree(w.cr);
586         }
587         PROC_LOCK(p);
588         return (error);
589 }
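
A sketch of the userspace call into reap_kill(): a reaper delivers SIGTERM to its whole reap subtree. Setting REAPER_KILL_CHILDREN instead restricts delivery to direct children, and REAPER_KILL_SUBTREE together with rk_subtree selects one subtree; rk_killed and rk_fpid are written back even on error, since the command is registered with copyout_on_error. The helper name is illustrative.

        #include <sys/procctl.h>
        #include <err.h>
        #include <signal.h>
        #include <stdio.h>
        #include <string.h>

        static void
        terminate_subtree(void)
        {
                struct procctl_reaper_kill rk;

                memset(&rk, 0, sizeof(rk));
                rk.rk_sig = SIGTERM;            /* rk_flags == 0: whole subtree */
                if (procctl(P_PID, 0, PROC_REAP_KILL, &rk) == -1)
                        warn("PROC_REAP_KILL (first failing pid %d)",
                            (int)rk.rk_fpid);
                printf("signalled %u processes\n", rk.rk_killed);
        }
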
590
591 static int
592 trace_ctl(struct thread *td, struct proc *p, void *data)
593 {
594         int state;
595
596         PROC_LOCK_ASSERT(p, MA_OWNED);
597         state = *(int *)data;
598
599         /*
600          * Ktrace changes p_traceflag from or to zero under the
601          * process lock, so the test does not need to acquire the ktrace
602          * mutex.
603          */
604         if ((p->p_flag & P_TRACED) != 0 || p->p_traceflag != 0)
605                 return (EBUSY);
606
607         switch (state) {
608         case PROC_TRACE_CTL_ENABLE:
609                 if (td->td_proc != p)
610                         return (EPERM);
611                 p->p_flag2 &= ~(P2_NOTRACE | P2_NOTRACE_EXEC);
612                 break;
613         case PROC_TRACE_CTL_DISABLE_EXEC:
614                 p->p_flag2 |= P2_NOTRACE_EXEC | P2_NOTRACE;
615                 break;
616         case PROC_TRACE_CTL_DISABLE:
617                 if ((p->p_flag2 & P2_NOTRACE_EXEC) != 0) {
618                         KASSERT((p->p_flag2 & P2_NOTRACE) != 0,
619                             ("dangling P2_NOTRACE_EXEC"));
620                         if (td->td_proc != p)
621                                 return (EPERM);
622                         p->p_flag2 &= ~P2_NOTRACE_EXEC;
623                 } else {
624                         p->p_flag2 |= P2_NOTRACE;
625                 }
626                 break;
627         default:
628                 return (EINVAL);
629         }
630         return (0);
631 }
632
633 static int
634 trace_status(struct thread *td, struct proc *p, void *data)
635 {
636         int *status;
637
638         status = data;
639         if ((p->p_flag2 & P2_NOTRACE) != 0) {
640                 KASSERT((p->p_flag & P_TRACED) == 0,
641                     ("%d traced but tracing disabled", p->p_pid));
642                 *status = -1;
643         } else if ((p->p_flag & P_TRACED) != 0) {
644                 *status = p->p_pptr->p_pid;
645         } else {
646                 *status = 0;
647         }
648         return (0);
649 }
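
A sketch pairing trace_ctl() and trace_status() from userspace: the process disables tracing of itself and then reads the status back. Per trace_status() above, -1 means tracing is disabled, 0 means not currently traced, and any other value is the pid of the tracing process.

        #include <sys/procctl.h>
        #include <err.h>
        #include <stdio.h>

        int
        main(void)
        {
                int arg, status;

                arg = PROC_TRACE_CTL_DISABLE;
                if (procctl(P_PID, 0, PROC_TRACE_CTL, &arg) == -1)
                        err(1, "PROC_TRACE_CTL");
                if (procctl(P_PID, 0, PROC_TRACE_STATUS, &status) == -1)
                        err(1, "PROC_TRACE_STATUS");
                if (status == -1)
                        printf("tracing disabled\n");
                else if (status == 0)
                        printf("not traced\n");
                else
                        printf("traced by pid %d\n", status);
                return (0);
        }
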
650
651 static int
652 trapcap_ctl(struct thread *td, struct proc *p, void *data)
653 {
654         int state;
655
656         PROC_LOCK_ASSERT(p, MA_OWNED);
657         state = *(int *)data;
658
659         switch (state) {
660         case PROC_TRAPCAP_CTL_ENABLE:
661                 p->p_flag2 |= P2_TRAPCAP;
662                 break;
663         case PROC_TRAPCAP_CTL_DISABLE:
664                 p->p_flag2 &= ~P2_TRAPCAP;
665                 break;
666         default:
667                 return (EINVAL);
668         }
669         return (0);
670 }
671
672 static int
673 trapcap_status(struct thread *td, struct proc *p, void *data)
674 {
675         int *status;
676
677         status = data;
678         *status = (p->p_flag2 & P2_TRAPCAP) != 0 ? PROC_TRAPCAP_CTL_ENABLE :
679             PROC_TRAPCAP_CTL_DISABLE;
680         return (0);
681 }
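
A sketch of using trapcap_ctl() before entering capability mode, so that, per procctl(2), system calls failing with ENOTCAPABLE or ECAPMODE deliver SIGTRAP instead of a plain error return, which makes sandbox violations easy to catch under a debugger.

        #include <sys/capsicum.h>
        #include <sys/procctl.h>
        #include <err.h>

        int
        main(void)
        {
                int arg = PROC_TRAPCAP_CTL_ENABLE;

                if (procctl(P_PID, 0, PROC_TRAPCAP_CTL, &arg) == -1)
                        err(1, "PROC_TRAPCAP_CTL");
                if (cap_enter() == -1)
                        err(1, "cap_enter");
                /* Capability violations now deliver SIGTRAP. */
                return (0);
        }
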
682
683 static int
684 no_new_privs_ctl(struct thread *td, struct proc *p, void *data)
685 {
686         int state;
687
688         PROC_LOCK_ASSERT(p, MA_OWNED);
689         state = *(int *)data;
690
691         if (state != PROC_NO_NEW_PRIVS_ENABLE)
692                 return (EINVAL);
693         p->p_flag2 |= P2_NO_NEW_PRIVS;
694         return (0);
695 }
696
697 static int
698 no_new_privs_status(struct thread *td, struct proc *p, void *data)
699 {
700
701         *(int *)data = (p->p_flag2 & P2_NO_NEW_PRIVS) != 0 ?
702             PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
703         return (0);
704 }
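
A sketch of the one-way no_new_privs_ctl() knob: once set, set-user-ID and set-group-ID bits on images activated by execve(2) no longer grant privileges to this process (the semantics come from procctl(2); only the enable direction exists, as the code above shows). The exec'ed program is just an example.

        #include <sys/procctl.h>
        #include <err.h>
        #include <unistd.h>

        int
        main(void)
        {
                int arg = PROC_NO_NEW_PRIVS_ENABLE;
                char *args[] = { "id", NULL };

                if (procctl(P_PID, 0, PROC_NO_NEW_PRIVS_CTL, &arg) == -1)
                        err(1, "PROC_NO_NEW_PRIVS_CTL");
                /* Any setuid/setgid bits on the new image no longer apply. */
                execv("/usr/bin/id", args);
                err(1, "execv");
        }
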
705
706 static int
707 protmax_ctl(struct thread *td, struct proc *p, void *data)
708 {
709         int state;
710
711         PROC_LOCK_ASSERT(p, MA_OWNED);
712         state = *(int *)data;
713
714         switch (state) {
715         case PROC_PROTMAX_FORCE_ENABLE:
716                 p->p_flag2 &= ~P2_PROTMAX_DISABLE;
717                 p->p_flag2 |= P2_PROTMAX_ENABLE;
718                 break;
719         case PROC_PROTMAX_FORCE_DISABLE:
720                 p->p_flag2 |= P2_PROTMAX_DISABLE;
721                 p->p_flag2 &= ~P2_PROTMAX_ENABLE;
722                 break;
723         case PROC_PROTMAX_NOFORCE:
724                 p->p_flag2 &= ~(P2_PROTMAX_ENABLE | P2_PROTMAX_DISABLE);
725                 break;
726         default:
727                 return (EINVAL);
728         }
729         return (0);
730 }
731
732 static int
733 protmax_status(struct thread *td, struct proc *p, void *data)
734 {
735         int d;
736
737         switch (p->p_flag2 & (P2_PROTMAX_ENABLE | P2_PROTMAX_DISABLE)) {
738         case 0:
739                 d = PROC_PROTMAX_NOFORCE;
740                 break;
741         case P2_PROTMAX_ENABLE:
742                 d = PROC_PROTMAX_FORCE_ENABLE;
743                 break;
744         case P2_PROTMAX_DISABLE:
745                 d = PROC_PROTMAX_FORCE_DISABLE;
746                 break;
747         }
748         if (kern_mmap_maxprot(p, PROT_READ) == PROT_READ)
749                 d |= PROC_PROTMAX_ACTIVE;
750         *(int *)data = d;
751         return (0);
752 }
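
A sketch of protmax_ctl()/protmax_status() from userspace: force the implied PROT_MAX of future mappings to equal the requested protection, then check whether that policy is in effect for the current image (the PROC_PROTMAX_ACTIVE bit set in protmax_status() above).

        #include <sys/procctl.h>
        #include <err.h>
        #include <stdio.h>

        int
        main(void)
        {
                int arg, status;

                arg = PROC_PROTMAX_FORCE_ENABLE;
                if (procctl(P_PID, 0, PROC_PROTMAX_CTL, &arg) == -1)
                        err(1, "PROC_PROTMAX_CTL");
                if (procctl(P_PID, 0, PROC_PROTMAX_STATUS, &status) == -1)
                        err(1, "PROC_PROTMAX_STATUS");
                printf("protmax %sactive, setting %d\n",
                    (status & PROC_PROTMAX_ACTIVE) != 0 ? "" : "not ",
                    status & ~PROC_PROTMAX_ACTIVE);
                return (0);
        }
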
753
754 static int
755 aslr_ctl(struct thread *td, struct proc *p, void *data)
756 {
757         int state;
758
759         PROC_LOCK_ASSERT(p, MA_OWNED);
760         state = *(int *)data;
761
762         switch (state) {
763         case PROC_ASLR_FORCE_ENABLE:
764                 p->p_flag2 &= ~P2_ASLR_DISABLE;
765                 p->p_flag2 |= P2_ASLR_ENABLE;
766                 break;
767         case PROC_ASLR_FORCE_DISABLE:
768                 p->p_flag2 |= P2_ASLR_DISABLE;
769                 p->p_flag2 &= ~P2_ASLR_ENABLE;
770                 break;
771         case PROC_ASLR_NOFORCE:
772                 p->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE);
773                 break;
774         default:
775                 return (EINVAL);
776         }
777         return (0);
778 }
779
780 static int
781 aslr_status(struct thread *td, struct proc *p, void *data)
782 {
783         struct vmspace *vm;
784         int d;
785
786         switch (p->p_flag2 & (P2_ASLR_ENABLE | P2_ASLR_DISABLE)) {
787         case 0:
788                 d = PROC_ASLR_NOFORCE;
789                 break;
790         case P2_ASLR_ENABLE:
791                 d = PROC_ASLR_FORCE_ENABLE;
792                 break;
793         case P2_ASLR_DISABLE:
794                 d = PROC_ASLR_FORCE_DISABLE;
795                 break;
796         }
797         if ((p->p_flag & P_WEXIT) == 0) {
798                 _PHOLD(p);
799                 PROC_UNLOCK(p);
800                 vm = vmspace_acquire_ref(p);
801                 if (vm != NULL) {
802                         if ((vm->vm_map.flags & MAP_ASLR) != 0)
803                                 d |= PROC_ASLR_ACTIVE;
804                         vmspace_free(vm);
805                 }
806                 PROC_LOCK(p);
807                 _PRELE(p);
808         }
809         *(int *)data = d;
810         return (0);
811 }
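
A sketch of aslr_ctl() in its common use: disable address space layout randomization for the images this process will exec, for example to make a crash reproducible. A status query afterwards would report PROC_ASLR_FORCE_DISABLE, plus PROC_ASLR_ACTIVE only if the current vmspace was randomized. The target path is a placeholder.

        #include <sys/procctl.h>
        #include <err.h>
        #include <unistd.h>

        int
        main(void)
        {
                int arg = PROC_ASLR_FORCE_DISABLE;
                char *args[] = { "target", NULL };

                if (procctl(P_PID, 0, PROC_ASLR_CTL, &arg) == -1)
                        err(1, "PROC_ASLR_CTL");
                /* The exec'ed image starts without ASLR. */
                execv("./target", args);
                err(1, "execv");
        }
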
812
813 static int
814 stackgap_ctl(struct thread *td, struct proc *p, void *data)
815 {
816         int state;
817
818         PROC_LOCK_ASSERT(p, MA_OWNED);
819         state = *(int *)data;
820
821         if ((state & ~(PROC_STACKGAP_ENABLE | PROC_STACKGAP_DISABLE |
822             PROC_STACKGAP_ENABLE_EXEC | PROC_STACKGAP_DISABLE_EXEC)) != 0)
823                 return (EINVAL);
824         switch (state & (PROC_STACKGAP_ENABLE | PROC_STACKGAP_DISABLE)) {
825         case PROC_STACKGAP_ENABLE:
826                 if ((p->p_flag2 & P2_STKGAP_DISABLE) != 0)
827                         return (EINVAL);
828                 break;
829         case PROC_STACKGAP_DISABLE:
830                 p->p_flag2 |= P2_STKGAP_DISABLE;
831                 break;
832         case 0:
833                 break;
834         default:
835                 return (EINVAL);
836         }
837         switch (state & (PROC_STACKGAP_ENABLE_EXEC |
838             PROC_STACKGAP_DISABLE_EXEC)) {
839         case PROC_STACKGAP_ENABLE_EXEC:
840                 p->p_flag2 &= ~P2_STKGAP_DISABLE_EXEC;
841                 break;
842         case PROC_STACKGAP_DISABLE_EXEC:
843                 p->p_flag2 |= P2_STKGAP_DISABLE_EXEC;
844                 break;
845         case 0:
846                 break;
847         default:
848                 return (EINVAL);
849         }
850         return (0);
851 }
852
853 static int
854 stackgap_status(struct thread *td, struct proc *p, void *data)
855 {
856         int d;
857
858         PROC_LOCK_ASSERT(p, MA_OWNED);
859
860         d = (p->p_flag2 & P2_STKGAP_DISABLE) != 0 ? PROC_STACKGAP_DISABLE :
861             PROC_STACKGAP_ENABLE;
862         d |= (p->p_flag2 & P2_STKGAP_DISABLE_EXEC) != 0 ?
863             PROC_STACKGAP_DISABLE_EXEC : PROC_STACKGAP_ENABLE_EXEC;
864         *(int *)data = d;
865         return (0);
866 }
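
A sketch of stackgap_ctl()/stackgap_status(). Note the asymmetry visible above: PROC_STACKGAP_ENABLE is refused once the gap has been disabled for the running image, so re-enabling is only possible for future execs via the *_EXEC variants.

        #include <sys/procctl.h>
        #include <err.h>
        #include <stdio.h>

        int
        main(void)
        {
                int arg, status;

                arg = PROC_STACKGAP_DISABLE_EXEC;       /* future images only */
                if (procctl(P_PID, 0, PROC_STACKGAP_CTL, &arg) == -1)
                        err(1, "PROC_STACKGAP_CTL");
                if (procctl(P_PID, 0, PROC_STACKGAP_STATUS, &status) == -1)
                        err(1, "PROC_STACKGAP_STATUS");
                printf("stack gap %s now, %s after exec\n",
                    (status & PROC_STACKGAP_DISABLE) != 0 ?
                    "disabled" : "enabled",
                    (status & PROC_STACKGAP_DISABLE_EXEC) != 0 ?
                    "disabled" : "enabled");
                return (0);
        }
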
867
868 static int
869 wxmap_ctl(struct thread *td, struct proc *p, void *data)
870 {
871         struct vmspace *vm;
872         vm_map_t map;
873         int state;
874
875         PROC_LOCK_ASSERT(p, MA_OWNED);
876         if ((p->p_flag & P_WEXIT) != 0)
877                 return (ESRCH);
878         state = *(int *)data;
879
880         switch (state) {
881         case PROC_WX_MAPPINGS_PERMIT:
882                 p->p_flag2 |= P2_WXORX_DISABLE;
883                 _PHOLD(p);
884                 PROC_UNLOCK(p);
885                 vm = vmspace_acquire_ref(p);
886                 if (vm != NULL) {
887                         map = &vm->vm_map;
888                         vm_map_lock(map);
889                         map->flags &= ~MAP_WXORX;
890                         vm_map_unlock(map);
891                         vmspace_free(vm);
892                 }
893                 PROC_LOCK(p);
894                 _PRELE(p);
895                 break;
896         case PROC_WX_MAPPINGS_DISALLOW_EXEC:
897                 p->p_flag2 |= P2_WXORX_ENABLE_EXEC;
898                 break;
899         default:
900                 return (EINVAL);
901         }
902
903         return (0);
904 }
905
906 static int
907 wxmap_status(struct thread *td, struct proc *p, void *data)
908 {
909         struct vmspace *vm;
910         int d;
911
912         PROC_LOCK_ASSERT(p, MA_OWNED);
913         if ((p->p_flag & P_WEXIT) != 0)
914                 return (ESRCH);
915
916         d = 0;
917         if ((p->p_flag2 & P2_WXORX_DISABLE) != 0)
918                 d |= PROC_WX_MAPPINGS_PERMIT;
919         if ((p->p_flag2 & P2_WXORX_ENABLE_EXEC) != 0)
920                 d |= PROC_WX_MAPPINGS_DISALLOW_EXEC;
921         _PHOLD(p);
922         PROC_UNLOCK(p);
923         vm = vmspace_acquire_ref(p);
924         if (vm != NULL) {
925                 if ((vm->vm_map.flags & MAP_WXORX) != 0)
926                         d |= PROC_WXORX_ENFORCE;
927                 vmspace_free(vm);
928         }
929         PROC_LOCK(p);
930         _PRELE(p);
931         *(int *)data = d;
932         return (0);
933 }
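
A sketch of the wxmap_ctl() path a JIT might use on a system that enforces W^X: PROC_WX_MAPPINGS_PERMIT clears MAP_WXORX on the current vmspace, after which a writable-and-executable anonymous mapping can be created. Whether such a mapping would otherwise be refused depends on how W^X enforcement is configured; the helper name is illustrative.

        #include <sys/mman.h>
        #include <sys/procctl.h>
        #include <err.h>
        #include <stddef.h>

        static void *
        map_jit_page(size_t len)
        {
                int arg = PROC_WX_MAPPINGS_PERMIT;
                void *p;

                if (procctl(P_PID, 0, PROC_WXMAP_CTL, &arg) == -1)
                        err(1, "PROC_WXMAP_CTL");
                p = mmap(NULL, len, PROT_READ | PROT_WRITE | PROT_EXEC,
                    MAP_ANON | MAP_PRIVATE, -1, 0);
                if (p == MAP_FAILED)
                        err(1, "mmap");
                return (p);
        }
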
934
935 static int
936 pdeathsig_ctl(struct thread *td, struct proc *p, void *data)
937 {
938         int signum;
939
940         signum = *(int *)data;
941         if (p != td->td_proc || (signum != 0 && !_SIG_VALID(signum)))
942                 return (EINVAL);
943         p->p_pdeathsig = signum;
944         return (0);
945 }
946
947 static int
948 pdeathsig_status(struct thread *td, struct proc *p, void *data)
949 {
950         if (p != td->td_proc)
951                 return (EINVAL);
952         *(int *)data = p->p_pdeathsig;
953         return (0);
954 }
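
A sketch of pdeathsig_ctl(), the FreeBSD counterpart of Linux's PR_SET_PDEATHSIG: a worker asks to receive a signal when its parent exits. Only the calling process may be the target, and the current setting can be read back with PROC_PDEATHSIG_STATUS.

        #include <sys/procctl.h>
        #include <err.h>
        #include <signal.h>

        int
        main(void)
        {
                int sig = SIGTERM;

                if (procctl(P_PID, 0, PROC_PDEATHSIG_CTL, &sig) == -1)
                        err(1, "PROC_PDEATHSIG_CTL");
                /* SIGTERM is delivered to us when the parent exits. */
                /* ... worker loop ... */
                return (0);
        }
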
955
956 enum {
957         PCTL_SLOCKED,
958         PCTL_XLOCKED,
959         PCTL_UNLOCKED,
960 };
961
962 struct procctl_cmd_info {
963         int lock_tree;
964         bool one_proc : 1;
965         bool esrch_is_einval : 1;
966         bool copyout_on_error : 1;
967         bool no_nonnull_data : 1;
968         bool need_candebug : 1;
969         int copyin_sz;
970         int copyout_sz;
971         int (*exec)(struct thread *, struct proc *, void *);
972         bool (*sapblk)(struct thread *, void *);
973 };
974 static const struct procctl_cmd_info procctl_cmds_info[] = {
975         [PROC_SPROTECT] =
976             { .lock_tree = PCTL_SLOCKED, .one_proc = false,
977               .esrch_is_einval = false, .no_nonnull_data = false,
978               .need_candebug = false,
979               .copyin_sz = sizeof(int), .copyout_sz = 0,
980               .exec = protect_set, .copyout_on_error = false, },
981         [PROC_REAP_ACQUIRE] =
982             { .lock_tree = PCTL_XLOCKED, .one_proc = true,
983               .esrch_is_einval = false, .no_nonnull_data = true,
984               .need_candebug = false,
985               .copyin_sz = 0, .copyout_sz = 0,
986               .exec = reap_acquire, .copyout_on_error = false, },
987         [PROC_REAP_RELEASE] =
988             { .lock_tree = PCTL_XLOCKED, .one_proc = true,
989               .esrch_is_einval = false, .no_nonnull_data = true,
990               .need_candebug = false,
991               .copyin_sz = 0, .copyout_sz = 0,
992               .exec = reap_release, .copyout_on_error = false, },
993         [PROC_REAP_STATUS] =
994             { .lock_tree = PCTL_SLOCKED, .one_proc = true,
995               .esrch_is_einval = false, .no_nonnull_data = false,
996               .need_candebug = false,
997               .copyin_sz = 0,
998               .copyout_sz = sizeof(struct procctl_reaper_status),
999               .exec = reap_status, .copyout_on_error = false, },
1000         [PROC_REAP_GETPIDS] =
1001             { .lock_tree = PCTL_SLOCKED, .one_proc = true,
1002               .esrch_is_einval = false, .no_nonnull_data = false,
1003               .need_candebug = false,
1004               .copyin_sz = sizeof(struct procctl_reaper_pids),
1005               .copyout_sz = 0,
1006               .exec = reap_getpids, .copyout_on_error = false, },
1007         [PROC_REAP_KILL] =
1008             { .lock_tree = PCTL_SLOCKED, .one_proc = true,
1009               .esrch_is_einval = false, .no_nonnull_data = false,
1010               .need_candebug = false,
1011               .copyin_sz = sizeof(struct procctl_reaper_kill),
1012               .copyout_sz = sizeof(struct procctl_reaper_kill),
1013               .exec = reap_kill, .copyout_on_error = true,
1014               .sapblk = reap_kill_sapblk, },
1015         [PROC_TRACE_CTL] =
1016             { .lock_tree = PCTL_SLOCKED, .one_proc = false,
1017               .esrch_is_einval = false, .no_nonnull_data = false,
1018               .need_candebug = true,
1019               .copyin_sz = sizeof(int), .copyout_sz = 0,
1020               .exec = trace_ctl, .copyout_on_error = false, },
1021         [PROC_TRACE_STATUS] =
1022             { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1023               .esrch_is_einval = false, .no_nonnull_data = false,
1024               .need_candebug = false,
1025               .copyin_sz = 0, .copyout_sz = sizeof(int),
1026               .exec = trace_status, .copyout_on_error = false, },
1027         [PROC_TRAPCAP_CTL] =
1028             { .lock_tree = PCTL_SLOCKED, .one_proc = false,
1029               .esrch_is_einval = false, .no_nonnull_data = false,
1030               .need_candebug = true,
1031               .copyin_sz = sizeof(int), .copyout_sz = 0,
1032               .exec = trapcap_ctl, .copyout_on_error = false, },
1033         [PROC_TRAPCAP_STATUS] =
1034             { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1035               .esrch_is_einval = false, .no_nonnull_data = false,
1036               .need_candebug = false,
1037               .copyin_sz = 0, .copyout_sz = sizeof(int),
1038               .exec = trapcap_status, .copyout_on_error = false, },
1039         [PROC_PDEATHSIG_CTL] =
1040             { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1041               .esrch_is_einval = true, .no_nonnull_data = false,
1042               .need_candebug = false,
1043               .copyin_sz = sizeof(int), .copyout_sz = 0,
1044               .exec = pdeathsig_ctl, .copyout_on_error = false, },
1045         [PROC_PDEATHSIG_STATUS] =
1046             { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1047               .esrch_is_einval = true, .no_nonnull_data = false,
1048               .need_candebug = false,
1049               .copyin_sz = 0, .copyout_sz = sizeof(int),
1050               .exec = pdeathsig_status, .copyout_on_error = false, },
1051         [PROC_ASLR_CTL] =
1052             { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1053               .esrch_is_einval = false, .no_nonnull_data = false,
1054               .need_candebug = true,
1055               .copyin_sz = sizeof(int), .copyout_sz = 0,
1056               .exec = aslr_ctl, .copyout_on_error = false, },
1057         [PROC_ASLR_STATUS] =
1058             { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1059               .esrch_is_einval = false, .no_nonnull_data = false,
1060               .need_candebug = false,
1061               .copyin_sz = 0, .copyout_sz = sizeof(int),
1062               .exec = aslr_status, .copyout_on_error = false, },
1063         [PROC_PROTMAX_CTL] =
1064             { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1065               .esrch_is_einval = false, .no_nonnull_data = false,
1066               .need_candebug = true,
1067               .copyin_sz = sizeof(int), .copyout_sz = 0,
1068               .exec = protmax_ctl, .copyout_on_error = false, },
1069         [PROC_PROTMAX_STATUS] =
1070             { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1071               .esrch_is_einval = false, .no_nonnull_data = false,
1072               .need_candebug = false,
1073               .copyin_sz = 0, .copyout_sz = sizeof(int),
1074               .exec = protmax_status, .copyout_on_error = false, },
1075         [PROC_STACKGAP_CTL] =
1076             { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1077               .esrch_is_einval = false, .no_nonnull_data = false,
1078               .need_candebug = true,
1079               .copyin_sz = sizeof(int), .copyout_sz = 0,
1080               .exec = stackgap_ctl, .copyout_on_error = false, },
1081         [PROC_STACKGAP_STATUS] =
1082             { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1083               .esrch_is_einval = false, .no_nonnull_data = false,
1084               .need_candebug = false,
1085               .copyin_sz = 0, .copyout_sz = sizeof(int),
1086               .exec = stackgap_status, .copyout_on_error = false, },
1087         [PROC_NO_NEW_PRIVS_CTL] =
1088             { .lock_tree = PCTL_SLOCKED, .one_proc = true,
1089               .esrch_is_einval = false, .no_nonnull_data = false,
1090               .need_candebug = true,
1091               .copyin_sz = sizeof(int), .copyout_sz = 0,
1092               .exec = no_new_privs_ctl, .copyout_on_error = false, },
1093         [PROC_NO_NEW_PRIVS_STATUS] =
1094             { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1095               .esrch_is_einval = false, .no_nonnull_data = false,
1096               .need_candebug = false,
1097               .copyin_sz = 0, .copyout_sz = sizeof(int),
1098               .exec = no_new_privs_status, .copyout_on_error = false, },
1099         [PROC_WXMAP_CTL] =
1100             { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1101               .esrch_is_einval = false, .no_nonnull_data = false,
1102               .need_candebug = true,
1103               .copyin_sz = sizeof(int), .copyout_sz = 0,
1104               .exec = wxmap_ctl, .copyout_on_error = false, },
1105         [PROC_WXMAP_STATUS] =
1106             { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1107               .esrch_is_einval = false, .no_nonnull_data = false,
1108               .need_candebug = false,
1109               .copyin_sz = 0, .copyout_sz = sizeof(int),
1110               .exec = wxmap_status, .copyout_on_error = false, },
1111 };
1112
1113 int
1114 sys_procctl(struct thread *td, struct procctl_args *uap)
1115 {
1116         union {
1117                 struct procctl_reaper_status rs;
1118                 struct procctl_reaper_pids rp;
1119                 struct procctl_reaper_kill rk;
1120                 int flags;
1121         } x;
1122         const struct procctl_cmd_info *cmd_info;
1123         int error, error1;
1124
1125         if (uap->com >= PROC_PROCCTL_MD_MIN)
1126                 return (cpu_procctl(td, uap->idtype, uap->id,
1127                     uap->com, uap->data));
1128         if (uap->com == 0 || uap->com >= nitems(procctl_cmds_info))
1129                 return (EINVAL);
1130         cmd_info = &procctl_cmds_info[uap->com];
1131         bzero(&x, sizeof(x));
1132
1133         if (cmd_info->copyin_sz > 0) {
1134                 error = copyin(uap->data, &x, cmd_info->copyin_sz);
1135                 if (error != 0)
1136                         return (error);
1137         } else if (cmd_info->no_nonnull_data && uap->data != NULL) {
1138                 return (EINVAL);
1139         }
1140
1141         error = kern_procctl(td, uap->idtype, uap->id, uap->com, &x);
1142
1143         if (cmd_info->copyout_sz > 0 && (error == 0 ||
1144             cmd_info->copyout_on_error)) {
1145                 error1 = copyout(&x, uap->data, cmd_info->copyout_sz);
1146                 if (error == 0)
1147                         error = error1;
1148         }
1149         return (error);
1150 }
1151
1152 static int
1153 kern_procctl_single(struct thread *td, struct proc *p, int com, void *data)
1154 {
1155
1156         PROC_LOCK_ASSERT(p, MA_OWNED);
1157         return (procctl_cmds_info[com].exec(td, p, data));
1158 }
1159
1160 int
1161 kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
1162 {
1163         struct pgrp *pg;
1164         struct proc *p;
1165         const struct procctl_cmd_info *cmd_info;
1166         int error, first_error, ok;
1167         bool sapblk;
1168
1169         MPASS(com > 0 && com < nitems(procctl_cmds_info));
1170         cmd_info = &procctl_cmds_info[com];
1171         if (idtype != P_PID && cmd_info->one_proc)
1172                 return (EINVAL);
1173
1174         sapblk = false;
1175         if (cmd_info->sapblk != NULL) {
1176                 sapblk = cmd_info->sapblk(td, data);
1177                 if (sapblk && !stop_all_proc_block())
1178                         return (ERESTART);
1179         }
1180
1181         switch (cmd_info->lock_tree) {
1182         case PCTL_XLOCKED:
1183                 sx_xlock(&proctree_lock);
1184                 break;
1185         case PCTL_SLOCKED:
1186                 sx_slock(&proctree_lock);
1187                 break;
1188         default:
1189                 break;
1190         }
1191
1192         switch (idtype) {
1193         case P_PID:
1194                 if (id == 0) {
1195                         p = td->td_proc;
1196                         error = 0;
1197                         PROC_LOCK(p);
1198                 } else {
1199                         p = pfind(id);
1200                         if (p == NULL) {
1201                                 error = cmd_info->esrch_is_einval ?
1202                                     EINVAL : ESRCH;
1203                                 break;
1204                         }
1205                         error = cmd_info->need_candebug ? p_candebug(td, p) :
1206                             p_cansee(td, p);
1207                 }
1208                 if (error == 0)
1209                         error = kern_procctl_single(td, p, com, data);
1210                 PROC_UNLOCK(p);
1211                 break;
1212         case P_PGID:
1213                 /*
1214                  * Attempt to apply the operation to all members of the
1215                  * group.  Ignore processes in the group that can't be
1216                  * seen.  Ignore errors so long as at least one process is
1217                  * able to complete the request successfully.
1218                  */
1219                 pg = pgfind(id);
1220                 if (pg == NULL) {
1221                         error = ESRCH;
1222                         break;
1223                 }
1224                 PGRP_UNLOCK(pg);
1225                 ok = 0;
1226                 first_error = 0;
1227                 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
1228                         PROC_LOCK(p);
1229                         if (p->p_state == PRS_NEW ||
1230                             p->p_state == PRS_ZOMBIE ||
1231                             (cmd_info->need_candebug ? p_candebug(td, p) :
1232                             p_cansee(td, p)) != 0) {
1233                                 PROC_UNLOCK(p);
1234                                 continue;
1235                         }
1236                         error = kern_procctl_single(td, p, com, data);
1237                         PROC_UNLOCK(p);
1238                         if (error == 0)
1239                                 ok = 1;
1240                         else if (first_error == 0)
1241                                 first_error = error;
1242                 }
1243                 if (ok)
1244                         error = 0;
1245                 else if (first_error != 0)
1246                         error = first_error;
1247                 else
1248                         /*
1249                          * Was not able to see any processes in the
1250                          * process group.
1251                          */
1252                         error = ESRCH;
1253                 break;
1254         default:
1255                 error = EINVAL;
1256                 break;
1257         }
1258
1259         switch (cmd_info->lock_tree) {
1260         case PCTL_XLOCKED:
1261                 sx_xunlock(&proctree_lock);
1262                 break;
1263         case PCTL_SLOCKED:
1264                 sx_sunlock(&proctree_lock);
1265                 break;
1266         default:
1267                 break;
1268         }
1269         if (sapblk)
1270                 stop_all_proc_unblock();
1271         return (error);
1272 }
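
For commands registered with one_proc == false, kern_procctl() also accepts P_PGID and applies the request to every visible member of the group, succeeding if at least one member accepts it, as the comment in the P_PGID case explains. A sketch applying PROC_SPROTECT to the caller's own process group:

        #include <sys/procctl.h>
        #include <err.h>
        #include <unistd.h>

        int
        main(void)
        {
                int flags = PPROT_SET;

                if (procctl(P_PGID, getpgrp(), PROC_SPROTECT, &flags) == -1)
                        err(1, "procctl(P_PGID, PROC_SPROTECT)");
                return (0);
        }
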