]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/kern_procctl.c
Revert "sendfile: mark it explicitly as a TCP only feature"
[FreeBSD/FreeBSD.git] / sys / kern / kern_procctl.c
1 /*-
2  * Copyright (c) 2014 John Baldwin
3  * Copyright (c) 2014, 2016 The FreeBSD Foundation
4  *
5  * Portions of this software were developed by Konstantin Belousov
6  * under sponsorship from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include "opt_ktrace.h"
31
32 #include <sys/param.h>
33 #include <sys/_unrhdr.h>
34 #include <sys/systm.h>
35 #include <sys/capsicum.h>
36 #include <sys/lock.h>
37 #include <sys/mman.h>
38 #include <sys/mutex.h>
39 #include <sys/priv.h>
40 #include <sys/proc.h>
41 #include <sys/procctl.h>
42 #include <sys/sx.h>
43 #include <sys/syscallsubr.h>
44 #include <sys/sysproto.h>
45 #include <sys/taskqueue.h>
46 #include <sys/wait.h>
47
48 #include <vm/vm.h>
49 #include <vm/pmap.h>
50 #include <vm/vm_map.h>
51 #include <vm/vm_extern.h>
52
53 static int
54 protect_setchild(struct thread *td, struct proc *p, int flags)
55 {
56
57         PROC_LOCK_ASSERT(p, MA_OWNED);
58         if (p->p_flag & P_SYSTEM || p_cansched(td, p) != 0)
59                 return (0);
60         if (flags & PPROT_SET) {
61                 p->p_flag |= P_PROTECTED;
62                 if (flags & PPROT_INHERIT)
63                         p->p_flag2 |= P2_INHERIT_PROTECTED;
64         } else {
65                 p->p_flag &= ~P_PROTECTED;
66                 p->p_flag2 &= ~P2_INHERIT_PROTECTED;
67         }
68         return (1);
69 }
70
/*
 * Apply protect_setchild() to 'top' and its whole descendant tree.
 * Returns non-zero if at least one process was updated.
 *
 * The walk is iterative: children first, then siblings, then back up
 * the p_pptr chain (never past 'top').  The caller holds 'top' locked;
 * the lock is dropped while moving between processes and the function
 * returns with 'top' locked again.  proctree_lock (held by the caller)
 * keeps the parent/child links stable across the PROC_UNLOCK windows.
 */
static int
protect_setchildren(struct thread *td, struct proc *top, int flags)
{
	struct proc *p;
	int ret;

	p = top;
	ret = 0;
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= protect_setchild(td, p, flags);
		PROC_UNLOCK(p);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top) {
				/* Re-lock top for the caller before returning. */
				PROC_LOCK(p);
				return (ret);
			}
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
		PROC_LOCK(p);
	}
}
104
105 static int
106 protect_set(struct thread *td, struct proc *p, void *data)
107 {
108         int error, flags, ret;
109
110         flags = *(int *)data;
111         switch (PPROT_OP(flags)) {
112         case PPROT_SET:
113         case PPROT_CLEAR:
114                 break;
115         default:
116                 return (EINVAL);
117         }
118
119         if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0)
120                 return (EINVAL);
121
122         error = priv_check(td, PRIV_VM_MADV_PROTECT);
123         if (error)
124                 return (error);
125
126         if (flags & PPROT_DESCEND)
127                 ret = protect_setchildren(td, p, flags);
128         else
129                 ret = protect_setchild(td, p, flags);
130         if (ret == 0)
131                 return (EPERM);
132         return (0);
133 }
134
135 static int
136 reap_acquire(struct thread *td, struct proc *p, void *data __unused)
137 {
138
139         sx_assert(&proctree_lock, SX_XLOCKED);
140         if (p != td->td_proc)
141                 return (EPERM);
142         if ((p->p_treeflag & P_TREE_REAPER) != 0)
143                 return (EBUSY);
144         p->p_treeflag |= P_TREE_REAPER;
145         /*
146          * We do not reattach existing children and the whole tree
147          * under them to us, since p->p_reaper already seen them.
148          */
149         return (0);
150 }
151
152 static int
153 reap_release(struct thread *td, struct proc *p, void *data __unused)
154 {
155
156         sx_assert(&proctree_lock, SX_XLOCKED);
157         if (p != td->td_proc)
158                 return (EPERM);
159         if (p == initproc)
160                 return (EINVAL);
161         if ((p->p_treeflag & P_TREE_REAPER) == 0)
162                 return (EINVAL);
163         reaper_abandon_children(p, false);
164         return (0);
165 }
166
167 static int
168 reap_status(struct thread *td, struct proc *p, void *data)
169 {
170         struct proc *reap, *p2, *first_p;
171         struct procctl_reaper_status *rs;
172
173         rs = data;
174         sx_assert(&proctree_lock, SX_LOCKED);
175         if ((p->p_treeflag & P_TREE_REAPER) == 0) {
176                 reap = p->p_reaper;
177         } else {
178                 reap = p;
179                 rs->rs_flags |= REAPER_STATUS_OWNED;
180         }
181         if (reap == initproc)
182                 rs->rs_flags |= REAPER_STATUS_REALINIT;
183         rs->rs_reaper = reap->p_pid;
184         rs->rs_descendants = 0;
185         rs->rs_children = 0;
186         if (!LIST_EMPTY(&reap->p_reaplist)) {
187                 first_p = LIST_FIRST(&reap->p_children);
188                 if (first_p == NULL)
189                         first_p = LIST_FIRST(&reap->p_reaplist);
190                 rs->rs_pid = first_p->p_pid;
191                 LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
192                         if (proc_realparent(p2) == reap)
193                                 rs->rs_children++;
194                         rs->rs_descendants++;
195                 }
196         } else {
197                 rs->rs_pid = -1;
198         }
199         return (0);
200 }
201
/*
 * PROC_REAP_GETPIDS handler: copy out an array of pidinfo records, one
 * for each descendant in the reaper's reap list, up to the user's
 * rp_count limit.
 *
 * Locking: entered with p locked and proctree_lock shared; both are
 * dropped around the M_WAITOK allocation and the copyout, then
 * reacquired before returning.  Because the locks are dropped, the
 * reap list may change between the counting pass and the fill pass;
 * the fill pass simply stops at the smaller of the two bounds.
 */
static int
reap_getpids(struct thread *td, struct proc *p, void *data)
{
	struct proc *reap, *p2;
	struct procctl_reaper_pidinfo *pi, *pip;
	struct procctl_reaper_pids *rp;
	u_int i, n;
	int error;

	rp = data;
	sx_assert(&proctree_lock, SX_LOCKED);
	PROC_UNLOCK(p);
	reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
	n = i = 0;
	error = 0;
	/* First pass: count descendants to size the temporary buffer. */
	LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling)
		n++;
	/* Drop proctree_lock: malloc(M_WAITOK) may sleep. */
	sx_unlock(&proctree_lock);
	if (rp->rp_count < n)
		n = rp->rp_count;
	pi = malloc(n * sizeof(*pi), M_TEMP, M_WAITOK);
	sx_slock(&proctree_lock);
	/* Second pass: fill in at most n records under the re-taken lock. */
	LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
		if (i == n)
			break;
		pip = &pi[i];
		bzero(pip, sizeof(*pip));
		pip->pi_pid = p2->p_pid;
		pip->pi_subtree = p2->p_reapsubtree;
		pip->pi_flags = REAPER_PIDINFO_VALID;
		if (proc_realparent(p2) == reap)
			pip->pi_flags |= REAPER_PIDINFO_CHILD;
		if ((p2->p_treeflag & P_TREE_REAPER) != 0)
			pip->pi_flags |= REAPER_PIDINFO_REAPER;
		if ((p2->p_flag & P_STOPPED) != 0)
			pip->pi_flags |= REAPER_PIDINFO_STOPPED;
		if (p2->p_state == PRS_ZOMBIE)
			pip->pi_flags |= REAPER_PIDINFO_ZOMBIE;
		else if ((p2->p_flag & P_WEXIT) != 0)
			pip->pi_flags |= REAPER_PIDINFO_EXITING;
		i++;
	}
	/* Drop the lock again: copyout may fault and sleep. */
	sx_sunlock(&proctree_lock);
	error = copyout(pi, rp->rp_pids, i * sizeof(*pi));
	free(pi, M_TEMP);
	/* Restore the locking state the caller expects. */
	sx_slock(&proctree_lock);
	PROC_LOCK(p);
	return (error);
}
251
/*
 * State shared between reap_kill() and the taskqueue task that
 * single-threads and signals one target process.  Lives on the
 * requesting thread's stack; the requester PHOLDs its own process so
 * the stack cannot be swapped out while the task runs.
 */
struct reap_kill_proc_work {
	struct ucred *cr;	/* creds checked via cr_cansignal() */
	struct proc *target;	/* process to signal; NULLed when task done */
	ksiginfo_t *ksi;	/* signal info passed to pksignal() */
	struct procctl_reaper_kill *rk;	/* request args and result counters */
	int *error;		/* accumulated status, ESRCH until a kill */
	struct task t;		/* taskqueue linkage */
};
260
/*
 * Signal w->target with w->rk->rk_sig, optionally single-threading it
 * first so no thread escapes by forking mid-delivery.  Called with the
 * target locked and held.  On permission failure, records the first
 * failing pid in rk_fpid (only while *w->error is still ESRCH, i.e.
 * nothing was killed yet).
 */
static void
reap_kill_proc_locked(struct reap_kill_proc_work *w)
{
	int error1;
	bool need_stop;

	PROC_LOCK_ASSERT(w->target, MA_OWNED);
	PROC_ASSERT_HELD(w->target);

	error1 = cr_cansignal(w->cr, w->target, w->rk->rk_sig);
	if (error1 != 0) {
		if (*w->error == ESRCH) {
			w->rk->rk_fpid = w->target->p_pid;
			*w->error = error1;
		}
		return;
	}

	/*
	 * The need_stop indicates if the target process needs to be
	 * suspended before being signalled.  This is needed when we
	 * guarantee that all processes in subtree are signalled,
	 * avoiding the race with some process not yet fully linked
	 * into all structures during fork, ignored by iterator, and
	 * then escaping signalling.
	 *
	 * The thread cannot usefully stop itself anyway, and if other
	 * thread of the current process forks while the current
	 * thread signals the whole subtree, it is an application
	 * race.
	 */
	if ((w->target->p_flag & (P_KPROC | P_SYSTEM | P_STOPPED)) == 0)
		need_stop = thread_single(w->target, SINGLE_ALLPROC) == 0;
	else
		need_stop = false;

	(void)pksignal(w->target, w->rk->rk_sig, w->ksi);
	w->rk->rk_killed++;
	*w->error = error1;

	if (need_stop)
		thread_single_end(w->target, SINGLE_ALLPROC);
}
304
/*
 * Taskqueue handler: deliver the signal to w->target unless it is
 * already exiting, then clear w->target under proctree_lock and wake
 * the requester sleeping on &w->target in reap_kill_subtree_once().
 */
static void
reap_kill_proc_work(void *arg, int pending __unused)
{
	struct reap_kill_proc_work *w;

	w = arg;
	PROC_LOCK(w->target);
	if ((w->target->p_flag2 & P2_WEXIT) == 0)
		reap_kill_proc_locked(w);
	PROC_UNLOCK(w->target);

	/*
	 * proctree_lock serializes against the requester's sx_sleep():
	 * clearing target and issuing the wakeup under the lock ensures
	 * the wakeup is not lost.
	 */
	sx_xlock(&proctree_lock);
	w->target = NULL;
	wakeup(&w->target);
	sx_xunlock(&proctree_lock);
}
321
/*
 * Work-list entry used when walking a reaper subtree: each nested
 * reaper whose descendants still need to be visited is queued here.
 */
struct reap_kill_tracker {
	struct proc *parent;	/* reaper held via _PHOLD_LITE */
	TAILQ_ENTRY(reap_kill_tracker) link;
};

TAILQ_HEAD(reap_kill_tracker_head, reap_kill_tracker);
328
329 static void
330 reap_kill_sched(struct reap_kill_tracker_head *tracker, struct proc *p2)
331 {
332         struct reap_kill_tracker *t;
333
334         PROC_LOCK(p2);
335         if ((p2->p_flag2 & P2_WEXIT) != 0) {
336                 PROC_UNLOCK(p2);
337                 return;
338         }
339         _PHOLD_LITE(p2);
340         PROC_UNLOCK(p2);
341         t = malloc(sizeof(struct reap_kill_tracker), M_TEMP, M_WAITOK);
342         t->parent = p2;
343         TAILQ_INSERT_TAIL(tracker, t, link);
344 }
345
/*
 * Drop the hold taken by reap_kill_sched() and free the tracker entry.
 */
static void
reap_kill_sched_free(struct reap_kill_tracker *t)
{
	PRELE(t->parent);
	free(t, M_TEMP);
}
352
/*
 * REAPER_KILL_CHILDREN mode: signal only the direct children of the
 * reaper.  Permission failures record the first failing pid in
 * rk_fpid (while *error is still ESRCH) but do not stop the loop, so
 * every signallable child is still signalled.
 */
static void
reap_kill_children(struct thread *td, struct proc *reaper,
    struct procctl_reaper_kill *rk, ksiginfo_t *ksi, int *error)
{
	struct proc *p2;
	int error1;

	LIST_FOREACH(p2, &reaper->p_children, p_sibling) {
		PROC_LOCK(p2);
		/* Skip children already past the point of no return. */
		if ((p2->p_flag2 & P2_WEXIT) == 0) {
			error1 = p_cansignal(td, p2, rk->rk_sig);
			if (error1 != 0) {
				if (*error == ESRCH) {
					rk->rk_fpid = p2->p_pid;
					*error = error1;
				}

				/*
				 * Do not end the loop on error,
				 * signal everything we can.
				 */
			} else {
				(void)pksignal(p2, rk->rk_sig, ksi);
				rk->rk_killed++;
			}
		}
		PROC_UNLOCK(p2);
	}
}
382
/*
 * One pass over the reaper subtree, signalling every descendant not
 * yet recorded in 'pids'.  Returns true if anything was signalled (or
 * a stale tracker was found), telling the caller to iterate again —
 * new processes may have been forked into the subtree while locks
 * were dropped.
 *
 * The current process is signalled inline (a task cannot usefully
 * single-thread its own requester); every other target is handed to
 * the taskqueue via 'w' and we sleep until the task completes.
 */
static bool
reap_kill_subtree_once(struct thread *td, struct proc *p, struct proc *reaper,
    struct unrhdr *pids, struct reap_kill_proc_work *w)
{
	struct reap_kill_tracker_head tracker;
	struct reap_kill_tracker *t;
	struct proc *p2;
	int r, xlocked;
	bool res, st;

	res = false;
	TAILQ_INIT(&tracker);
	reap_kill_sched(&tracker, reaper);
	while ((t = TAILQ_FIRST(&tracker)) != NULL) {
		TAILQ_REMOVE(&tracker, t, link);

		/*
		 * Since reap_kill_proc() drops proctree_lock sx, it
		 * is possible that the tracked reaper is no longer.
		 * In this case the subtree is reparented to the new
		 * reaper, which should handle it.
		 */
		if ((t->parent->p_treeflag & P_TREE_REAPER) == 0) {
			reap_kill_sched_free(t);
			res = true;
			continue;
		}

		LIST_FOREACH(p2, &t->parent->p_reaplist, p_reapsibling) {
			/* SUBTREE mode filters by rk_subtree at the top level only. */
			if (t->parent == reaper &&
			    (w->rk->rk_flags & REAPER_KILL_SUBTREE) != 0 &&
			    p2->p_reapsubtree != w->rk->rk_subtree)
				continue;
			if ((p2->p_treeflag & P_TREE_REAPER) != 0)
				reap_kill_sched(&tracker, p2);

			/*
			 * Handle possible pid reuse.  If we recorded
			 * p2 as killed but its p_flag2 does not
			 * confirm it, that means that the process
			 * terminated and its id was reused by other
			 * process in the reaper subtree.
			 *
			 * Unlocked read of p2->p_flag2 is fine, it is
			 * our thread that set the tested flag.
			 */
			if (alloc_unr_specific(pids, p2->p_pid) != p2->p_pid &&
			    (atomic_load_int(&p2->p_flag2) &
			    (P2_REAPKILLED | P2_WEXIT)) != 0)
				continue;

			if (p2 == td->td_proc) {
				/* Signal ourselves inline, without the task. */
				if ((p2->p_flag & P_HADTHREADS) != 0 &&
				    (p2->p_flag2 & P2_WEXIT) == 0) {
					/* thread_single() needs proctree_lock dropped. */
					xlocked = sx_xlocked(&proctree_lock);
					sx_unlock(&proctree_lock);
					st = true;
				} else {
					st = false;
				}
				PROC_LOCK(p2);
				/*
				 * sapblk ensures that only one thread
				 * in the system sets this flag.
				 */
				p2->p_flag2 |= P2_REAPKILLED;
				if (st)
					r = thread_single(p2, SINGLE_NO_EXIT);
				(void)pksignal(p2, w->rk->rk_sig, w->ksi);
				w->rk->rk_killed++;
				if (st && r == 0)
					thread_single_end(p2, SINGLE_NO_EXIT);
				PROC_UNLOCK(p2);
				if (st) {
					/* Restore the lock mode we dropped above. */
					if (xlocked)
						sx_xlock(&proctree_lock);
					else
						sx_slock(&proctree_lock);
				}
			} else {
				PROC_LOCK(p2);
				if ((p2->p_flag2 & P2_WEXIT) == 0) {
					/* Hold p2 across the task hand-off. */
					_PHOLD_LITE(p2);
					p2->p_flag2 |= P2_REAPKILLED;
					PROC_UNLOCK(p2);
					w->target = p2;
					taskqueue_enqueue(taskqueue_thread,
					    &w->t);
					/* Task clears w->target and wakes us. */
					while (w->target != NULL) {
						sx_sleep(&w->target,
						    &proctree_lock, PWAIT,
						    "reapst", 0);
					}
					PROC_LOCK(p2);
					_PRELE(p2);
				}
				PROC_UNLOCK(p2);
			}
			res = true;
		}
		reap_kill_sched_free(t);
	}
	return (res);
}
487
/*
 * Signal the whole reaper subtree, repeating the pass until no new
 * processes are found (forks racing with the walk eventually either
 * get signalled or exit).  Afterwards, clear the P2_REAPKILLED
 * bookkeeping flag on every pid we recorded.
 */
static void
reap_kill_subtree(struct thread *td, struct proc *p, struct proc *reaper,
    struct reap_kill_proc_work *w)
{
	struct unrhdr pids;
	void *ihandle;
	struct proc *p2;
	int pid;

	/*
	 * pids records processes which were already signalled, to
	 * avoid doubling signals to them if iteration needs to be
	 * repeated.
	 */
	init_unrhdr(&pids, 1, PID_MAX, UNR_NO_MTX);
	PROC_LOCK(td->td_proc);
	/* Nothing to do if the requester itself is already exiting. */
	if ((td->td_proc->p_flag2 & P2_WEXIT) != 0) {
		PROC_UNLOCK(td->td_proc);
		goto out;
	}
	PROC_UNLOCK(td->td_proc);
	while (reap_kill_subtree_once(td, p, reaper, &pids, w))
	       ;

	/* Clear P2_REAPKILLED on every process we marked above. */
	ihandle = create_iter_unr(&pids);
	while ((pid = next_iter_unr(ihandle)) != -1) {
		/* pfind() returns the process locked, or NULL if gone. */
		p2 = pfind(pid);
		if (p2 != NULL) {
			p2->p_flag2 &= ~P2_REAPKILLED;
			PROC_UNLOCK(p2);
		}
	}
	free_iter_unr(ihandle);

out:
	clean_unrhdr(&pids);
	clear_unrhdr(&pids);
}
526
527 static bool
528 reap_kill_sapblk(struct thread *td __unused, void *data)
529 {
530         struct procctl_reaper_kill *rk;
531
532         rk = data;
533         return ((rk->rk_flags & REAPER_KILL_CHILDREN) == 0);
534 }
535
/*
 * PROC_REAP_KILL handler: deliver rk_sig to the reaper's children or
 * to its whole subtree.  Disallowed in capability mode.  Exactly one
 * of REAPER_KILL_CHILDREN / REAPER_KILL_SUBTREE may be set (or
 * neither, meaning the full reap list).  Returns ESRCH if nothing was
 * signalled, otherwise the first permission error or 0.
 */
static int
reap_kill(struct thread *td, struct proc *p, void *data)
{
	struct reap_kill_proc_work w;
	struct proc *reaper;
	ksiginfo_t ksi;
	struct procctl_reaper_kill *rk;
	int error;

	rk = data;
	sx_assert(&proctree_lock, SX_LOCKED);
	if (CAP_TRACING(td))
		ktrcapfail(CAPFAIL_SIGNAL, &rk->rk_sig);
	if (IN_CAPABILITY_MODE(td))
		return (ECAPMODE);
	/* Validate signal number and mutually-exclusive mode flags. */
	if (rk->rk_sig <= 0 || rk->rk_sig > _SIG_MAXSIG ||
	    (rk->rk_flags & ~(REAPER_KILL_CHILDREN |
	    REAPER_KILL_SUBTREE)) != 0 || (rk->rk_flags &
	    (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE)) ==
	    (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE))
		return (EINVAL);
	PROC_UNLOCK(p);
	reaper = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
	/* Signal appears to come from the requesting process. */
	ksiginfo_init(&ksi);
	ksi.ksi_signo = rk->rk_sig;
	ksi.ksi_code = SI_USER;
	ksi.ksi_pid = td->td_proc->p_pid;
	ksi.ksi_uid = td->td_ucred->cr_ruid;
	error = ESRCH;
	rk->rk_killed = 0;
	rk->rk_fpid = -1;
	if ((rk->rk_flags & REAPER_KILL_CHILDREN) != 0) {
		reap_kill_children(td, reaper, rk, &ksi, &error);
	} else {
		w.cr = crhold(td->td_ucred);
		w.ksi = &ksi;
		w.rk = rk;
		w.error = &error;
		TASK_INIT(&w.t, 0, reap_kill_proc_work, &w);

		/*
		 * Prevent swapout, since w, ksi, and possibly rk, are
		 * allocated on the stack.  We sleep in
		 * reap_kill_subtree_once() waiting for task to
		 * complete single-threading.
		 */
		PHOLD(td->td_proc);

		reap_kill_subtree(td, p, reaper, &w);
		PRELE(td->td_proc);
		crfree(w.cr);
	}
	PROC_LOCK(p);
	return (error);
}
591
592 static int
593 trace_ctl(struct thread *td, struct proc *p, void *data)
594 {
595         int state;
596
597         PROC_LOCK_ASSERT(p, MA_OWNED);
598         state = *(int *)data;
599
600         /*
601          * Ktrace changes p_traceflag from or to zero under the
602          * process lock, so the test does not need to acquire ktrace
603          * mutex.
604          */
605         if ((p->p_flag & P_TRACED) != 0 || p->p_traceflag != 0)
606                 return (EBUSY);
607
608         switch (state) {
609         case PROC_TRACE_CTL_ENABLE:
610                 if (td->td_proc != p)
611                         return (EPERM);
612                 p->p_flag2 &= ~(P2_NOTRACE | P2_NOTRACE_EXEC);
613                 break;
614         case PROC_TRACE_CTL_DISABLE_EXEC:
615                 p->p_flag2 |= P2_NOTRACE_EXEC | P2_NOTRACE;
616                 break;
617         case PROC_TRACE_CTL_DISABLE:
618                 if ((p->p_flag2 & P2_NOTRACE_EXEC) != 0) {
619                         KASSERT((p->p_flag2 & P2_NOTRACE) != 0,
620                             ("dandling P2_NOTRACE_EXEC"));
621                         if (td->td_proc != p)
622                                 return (EPERM);
623                         p->p_flag2 &= ~P2_NOTRACE_EXEC;
624                 } else {
625                         p->p_flag2 |= P2_NOTRACE;
626                 }
627                 break;
628         default:
629                 return (EINVAL);
630         }
631         return (0);
632 }
633
634 static int
635 trace_status(struct thread *td, struct proc *p, void *data)
636 {
637         int *status;
638
639         status = data;
640         if ((p->p_flag2 & P2_NOTRACE) != 0) {
641                 KASSERT((p->p_flag & P_TRACED) == 0,
642                     ("%d traced but tracing disabled", p->p_pid));
643                 *status = -1;
644         } else if ((p->p_flag & P_TRACED) != 0) {
645                 *status = p->p_pptr->p_pid;
646         } else {
647                 *status = 0;
648         }
649         return (0);
650 }
651
652 static int
653 trapcap_ctl(struct thread *td, struct proc *p, void *data)
654 {
655         int state;
656
657         PROC_LOCK_ASSERT(p, MA_OWNED);
658         state = *(int *)data;
659
660         switch (state) {
661         case PROC_TRAPCAP_CTL_ENABLE:
662                 p->p_flag2 |= P2_TRAPCAP;
663                 break;
664         case PROC_TRAPCAP_CTL_DISABLE:
665                 p->p_flag2 &= ~P2_TRAPCAP;
666                 break;
667         default:
668                 return (EINVAL);
669         }
670         return (0);
671 }
672
673 static int
674 trapcap_status(struct thread *td, struct proc *p, void *data)
675 {
676         int *status;
677
678         status = data;
679         *status = (p->p_flag2 & P2_TRAPCAP) != 0 ? PROC_TRAPCAP_CTL_ENABLE :
680             PROC_TRAPCAP_CTL_DISABLE;
681         return (0);
682 }
683
684 static int
685 no_new_privs_ctl(struct thread *td, struct proc *p, void *data)
686 {
687         int state;
688
689         PROC_LOCK_ASSERT(p, MA_OWNED);
690         state = *(int *)data;
691
692         if (state != PROC_NO_NEW_PRIVS_ENABLE)
693                 return (EINVAL);
694         p->p_flag2 |= P2_NO_NEW_PRIVS;
695         return (0);
696 }
697
698 static int
699 no_new_privs_status(struct thread *td, struct proc *p, void *data)
700 {
701
702         *(int *)data = (p->p_flag2 & P2_NO_NEW_PRIVS) != 0 ?
703             PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
704         return (0);
705 }
706
707 static int
708 protmax_ctl(struct thread *td, struct proc *p, void *data)
709 {
710         int state;
711
712         PROC_LOCK_ASSERT(p, MA_OWNED);
713         state = *(int *)data;
714
715         switch (state) {
716         case PROC_PROTMAX_FORCE_ENABLE:
717                 p->p_flag2 &= ~P2_PROTMAX_DISABLE;
718                 p->p_flag2 |= P2_PROTMAX_ENABLE;
719                 break;
720         case PROC_PROTMAX_FORCE_DISABLE:
721                 p->p_flag2 |= P2_PROTMAX_DISABLE;
722                 p->p_flag2 &= ~P2_PROTMAX_ENABLE;
723                 break;
724         case PROC_PROTMAX_NOFORCE:
725                 p->p_flag2 &= ~(P2_PROTMAX_ENABLE | P2_PROTMAX_DISABLE);
726                 break;
727         default:
728                 return (EINVAL);
729         }
730         return (0);
731 }
732
733 static int
734 protmax_status(struct thread *td, struct proc *p, void *data)
735 {
736         int d;
737
738         switch (p->p_flag2 & (P2_PROTMAX_ENABLE | P2_PROTMAX_DISABLE)) {
739         case 0:
740                 d = PROC_PROTMAX_NOFORCE;
741                 break;
742         case P2_PROTMAX_ENABLE:
743                 d = PROC_PROTMAX_FORCE_ENABLE;
744                 break;
745         case P2_PROTMAX_DISABLE:
746                 d = PROC_PROTMAX_FORCE_DISABLE;
747                 break;
748         }
749         if (kern_mmap_maxprot(p, PROT_READ) == PROT_READ)
750                 d |= PROC_PROTMAX_ACTIVE;
751         *(int *)data = d;
752         return (0);
753 }
754
755 static int
756 aslr_ctl(struct thread *td, struct proc *p, void *data)
757 {
758         int state;
759
760         PROC_LOCK_ASSERT(p, MA_OWNED);
761         state = *(int *)data;
762
763         switch (state) {
764         case PROC_ASLR_FORCE_ENABLE:
765                 p->p_flag2 &= ~P2_ASLR_DISABLE;
766                 p->p_flag2 |= P2_ASLR_ENABLE;
767                 break;
768         case PROC_ASLR_FORCE_DISABLE:
769                 p->p_flag2 |= P2_ASLR_DISABLE;
770                 p->p_flag2 &= ~P2_ASLR_ENABLE;
771                 break;
772         case PROC_ASLR_NOFORCE:
773                 p->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE);
774                 break;
775         default:
776                 return (EINVAL);
777         }
778         return (0);
779 }
780
781 static int
782 aslr_status(struct thread *td, struct proc *p, void *data)
783 {
784         struct vmspace *vm;
785         int d;
786
787         switch (p->p_flag2 & (P2_ASLR_ENABLE | P2_ASLR_DISABLE)) {
788         case 0:
789                 d = PROC_ASLR_NOFORCE;
790                 break;
791         case P2_ASLR_ENABLE:
792                 d = PROC_ASLR_FORCE_ENABLE;
793                 break;
794         case P2_ASLR_DISABLE:
795                 d = PROC_ASLR_FORCE_DISABLE;
796                 break;
797         }
798         if ((p->p_flag & P_WEXIT) == 0) {
799                 _PHOLD(p);
800                 PROC_UNLOCK(p);
801                 vm = vmspace_acquire_ref(p);
802                 if (vm != NULL) {
803                         if ((vm->vm_map.flags & MAP_ASLR) != 0)
804                                 d |= PROC_ASLR_ACTIVE;
805                         vmspace_free(vm);
806                 }
807                 PROC_LOCK(p);
808                 _PRELE(p);
809         }
810         *(int *)data = d;
811         return (0);
812 }
813
814 static int
815 stackgap_ctl(struct thread *td, struct proc *p, void *data)
816 {
817         int state;
818
819         PROC_LOCK_ASSERT(p, MA_OWNED);
820         state = *(int *)data;
821
822         if ((state & ~(PROC_STACKGAP_ENABLE | PROC_STACKGAP_DISABLE |
823             PROC_STACKGAP_ENABLE_EXEC | PROC_STACKGAP_DISABLE_EXEC)) != 0)
824                 return (EINVAL);
825         switch (state & (PROC_STACKGAP_ENABLE | PROC_STACKGAP_DISABLE)) {
826         case PROC_STACKGAP_ENABLE:
827                 if ((p->p_flag2 & P2_STKGAP_DISABLE) != 0)
828                         return (EINVAL);
829                 break;
830         case PROC_STACKGAP_DISABLE:
831                 p->p_flag2 |= P2_STKGAP_DISABLE;
832                 break;
833         case 0:
834                 break;
835         default:
836                 return (EINVAL);
837         }
838         switch (state & (PROC_STACKGAP_ENABLE_EXEC |
839             PROC_STACKGAP_DISABLE_EXEC)) {
840         case PROC_STACKGAP_ENABLE_EXEC:
841                 p->p_flag2 &= ~P2_STKGAP_DISABLE_EXEC;
842                 break;
843         case PROC_STACKGAP_DISABLE_EXEC:
844                 p->p_flag2 |= P2_STKGAP_DISABLE_EXEC;
845                 break;
846         case 0:
847                 break;
848         default:
849                 return (EINVAL);
850         }
851         return (0);
852 }
853
854 static int
855 stackgap_status(struct thread *td, struct proc *p, void *data)
856 {
857         int d;
858
859         PROC_LOCK_ASSERT(p, MA_OWNED);
860
861         d = (p->p_flag2 & P2_STKGAP_DISABLE) != 0 ? PROC_STACKGAP_DISABLE :
862             PROC_STACKGAP_ENABLE;
863         d |= (p->p_flag2 & P2_STKGAP_DISABLE_EXEC) != 0 ?
864             PROC_STACKGAP_DISABLE_EXEC : PROC_STACKGAP_ENABLE_EXEC;
865         *(int *)data = d;
866         return (0);
867 }
868
/*
 * PROC_WXMAP_CTL handler: control the W^X mapping policy.
 * PROC_WX_MAPPINGS_PERMIT clears MAP_WXORX on the live vm map
 * (permitting writable+executable mappings immediately);
 * PROC_WX_MAPPINGS_DISALLOW_EXEC arranges for W^X enforcement at the
 * next exec.  Both flag settings persist in p_flag2.
 */
static int
wxmap_ctl(struct thread *td, struct proc *p, void *data)
{
	struct vmspace *vm;
	vm_map_t map;
	int state;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((p->p_flag & P_WEXIT) != 0)
		return (ESRCH);
	state = *(int *)data;

	switch (state) {
	case PROC_WX_MAPPINGS_PERMIT:
		p->p_flag2 |= P2_WXORX_DISABLE;
		/* Hold p so it survives while its lock is dropped. */
		_PHOLD(p);
		PROC_UNLOCK(p);
		vm = vmspace_acquire_ref(p);
		if (vm != NULL) {
			map = &vm->vm_map;
			/* Clear enforcement on the live map under the map lock. */
			vm_map_lock(map);
			map->flags &= ~MAP_WXORX;
			vm_map_unlock(map);
			vmspace_free(vm);
		}
		PROC_LOCK(p);
		_PRELE(p);
		break;
	case PROC_WX_MAPPINGS_DISALLOW_EXEC:
		p->p_flag2 |= P2_WXORX_ENABLE_EXEC;
		break;
	default:
		return (EINVAL);
	}

	return (0);
}
906
/*
 * Implement PROC_WXMAP_STATUS: report the W^X mapping policy of the
 * target process into the int pointed to by data.
 *
 * The result combines the per-process flags (P2_WXORX_DISABLE,
 * P2_WXORX_ENABLE_EXEC) with PROC_WXORX_ENFORCE when the current vm
 * map actually carries the MAP_WXORX enforcement flag.  The process
 * lock is dropped (under a hold) to safely read the vmspace.
 * Returns ESRCH if the process is exiting.
 */
static int
wxmap_status(struct thread *td, struct proc *p, void *data)
{
	struct vmspace *vm;
	int d;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((p->p_flag & P_WEXIT) != 0)
		return (ESRCH);

	d = 0;
	if ((p->p_flag2 & P2_WXORX_DISABLE) != 0)
		d |= PROC_WX_MAPPINGS_PERMIT;
	if ((p->p_flag2 & P2_WXORX_ENABLE_EXEC) != 0)
		d |= PROC_WX_MAPPINGS_DISALLOW_EXEC;
	/* Hold p so the unlocked vmspace access below stays valid. */
	_PHOLD(p);
	PROC_UNLOCK(p);
	vm = vmspace_acquire_ref(p);
	if (vm != NULL) {
		if ((vm->vm_map.flags & MAP_WXORX) != 0)
			d |= PROC_WXORX_ENFORCE;
		vmspace_free(vm);
	}
	PROC_LOCK(p);
	_PRELE(p);
	*(int *)data = d;
	return (0);
}
935
936 static int
937 pdeathsig_ctl(struct thread *td, struct proc *p, void *data)
938 {
939         int signum;
940
941         signum = *(int *)data;
942         if (p != td->td_proc || (signum != 0 && !_SIG_VALID(signum)))
943                 return (EINVAL);
944         p->p_pdeathsig = signum;
945         return (0);
946 }
947
948 static int
949 pdeathsig_status(struct thread *td, struct proc *p, void *data)
950 {
951         if (p != td->td_proc)
952                 return (EINVAL);
953         *(int *)data = p->p_pdeathsig;
954         return (0);
955 }
956
/*
 * How kern_procctl() holds proctree_lock while a command runs; stored
 * in procctl_cmd_info.lock_tree.
 */
enum {
	PCTL_SLOCKED,	/* proctree_lock held shared */
	PCTL_XLOCKED,	/* proctree_lock held exclusive */
	PCTL_UNLOCKED,	/* proctree_lock not taken */
};
962
/*
 * Dispatch descriptor for a procctl(2) command; one entry per command
 * in procctl_cmds_info[].
 */
struct procctl_cmd_info {
	int lock_tree;			/* PCTL_* proctree_lock mode */
	bool one_proc : 1;		/* only valid with idtype P_PID */
	bool esrch_is_einval : 1;	/* report EINVAL if pid not found */
	bool copyout_on_error : 1;	/* copy out results even on failure */
	bool no_nonnull_data : 1;	/* uap->data must be NULL */
	bool need_candebug : 1;		/* check p_candebug(), else p_cansee() */
	int copyin_sz;			/* bytes copied in from uap->data */
	int copyout_sz;			/* bytes copied out to uap->data */
	int (*exec)(struct thread *, struct proc *, void *);
	bool (*sapblk)(struct thread *, void *);
};
/*
 * Table of all MI procctl(2) commands, indexed by command number.
 * sys_procctl() consults it for copyin/copyout sizes and validation
 * flags; kern_procctl() uses lock_tree, one_proc, need_candebug and
 * the exec/sapblk callbacks.
 */
static const struct procctl_cmd_info procctl_cmds_info[] = {
	[PROC_SPROTECT] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = false,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = protect_set, .copyout_on_error = false, },
	[PROC_REAP_ACQUIRE] =
	    { .lock_tree = PCTL_XLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = true,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = 0,
	      .exec = reap_acquire, .copyout_on_error = false, },
	[PROC_REAP_RELEASE] =
	    { .lock_tree = PCTL_XLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = true,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = 0,
	      .exec = reap_release, .copyout_on_error = false, },
	[PROC_REAP_STATUS] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0,
	      .copyout_sz = sizeof(struct procctl_reaper_status),
	      .exec = reap_status, .copyout_on_error = false, },
	[PROC_REAP_GETPIDS] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = sizeof(struct procctl_reaper_pids),
	      .copyout_sz = 0,
	      .exec = reap_getpids, .copyout_on_error = false, },
	[PROC_REAP_KILL] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = sizeof(struct procctl_reaper_kill),
	      .copyout_sz = sizeof(struct procctl_reaper_kill),
	      .exec = reap_kill, .copyout_on_error = true,
	      .sapblk = reap_kill_sapblk, },
	[PROC_TRACE_CTL] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = false,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = trace_ctl, .copyout_on_error = false, },
	[PROC_TRACE_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = trace_status, .copyout_on_error = false, },
	[PROC_TRAPCAP_CTL] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = false,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = trapcap_ctl, .copyout_on_error = false, },
	[PROC_TRAPCAP_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = trapcap_status, .copyout_on_error = false, },
	[PROC_PDEATHSIG_CTL] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = true, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = pdeathsig_ctl, .copyout_on_error = false, },
	[PROC_PDEATHSIG_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = true, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = pdeathsig_status, .copyout_on_error = false, },
	[PROC_ASLR_CTL] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = aslr_ctl, .copyout_on_error = false, },
	[PROC_ASLR_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = aslr_status, .copyout_on_error = false, },
	[PROC_PROTMAX_CTL] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = protmax_ctl, .copyout_on_error = false, },
	[PROC_PROTMAX_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = protmax_status, .copyout_on_error = false, },
	[PROC_STACKGAP_CTL] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = stackgap_ctl, .copyout_on_error = false, },
	[PROC_STACKGAP_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = stackgap_status, .copyout_on_error = false, },
	[PROC_NO_NEW_PRIVS_CTL] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = no_new_privs_ctl, .copyout_on_error = false, },
	[PROC_NO_NEW_PRIVS_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = no_new_privs_status, .copyout_on_error = false, },
	[PROC_WXMAP_CTL] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = wxmap_ctl, .copyout_on_error = false, },
	[PROC_WXMAP_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = wxmap_status, .copyout_on_error = false, },
};
1113
/*
 * procctl(2) system call entry point.  Routes machine-dependent
 * commands to cpu_procctl(), validates the command number, performs
 * the copyin/copyout protocol described by procctl_cmds_info[], and
 * hands the actual work to kern_procctl().
 */
int
sys_procctl(struct thread *td, struct procctl_args *uap)
{
	/* Kernel-side staging buffer for all command argument shapes. */
	union {
		struct procctl_reaper_status rs;
		struct procctl_reaper_pids rp;
		struct procctl_reaper_kill rk;
		int flags;
	} x;
	const struct procctl_cmd_info *cmd_info;
	int error, error1;

	/* MD commands are handled entirely by machine-dependent code. */
	if (uap->com >= PROC_PROCCTL_MD_MIN)
		return (cpu_procctl(td, uap->idtype, uap->id,
		    uap->com, uap->data));
	if (uap->com <= 0 || uap->com >= nitems(procctl_cmds_info))
		return (EINVAL);
	cmd_info = &procctl_cmds_info[uap->com];
	bzero(&x, sizeof(x));

	if (cmd_info->copyin_sz > 0) {
		error = copyin(uap->data, &x, cmd_info->copyin_sz);
		if (error != 0)
			return (error);
	} else if (cmd_info->no_nonnull_data && uap->data != NULL) {
		return (EINVAL);
	}

	error = kern_procctl(td, uap->idtype, uap->id, uap->com, &x);

	/*
	 * Copy results out on success, or even on failure for commands
	 * that report partial progress (copyout_on_error).  A copyout
	 * failure is only reported if the command itself succeeded.
	 */
	if (cmd_info->copyout_sz > 0 && (error == 0 ||
	    cmd_info->copyout_on_error)) {
		error1 = copyout(&x, uap->data, cmd_info->copyout_sz);
		if (error == 0)
			error = error1;
	}
	return (error);
}
1152
1153 static int
1154 kern_procctl_single(struct thread *td, struct proc *p, int com, void *data)
1155 {
1156
1157         PROC_LOCK_ASSERT(p, MA_OWNED);
1158         return (procctl_cmds_info[com].exec(td, p, data));
1159 }
1160
/*
 * Apply procctl command com with argument data to the process or
 * process group named by (idtype, id), enforcing the locking and
 * permission policy recorded in procctl_cmds_info[com].
 *
 * For P_PID, id 0 means the calling process.  For P_PGID the command
 * is attempted on every visible member of the group and succeeds if
 * any member succeeds.  Returns 0 or an errno value; ERESTART if a
 * required stop_all_proc block could not be obtained.
 */
int
kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
{
	struct pgrp *pg;
	struct proc *p;
	const struct procctl_cmd_info *cmd_info;
	int error, first_error, ok;
	bool sapblk;

	MPASS(com > 0 && com < nitems(procctl_cmds_info));
	cmd_info = &procctl_cmds_info[com];
	if (idtype != P_PID && cmd_info->one_proc)
		return (EINVAL);

	/*
	 * Some commands (e.g. reaper kill) must prevent a concurrent
	 * stop_all_proc(); ask the command's sapblk callback.
	 */
	sapblk = false;
	if (cmd_info->sapblk != NULL) {
		sapblk = cmd_info->sapblk(td, data);
		if (sapblk && !stop_all_proc_block())
			return (ERESTART);
	}

	/* Take proctree_lock in the mode the command requires. */
	switch (cmd_info->lock_tree) {
	case PCTL_XLOCKED:
		sx_xlock(&proctree_lock);
		break;
	case PCTL_SLOCKED:
		sx_slock(&proctree_lock);
		break;
	default:
		break;
	}

	switch (idtype) {
	case P_PID:
		if (id == 0) {
			p = td->td_proc;
			error = 0;
			PROC_LOCK(p);
		} else {
			/* pfind() returns the process locked. */
			p = pfind(id);
			if (p == NULL) {
				error = cmd_info->esrch_is_einval ?
				    EINVAL : ESRCH;
				break;
			}
			error = cmd_info->need_candebug ? p_candebug(td, p) :
			    p_cansee(td, p);
		}
		if (error == 0)
			error = kern_procctl_single(td, p, com, data);
		PROC_UNLOCK(p);
		break;
	case P_PGID:
		/*
		 * Attempt to apply the operation to all members of the
		 * group.  Ignore processes in the group that can't be
		 * seen.  Ignore errors so long as at least one process is
		 * able to complete the request successfully.
		 */
		pg = pgfind(id);
		if (pg == NULL) {
			error = ESRCH;
			break;
		}
		/* proctree_lock keeps the member list stable. */
		PGRP_UNLOCK(pg);
		ok = 0;
		first_error = 0;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW ||
			    p->p_state == PRS_ZOMBIE ||
			    (cmd_info->need_candebug ? p_candebug(td, p) :
			    p_cansee(td, p)) != 0) {
				PROC_UNLOCK(p);
				continue;
			}
			error = kern_procctl_single(td, p, com, data);
			PROC_UNLOCK(p);
			if (error == 0)
				ok = 1;
			else if (first_error == 0)
				first_error = error;
		}
		if (ok)
			error = 0;
		else if (first_error != 0)
			error = first_error;
		else
			/*
			 * Was not able to see any processes in the
			 * process group.
			 */
			error = ESRCH;
		break;
	default:
		error = EINVAL;
		break;
	}

	switch (cmd_info->lock_tree) {
	case PCTL_XLOCKED:
		sx_xunlock(&proctree_lock);
		break;
	case PCTL_SLOCKED:
		sx_sunlock(&proctree_lock);
		break;
	default:
		break;
	}
	if (sapblk)
		stop_all_proc_unblock();
	return (error);
}