]> CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - sys/kern/sys_process.c
MFC r363988:
[FreeBSD/stable/9.git] / sys / kern / sys_process.c
1 /*-
2  * Copyright (c) 1994, Sean Eric Fagan
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by Sean Eric Fagan.
16  * 4. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include "opt_compat.h"
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/syscallsubr.h>
42 #include <sys/sysent.h>
43 #include <sys/sysproto.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/procctl.h>
47 #include <sys/vnode.h>
48 #include <sys/ptrace.h>
49 #include <sys/sx.h>
50 #include <sys/malloc.h>
51 #include <sys/signalvar.h>
52
53 #include <machine/reg.h>
54
55 #include <security/audit/audit.h>
56
57 #include <vm/vm.h>
58 #include <vm/pmap.h>
59 #include <vm/vm_extern.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_kern.h>
62 #include <vm/vm_object.h>
63 #include <vm/vm_page.h>
64 #include <vm/vm_pager.h>
65 #include <vm/vm_param.h>
66
67 #ifdef COMPAT_FREEBSD32
68 #include <sys/procfs.h>
69 #include <compat/freebsd32/freebsd32_signal.h>
70
/*
 * 32-bit counterpart of struct ptrace_io_desc.  sys_ptrace() selects it
 * through the COPYIN()/COPYOUT() "## 32" token pasting when the calling
 * process is ILP32.  The offset, address and length members are fixed at
 * 32 bits wide here.
 */
struct ptrace_io_desc32 {
        int             piod_op;        /* requested I/O operation */
        uint32_t        piod_offs;      /* presumably the offset in the traced process -- confirm against struct ptrace_io_desc */
        uint32_t        piod_addr;      /* presumably the tracer's buffer address, as a 32-bit value -- confirm */
        uint32_t        piod_len;       /* presumably the transfer length in bytes -- confirm */
};
77
/*
 * 32-bit counterpart of struct ptrace_vm_entry.  ptrace_vm_entry32()
 * converts between this layout and the native one; the member notes
 * below describe what ptrace_vm_entry() stores in the native structure.
 */
struct ptrace_vm_entry32 {
        int             pve_entry;      /* in: entry index to start at; out: index following the entry returned */
        int             pve_timestamp;  /* map timestamp when the entry was read */
        uint32_t        pve_start;      /* first address of the entry */
        uint32_t        pve_end;        /* last address of the entry (entry end - 1) */
        uint32_t        pve_offset;     /* entry offset plus backing object offsets */
        u_int           pve_prot;       /* entry protection */
        u_int           pve_pathlen;    /* in: size of pve_path buffer, 0 = no path wanted; out: path length incl. NUL */
        int32_t         pve_fileid;     /* va_fileid of the backing vnode, or VNOVAL */
        u_int           pve_fsid;       /* va_fsid of the backing vnode, or VNOVAL */
        uint32_t        pve_path;       /* user buffer for the path, as a 32-bit pointer value */
};
90
/*
 * 32-bit counterpart of struct ptrace_lwpinfo; filled from the native
 * structure by ptrace_lwpinfo_to32().  It differs from the native layout
 * in carrying a struct siginfo32 instead of the native siginfo.
 */
struct ptrace_lwpinfo32 {
        lwpid_t pl_lwpid;       /* LWP described. */
        int     pl_event;       /* Event that stopped the LWP. */
        int     pl_flags;       /* LWP flags. */
        sigset_t        pl_sigmask;     /* LWP signal mask */
        sigset_t        pl_siglist;     /* LWP pending signal */
        struct siginfo32 pl_siginfo;    /* siginfo for signal */
        char    pl_tdname[MAXCOMLEN + 1];       /* LWP name. */
        pid_t   pl_child_pid;           /* New child pid */
        u_int           pl_syscall_code;        /* presumably the current syscall number -- confirm */
        u_int           pl_syscall_narg;        /* presumably that syscall's argument count -- confirm */
};
103
104 #endif
105
/*
 * Functions implemented using PROC_ACTION():
 *
 * proc_read_regs(td, regs)
 *      Get the current user-visible register set from the thread
 *      and copy it into the regs structure (<machine/reg.h>).
 *      The process is stopped at the time read_regs is called.
 *
 * proc_write_regs(td, regs)
 *      Update the current register set from the passed in regs
 *      structure.  Take care to avoid clobbering special CPU
 *      registers or privileged bits in the PSL.
 *      Depending on the architecture this may have fix-up work to do,
 *      especially if the IAR or PCW are modified.
 *      The process is stopped at the time write_regs is called.
 *
 * proc_read_fpregs, proc_write_fpregs
 *      deal with the floating point register set, otherwise as above.
 *
 * proc_read_dbregs, proc_write_dbregs
 *      deal with the processor debug register set, otherwise as above.
 *
 * proc_sstep(td)
 *      Arrange for the thread to trap after executing a single instruction.
 */
131
/*
 * Shared body of the proc_*() helpers below.  Expects a `struct thread *td'
 * in scope, asserts that td's process lock is owned, and then returns from
 * the enclosing function: EIO when the process does not have P_INMEM set,
 * otherwise the value of `action'.
 */
#define PROC_ACTION(action) do {                                        \
        PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);                        \
        if ((td->td_proc->p_flag & P_INMEM) == 0)                       \
                return (EIO);                                           \
        return (action);                                                \
} while (0)
142
/*
 * Copy td's register set into *regs using fill_regs().
 * The process lock of td's process must be held (asserted by PROC_ACTION).
 * Returns EIO if the process lacks P_INMEM, else the result of fill_regs().
 */
int
proc_read_regs(struct thread *td, struct reg *regs)
{

        PROC_ACTION(fill_regs(td, regs));
}
149
/*
 * Load td's register set from *regs using set_regs().
 * The process lock of td's process must be held (asserted by PROC_ACTION).
 * Returns EIO if the process lacks P_INMEM, else the result of set_regs().
 */
int
proc_write_regs(struct thread *td, struct reg *regs)
{

        PROC_ACTION(set_regs(td, regs));
}
156
/*
 * Copy td's debug register set into *dbregs using fill_dbregs().
 * The process lock of td's process must be held (asserted by PROC_ACTION).
 * Returns EIO if the process lacks P_INMEM, else the result of
 * fill_dbregs().
 */
int
proc_read_dbregs(struct thread *td, struct dbreg *dbregs)
{

        PROC_ACTION(fill_dbregs(td, dbregs));
}
163
/*
 * Load td's debug register set from *dbregs using set_dbregs().
 * The process lock of td's process must be held (asserted by PROC_ACTION).
 * Returns EIO if the process lacks P_INMEM, else the result of
 * set_dbregs().
 */
int
proc_write_dbregs(struct thread *td, struct dbreg *dbregs)
{

        PROC_ACTION(set_dbregs(td, dbregs));
}
170
/*
 * Ptrace doesn't support fpregs at all, and there are no security holes
 * or translations for fpregs, so we can just copy them.
 *
 * NOTE(review): the first clause above looks stale -- sys_ptrace() in this
 * file handles PT_GETFPREGS and PT_SETFPREGS.  Confirm and reword.
 *
 * Copies td's floating point register set into *fpregs using fill_fpregs().
 * Returns EIO if the process lacks P_INMEM, else the result of
 * fill_fpregs().
 */
int
proc_read_fpregs(struct thread *td, struct fpreg *fpregs)
{

        PROC_ACTION(fill_fpregs(td, fpregs));
}
181
/*
 * Load td's floating point register set from *fpregs using set_fpregs().
 * The process lock of td's process must be held (asserted by PROC_ACTION).
 * Returns EIO if the process lacks P_INMEM, else the result of
 * set_fpregs().
 */
int
proc_write_fpregs(struct thread *td, struct fpreg *fpregs)
{

        PROC_ACTION(set_fpregs(td, fpregs));
}
188
189 #ifdef COMPAT_FREEBSD32
190 /* For 32 bit binaries, we need to expose the 32 bit regs layouts. */
/*
 * 32-bit layout variant of proc_read_regs(): copies td's register set into
 * *regs32 using fill_regs32().  Returns EIO if the process lacks P_INMEM,
 * else the result of fill_regs32().
 */
int
proc_read_regs32(struct thread *td, struct reg32 *regs32)
{

        PROC_ACTION(fill_regs32(td, regs32));
}
197
/*
 * 32-bit layout variant of proc_write_regs(): loads td's register set from
 * *regs32 using set_regs32().  Returns EIO if the process lacks P_INMEM,
 * else the result of set_regs32().
 */
int
proc_write_regs32(struct thread *td, struct reg32 *regs32)
{

        PROC_ACTION(set_regs32(td, regs32));
}
204
/*
 * 32-bit layout variant of proc_read_dbregs(): copies td's debug registers
 * into *dbregs32 using fill_dbregs32().  Returns EIO if the process lacks
 * P_INMEM, else the result of fill_dbregs32().
 */
int
proc_read_dbregs32(struct thread *td, struct dbreg32 *dbregs32)
{

        PROC_ACTION(fill_dbregs32(td, dbregs32));
}
211
/*
 * 32-bit layout variant of proc_write_dbregs(): loads td's debug registers
 * from *dbregs32 using set_dbregs32().  Returns EIO if the process lacks
 * P_INMEM, else the result of set_dbregs32().
 */
int
proc_write_dbregs32(struct thread *td, struct dbreg32 *dbregs32)
{

        PROC_ACTION(set_dbregs32(td, dbregs32));
}
218
/*
 * 32-bit layout variant of proc_read_fpregs(): copies td's floating point
 * registers into *fpregs32 using fill_fpregs32().  Returns EIO if the
 * process lacks P_INMEM, else the result of fill_fpregs32().
 */
int
proc_read_fpregs32(struct thread *td, struct fpreg32 *fpregs32)
{

        PROC_ACTION(fill_fpregs32(td, fpregs32));
}
225
/*
 * 32-bit layout variant of proc_write_fpregs(): loads td's floating point
 * registers from *fpregs32 using set_fpregs32().  Returns EIO if the
 * process lacks P_INMEM, else the result of set_fpregs32().
 */
int
proc_write_fpregs32(struct thread *td, struct fpreg32 *fpregs32)
{

        PROC_ACTION(set_fpregs32(td, fpregs32));
}
232 #endif
233
/*
 * Request a single step of td via ptrace_single_step().
 * The process lock of td's process must be held (asserted by PROC_ACTION).
 * Returns EIO if the process lacks P_INMEM, else the result of
 * ptrace_single_step().
 */
int
proc_sstep(struct thread *td)
{

        PROC_ACTION(ptrace_single_step(td));
}
240
/*
 * Move data between the buffers described by uio and the address space of
 * process p, one page at a time.
 *
 * uio->uio_offset is used as the virtual address inside p, uio->uio_resid
 * as the number of bytes still to move, and uio->uio_rw == UIO_WRITE
 * selects a write into p's memory.  The caller must hold the process
 * (p->p_lock >= 1, asserted below).
 *
 * Returns 0 on success, ENOMEM if faulting a page in failed with
 * KERN_RESOURCE_SHORTAGE, EFAULT for any other fault failure, or the error
 * returned by uiomove_fromphys().
 */
int
proc_rwmem(struct proc *p, struct uio *uio)
{
        vm_map_t map;
        vm_offset_t pageno;             /* page-aligned address containing uva */
        vm_prot_t reqprot;
        int error, fault_flags, page_offset, writing;

        /*
         * Assert that someone has locked this vmspace.  (Should be
         * curthread but we can't assert that.)  This keeps the process
         * from exiting out from under us until this operation completes.
         */
        KASSERT(p->p_lock >= 1, ("%s: process %p (pid %d) not held", __func__,
            p, p->p_pid));

        /*
         * The map we want...
         */
        map = &p->p_vmspace->vm_map;

        /*
         * If we are writing, then we request vm_fault() to create a private
         * copy of each page.  Since these copies will not be writeable by the
         * process, we must explicitly request that they be dirtied.
         */
        writing = uio->uio_rw == UIO_WRITE;
        reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ;
        fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL;

        /*
         * Only map in one page at a time.  We don't have to, but it
         * makes things easier.  This way is trivial - right?
         */
        do {
                vm_offset_t uva;
                u_int len;
                vm_page_t m;

                /* Current target address; uiomove_fromphys() is given uio to update. */
                uva = (vm_offset_t)uio->uio_offset;

                /*
                 * Split the address into its page base and the offset
                 * within that page.
                 */
                pageno = trunc_page(uva);
                page_offset = uva - pageno;

                /*
                 * How many bytes to copy: never beyond the end of this
                 * page and never more than what is left in the request.
                 */
                len = min(PAGE_SIZE - page_offset, uio->uio_resid);

                /*
                 * Fault and hold the page on behalf of the process.
                 * VM (KERN_*) status codes are translated to errno values
                 * before leaving the loop.
                 */
                error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);
                if (error != KERN_SUCCESS) {
                        if (error == KERN_RESOURCE_SHORTAGE)
                                error = ENOMEM;
                        else
                                error = EFAULT;
                        break;
                }

                /*
                 * Now do the i/o move.
                 */
                error = uiomove_fromphys(&m, page_offset, len, uio);

                /*
                 * Make the I-cache coherent for breakpoints: only after a
                 * successful write, and only if the page's range is
                 * mapped executable.
                 */
                if (writing && error == 0) {
                        vm_map_lock_read(map);
                        if (vm_map_check_protection(map, pageno, pageno +
                            PAGE_SIZE, VM_PROT_EXECUTE))
                                vm_sync_icache(map, uva, len);
                        vm_map_unlock_read(map);
                }

                /*
                 * Release the page (drops the hold taken by
                 * vm_fault_hold(), even when the copy failed).
                 */
                vm_page_lock(m);
                vm_page_unhold(m);
                vm_page_unlock(m);

        } while (error == 0 && uio->uio_resid > 0);

        return (error);
}
330
/*
 * Report one entry of process p's VM map in *pve (PT_VM_ENTRY).
 *
 * On input pve->pve_entry is the index of the map entry to start at and
 * pve->pve_pathlen is the size of the user buffer at pve->pve_path (0 if
 * no path is wanted).  Sub-map entries are skipped.  On success
 * pve->pve_entry is advanced to the index following the entry returned,
 * and the start/end/offset/prot/timestamp members describe that entry;
 * pve_end is inclusive (entry end - 1).  If a path was requested and the
 * bottom-most backing object is a vnode, pve_fileid/pve_fsid are filled
 * from its attributes and its path is copied out to pve->pve_path.
 *
 * Returns 0 on success, EINVAL if the map has fewer than pve_entry
 * entries, ENOENT if no non-submap entry remains, ENAMETOOLONG if the
 * path does not fit in the supplied buffer (pve_pathlen then holds the
 * required size), or the error from copyout().
 */
static int
ptrace_vm_entry(struct thread *td, struct proc *p, struct ptrace_vm_entry *pve)
{
        struct vattr vattr;
        vm_map_t map;
        vm_map_entry_t entry;
        vm_object_t obj, tobj, lobj;
        struct vmspace *vm;
        struct vnode *vp;
        char *freepath, *fullpath;
        u_int pathlen;
        int error, index, vfslocked;

        error = 0;
        obj = NULL;

        /* Reference the vmspace and read-lock its map while walking it. */
        vm = vmspace_acquire_ref(p);
        map = &vm->vm_map;
        vm_map_lock_read(map);

        /* Single-pass block; "break" is used as a structured exit. */
        do {
                /* Walk to the pve_entry'th entry of the map. */
                entry = map->header.next;
                index = 0;
                while (index < pve->pve_entry && entry != &map->header) {
                        entry = entry->next;
                        index++;
                }
                if (index != pve->pve_entry) {
                        error = EINVAL;
                        break;
                }
                /* Skip sub-map entries; they are never reported. */
                while (entry != &map->header &&
                    (entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) {
                        entry = entry->next;
                        index++;
                }
                if (entry == &map->header) {
                        error = ENOENT;
                        break;
                }

                /* We got an entry. */
                pve->pve_entry = index + 1;
                pve->pve_timestamp = map->timestamp;
                pve->pve_start = entry->start;
                pve->pve_end = entry->end - 1;
                pve->pve_offset = entry->offset;
                pve->pve_prot = entry->protection;

                /* Backing object's path needed? */
                if (pve->pve_pathlen == 0)
                        break;

                /*
                 * Remember the buffer size and reset pve_pathlen; it is
                 * only set again if a path is actually found.
                 */
                pathlen = pve->pve_pathlen;
                pve->pve_pathlen = 0;

                /*
                 * Lock the entry's object before the map lock is dropped;
                 * it is unlocked after the backing chain walk below.
                 */
                obj = entry->object.vm_object;
                if (obj != NULL)
                        VM_OBJECT_LOCK(obj);
        } while (0);

        vm_map_unlock_read(map);
        vmspace_free(vm);

        /* Defaults for when no vnode attributes are obtained. */
        pve->pve_fsid = VNOVAL;
        pve->pve_fileid = VNOVAL;

        if (error == 0 && obj != NULL) {
                /*
                 * Follow the backing_object chain to its last object
                 * (lobj), locking each object before releasing the
                 * previous one and accumulating the backing offsets.
                 * obj itself stays locked throughout.
                 */
                lobj = obj;
                for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) {
                        if (tobj != obj)
                                VM_OBJECT_LOCK(tobj);
                        if (lobj != obj)
                                VM_OBJECT_UNLOCK(lobj);
                        lobj = tobj;
                        pve->pve_offset += tobj->backing_object_offset;
                }
                /* Take a vnode reference while the object is still locked. */
                vp = (lobj->type == OBJT_VNODE) ? lobj->handle : NULL;
                if (vp != NULL)
                        vref(vp);
                if (lobj != obj)
                        VM_OBJECT_UNLOCK(lobj);
                VM_OBJECT_UNLOCK(obj);

                if (vp != NULL) {
                        freepath = NULL;
                        fullpath = NULL;
                        /*
                         * The return value is ignored; a failed lookup is
                         * detected by fullpath remaining NULL.
                         */
                        vn_fullpath(td, vp, &fullpath, &freepath);
                        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
                        vn_lock(vp, LK_SHARED | LK_RETRY);
                        if (VOP_GETATTR(vp, &vattr, td->td_ucred) == 0) {
                                pve->pve_fileid = vattr.va_fileid;
                                pve->pve_fsid = vattr.va_fsid;
                        }
                        /* vput() drops both the vnode lock and the vref(). */
                        vput(vp);
                        VFS_UNLOCK_GIANT(vfslocked);

                        if (fullpath != NULL) {
                                /* Length reported includes the NUL. */
                                pve->pve_pathlen = strlen(fullpath) + 1;
                                if (pve->pve_pathlen <= pathlen) {
                                        error = copyout(fullpath, pve->pve_path,
                                            pve->pve_pathlen);
                                } else
                                        error = ENAMETOOLONG;
                        }
                        if (freepath != NULL)
                                free(freepath, M_TEMP);
                }
        }
        if (error == 0)
                CTR3(KTR_PTRACE, "PT_VM_ENTRY: pid %d, entry %d, start %p",
                    p->p_pid, pve->pve_entry, pve->pve_start);

        return (error);
}
446
447 #ifdef COMPAT_FREEBSD32
448 static int      
449 ptrace_vm_entry32(struct thread *td, struct proc *p,
450     struct ptrace_vm_entry32 *pve32)
451 {
452         struct ptrace_vm_entry pve;
453         int error;
454
455         pve.pve_entry = pve32->pve_entry;
456         pve.pve_pathlen = pve32->pve_pathlen;
457         pve.pve_path = (void *)(uintptr_t)pve32->pve_path;
458
459         error = ptrace_vm_entry(td, p, &pve);
460         if (error == 0) {
461                 pve32->pve_entry = pve.pve_entry;
462                 pve32->pve_timestamp = pve.pve_timestamp;
463                 pve32->pve_start = pve.pve_start;
464                 pve32->pve_end = pve.pve_end;
465                 pve32->pve_offset = pve.pve_offset;
466                 pve32->pve_prot = pve.pve_prot;
467                 pve32->pve_fileid = pve.pve_fileid;
468                 pve32->pve_fsid = pve.pve_fsid;
469         }
470
471         pve32->pve_pathlen = pve.pve_pathlen;
472         return (error);
473 }
474
475 static void
476 ptrace_lwpinfo_to32(const struct ptrace_lwpinfo *pl,
477     struct ptrace_lwpinfo32 *pl32)
478 {
479
480         pl32->pl_lwpid = pl->pl_lwpid;
481         pl32->pl_event = pl->pl_event;
482         pl32->pl_flags = pl->pl_flags;
483         pl32->pl_sigmask = pl->pl_sigmask;
484         pl32->pl_siglist = pl->pl_siglist;
485         siginfo_to_siginfo32(&pl->pl_siginfo, &pl32->pl_siginfo);
486         strcpy(pl32->pl_tdname, pl->pl_tdname);
487         pl32->pl_child_pid = pl->pl_child_pid;
488         pl32->pl_syscall_code = pl->pl_syscall_code;
489         pl32->pl_syscall_narg = pl->pl_syscall_narg;
490 }
491 #endif /* COMPAT_FREEBSD32 */
492
493 /*
494  * Process debugging system call.
495  */
496 #ifndef _SYS_SYSPROTO_H_
/*
 * Argument block of the ptrace system call, as consumed by sys_ptrace().
 * Only declared here when <sys/sysproto.h> has not already provided it.
 */
struct ptrace_args {
        int     req;            /* request code (PT_*) */
        pid_t   pid;            /* target id, handed to kern_ptrace() */
        caddr_t addr;           /* user address: copyin/copyout buffer or raw argument, depending on req */
        int     data;           /* request-specific value; byte count copied out for PT_LWPINFO */
};
503 #endif
504
#ifdef COMPAT_FREEBSD32
/*
 * This CPP subterfuge is to try and reduce the number of ifdefs in
 * the body of the code.
 *   COPYIN(uap->addr, &r.reg, sizeof r.reg);
 * becomes either:
 *   copyin(uap->addr, &r.reg, sizeof r.reg);
 * or
 *   copyin(uap->addr, &r.reg32, sizeof r.reg32);
 * .. except this is done at runtime.
 *
 * A local `wrap32' must be in scope at the point of use.  The `k' and `s'
 * arguments cannot be parenthesized because "32" is pasted onto their last
 * token, but the whole expansion is, so that the conditional expression
 * stays intact when the macro is used inside a larger expression.
 */
#define COPYIN(u, k, s)         (wrap32 ? \
        copyin(u, k ## 32, s ## 32) : \
        copyin(u, k, s))
#define COPYOUT(k, u, s)        (wrap32 ? \
        copyout(k ## 32, u, s ## 32) : \
        copyout(k, u, s))
#else
#define COPYIN(u, k, s)         copyin(u, k, s)
#define COPYOUT(k, u, s)        copyout(k, u, s)
#endif
526 int
527 sys_ptrace(struct thread *td, struct ptrace_args *uap)
528 {
529         /*
530          * XXX this obfuscation is to reduce stack usage, but the register
531          * structs may be too large to put on the stack anyway.
532          */
533         union {
534                 struct ptrace_io_desc piod;
535                 struct ptrace_lwpinfo pl;
536                 struct ptrace_vm_entry pve;
537                 struct dbreg dbreg;
538                 struct fpreg fpreg;
539                 struct reg reg;
540 #ifdef COMPAT_FREEBSD32
541                 struct dbreg32 dbreg32;
542                 struct fpreg32 fpreg32;
543                 struct reg32 reg32;
544                 struct ptrace_io_desc32 piod32;
545                 struct ptrace_lwpinfo32 pl32;
546                 struct ptrace_vm_entry32 pve32;
547 #endif
548         } r;
549         void *addr;
550         int error = 0;
551 #ifdef COMPAT_FREEBSD32
552         int wrap32 = 0;
553
554         if (SV_CURPROC_FLAG(SV_ILP32))
555                 wrap32 = 1;
556 #endif
557         AUDIT_ARG_PID(uap->pid);
558         AUDIT_ARG_CMD(uap->req);
559         AUDIT_ARG_VALUE(uap->data);
560         addr = &r;
561         switch (uap->req) {
562         case PT_GETREGS:
563         case PT_GETFPREGS:
564         case PT_GETDBREGS:
565         case PT_LWPINFO:
566                 break;
567         case PT_SETREGS:
568                 error = COPYIN(uap->addr, &r.reg, sizeof r.reg);
569                 break;
570         case PT_SETFPREGS:
571                 error = COPYIN(uap->addr, &r.fpreg, sizeof r.fpreg);
572                 break;
573         case PT_SETDBREGS:
574                 error = COPYIN(uap->addr, &r.dbreg, sizeof r.dbreg);
575                 break;
576         case PT_IO:
577                 error = COPYIN(uap->addr, &r.piod, sizeof r.piod);
578                 break;
579         case PT_VM_ENTRY:
580                 error = COPYIN(uap->addr, &r.pve, sizeof r.pve);
581                 break;
582         default:
583                 addr = uap->addr;
584                 break;
585         }
586         if (error)
587                 return (error);
588
589         error = kern_ptrace(td, uap->req, uap->pid, addr, uap->data);
590         if (error)
591                 return (error);
592
593         switch (uap->req) {
594         case PT_VM_ENTRY:
595                 error = COPYOUT(&r.pve, uap->addr, sizeof r.pve);
596                 break;
597         case PT_IO:
598                 error = COPYOUT(&r.piod, uap->addr, sizeof r.piod);
599                 break;
600         case PT_GETREGS:
601                 error = COPYOUT(&r.reg, uap->addr, sizeof r.reg);
602                 break;
603         case PT_GETFPREGS:
604                 error = COPYOUT(&r.fpreg, uap->addr, sizeof r.fpreg);
605                 break;
606         case PT_GETDBREGS:
607                 error = COPYOUT(&r.dbreg, uap->addr, sizeof r.dbreg);
608                 break;
609         case PT_LWPINFO:
610                 error = copyout(&r.pl, uap->addr, uap->data);
611                 break;
612         }
613
614         return (error);
615 }
616 #undef COPYIN
617 #undef COPYOUT
618
#ifdef COMPAT_FREEBSD32
/*
 *   PROC_READ(regs, td2, addr);
 * becomes either:
 *   proc_read_regs(td2, addr);
 * or
 *   proc_read_regs32(td2, addr);
 * .. except this is done at runtime.  There is an additional
 * complication in that PROC_WRITE disallows 32 bit consumers
 * from writing to 64 bit address space targets.
 *
 * Locals `wrap32' (and `safe' for PROC_WRITE) must be in scope at the
 * point of use.  The whole expansion is parenthesized so that the
 * conditional expression stays intact when the macro is used inside a
 * larger expression.
 */
#define PROC_READ(w, t, a)      (wrap32 ? \
        proc_read_ ## w ## 32(t, a) : \
        proc_read_ ## w (t, a))
#define PROC_WRITE(w, t, a)     (wrap32 ? \
        (safe ? proc_write_ ## w ## 32(t, a) : EINVAL ) : \
        proc_write_ ## w (t, a))
#else
#define PROC_READ(w, t, a)      proc_read_ ## w (t, a)
#define PROC_WRITE(w, t, a)     proc_write_ ## w (t, a)
#endif
640
641 int
642 kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
643 {
644         struct iovec iov;
645         struct uio uio;
646         struct proc *curp, *p, *pp;
647         struct thread *td2 = NULL;
648         struct ptrace_io_desc *piod = NULL;
649         struct ptrace_lwpinfo *pl;
650         int error, write, tmp, num;
651         int proctree_locked = 0;
652         lwpid_t tid = 0, *buf;
653 #ifdef COMPAT_FREEBSD32
654         int wrap32 = 0, safe = 0;
655         struct ptrace_io_desc32 *piod32 = NULL;
656         struct ptrace_lwpinfo32 *pl32 = NULL;
657         struct ptrace_lwpinfo plr;
658 #endif
659
660         curp = td->td_proc;
661
662         /* Lock proctree before locking the process. */
663         switch (req) {
664         case PT_TRACE_ME:
665         case PT_ATTACH:
666         case PT_STEP:
667         case PT_CONTINUE:
668         case PT_TO_SCE:
669         case PT_TO_SCX:
670         case PT_SYSCALL:
671         case PT_FOLLOW_FORK:
672         case PT_DETACH:
673                 sx_xlock(&proctree_lock);
674                 proctree_locked = 1;
675                 break;
676         default:
677                 break;
678         }
679
680         write = 0;
681         if (req == PT_TRACE_ME) {
682                 p = td->td_proc;
683                 PROC_LOCK(p);
684         } else {
685                 if (pid <= PID_MAX) {
686                         if ((p = pfind(pid)) == NULL) {
687                                 if (proctree_locked)
688                                         sx_xunlock(&proctree_lock);
689                                 return (ESRCH);
690                         }
691                 } else {
692                         td2 = tdfind(pid, -1);
693                         if (td2 == NULL) {
694                                 if (proctree_locked)
695                                         sx_xunlock(&proctree_lock);
696                                 return (ESRCH);
697                         }
698                         p = td2->td_proc;
699                         tid = pid;
700                         pid = p->p_pid;
701                 }
702         }
703         AUDIT_ARG_PROCESS(p);
704
705         if ((p->p_flag & P_WEXIT) != 0) {
706                 error = ESRCH;
707                 goto fail;
708         }
709         if ((error = p_cansee(td, p)) != 0)
710                 goto fail;
711
712         if ((error = p_candebug(td, p)) != 0)
713                 goto fail;
714
715         /*
716          * System processes can't be debugged.
717          */
718         if ((p->p_flag & P_SYSTEM) != 0) {
719                 error = EINVAL;
720                 goto fail;
721         }
722
723         if (tid == 0) {
724                 if ((p->p_flag & P_STOPPED_TRACE) != 0) {
725                         KASSERT(p->p_xthread != NULL, ("NULL p_xthread"));
726                         td2 = p->p_xthread;
727                 } else {
728                         td2 = FIRST_THREAD_IN_PROC(p);
729                 }
730                 tid = td2->td_tid;
731         }
732
733 #ifdef COMPAT_FREEBSD32
734         /*
735          * Test if we're a 32 bit client and what the target is.
736          * Set the wrap controls accordingly.
737          */
738         if (SV_CURPROC_FLAG(SV_ILP32)) {
739                 if (SV_PROC_FLAG(td2->td_proc, SV_ILP32))
740                         safe = 1;
741                 wrap32 = 1;
742         }
743 #endif
744         /*
745          * Permissions check
746          */
747         switch (req) {
748         case PT_TRACE_ME:
749                 /* Always legal. */
750                 break;
751
752         case PT_ATTACH:
753                 /* Self */
754                 if (p->p_pid == td->td_proc->p_pid) {
755                         error = EINVAL;
756                         goto fail;
757                 }
758
759                 /* Already traced */
760                 if (p->p_flag & P_TRACED) {
761                         error = EBUSY;
762                         goto fail;
763                 }
764
765                 /* Can't trace an ancestor if you're being traced. */
766                 if (curp->p_flag & P_TRACED) {
767                         for (pp = curp->p_pptr; pp != NULL; pp = pp->p_pptr) {
768                                 if (pp == p) {
769                                         error = EINVAL;
770                                         goto fail;
771                                 }
772                         }
773                 }
774
775
776                 /* OK */
777                 break;
778
779         case PT_CLEARSTEP:
780                 /* Allow thread to clear single step for itself */
781                 if (td->td_tid == tid)
782                         break;
783
784                 /* FALLTHROUGH */
785         default:
786                 /* not being traced... */
787                 if ((p->p_flag & P_TRACED) == 0) {
788                         error = EPERM;
789                         goto fail;
790                 }
791
792                 /* not being traced by YOU */
793                 if (p->p_pptr != td->td_proc) {
794                         error = EBUSY;
795                         goto fail;
796                 }
797
798                 /* not currently stopped */
799                 if ((p->p_flag & (P_STOPPED_SIG | P_STOPPED_TRACE)) == 0 ||
800                     p->p_suspcount != p->p_numthreads  ||
801                     (p->p_flag & P_WAITED) == 0) {
802                         error = EBUSY;
803                         goto fail;
804                 }
805
806                 if ((p->p_flag & P_STOPPED_TRACE) == 0) {
807                         static int count = 0;
808                         if (count++ == 0)
809                                 printf("P_STOPPED_TRACE not set.\n");
810                 }
811
812                 /* OK */
813                 break;
814         }
815
816         /* Keep this process around until we finish this request. */
817         _PHOLD(p);
818
819 #ifdef FIX_SSTEP
820         /*
821          * Single step fixup ala procfs
822          */
823         FIX_SSTEP(td2);
824 #endif
825
826         /*
827          * Actually do the requests
828          */
829
830         td->td_retval[0] = 0;
831
832         switch (req) {
833         case PT_TRACE_ME:
834                 /* set my trace flag and "owner" so it can read/write me */
835                 p->p_flag |= P_TRACED;
836                 if (p->p_flag & P_PPWAIT)
837                         p->p_flag |= P_PPTRACE;
838                 p->p_oppid = p->p_pptr->p_pid;
839                 CTR1(KTR_PTRACE, "PT_TRACE_ME: pid %d", p->p_pid);
840                 break;
841
842         case PT_ATTACH:
843                 /* security check done above */
844                 /*
845                  * It would be nice if the tracing relationship was separate
846                  * from the parent relationship but that would require
847                  * another set of links in the proc struct or for "wait"
848                  * to scan the entire proc table.  To make life easier,
849                  * we just re-parent the process we're trying to trace.
850                  * The old parent is remembered so we can put things back
851                  * on a "detach".
852                  */
853                 p->p_flag |= P_TRACED;
854                 p->p_oppid = p->p_pptr->p_pid;
855                 if (p->p_pptr != td->td_proc) {
856                         proc_reparent(p, td->td_proc);
857                 }
858                 data = SIGSTOP;
859                 CTR2(KTR_PTRACE, "PT_ATTACH: pid %d, oppid %d", p->p_pid,
860                     p->p_oppid);
861                 goto sendsig;   /* in PT_CONTINUE below */
862
863         case PT_CLEARSTEP:
864                 CTR2(KTR_PTRACE, "PT_CLEARSTEP: tid %d (pid %d)", td2->td_tid,
865                     p->p_pid);
866                 error = ptrace_clear_single_step(td2);
867                 break;
868
869         case PT_SETSTEP:
870                 CTR2(KTR_PTRACE, "PT_SETSTEP: tid %d (pid %d)", td2->td_tid,
871                     p->p_pid);
872                 error = ptrace_single_step(td2);
873                 break;
874
875         case PT_SUSPEND:
876                 CTR2(KTR_PTRACE, "PT_SUSPEND: tid %d (pid %d)", td2->td_tid,
877                     p->p_pid);
878                 td2->td_dbgflags |= TDB_SUSPEND;
879                 thread_lock(td2);
880                 td2->td_flags |= TDF_NEEDSUSPCHK;
881                 thread_unlock(td2);
882                 break;
883
884         case PT_RESUME:
885                 CTR2(KTR_PTRACE, "PT_RESUME: tid %d (pid %d)", td2->td_tid,
886                     p->p_pid);
887                 td2->td_dbgflags &= ~TDB_SUSPEND;
888                 break;
889
890         case PT_FOLLOW_FORK:
891                 CTR3(KTR_PTRACE, "PT_FOLLOW_FORK: pid %d %s -> %s", p->p_pid,
892                     p->p_flag & P_FOLLOWFORK ? "enabled" : "disabled",
893                     data ? "enabled" : "disabled");
894                 if (data)
895                         p->p_flag |= P_FOLLOWFORK;
896                 else
897                         p->p_flag &= ~P_FOLLOWFORK;
898                 break;
899
900         case PT_STEP:
901         case PT_CONTINUE:
902         case PT_TO_SCE:
903         case PT_TO_SCX:
904         case PT_SYSCALL:
905         case PT_DETACH:
906                 /* Zero means do not send any signal */
907                 if (data < 0 || data > _SIG_MAXSIG) {
908                         error = EINVAL;
909                         break;
910                 }
911
912                 switch (req) {
913                 case PT_STEP:
914                         CTR2(KTR_PTRACE, "PT_STEP: tid %d (pid %d)",
915                             td2->td_tid, p->p_pid);
916                         error = ptrace_single_step(td2);
917                         if (error)
918                                 goto out;
919                         break;
920                 case PT_CONTINUE:
921                 case PT_TO_SCE:
922                 case PT_TO_SCX:
923                 case PT_SYSCALL:
924                         if (addr != (void *)1) {
925                                 error = ptrace_set_pc(td2,
926                                     (u_long)(uintfptr_t)addr);
927                                 if (error)
928                                         goto out;
929                         }
930                         switch (req) {
931                         case PT_TO_SCE:
932                                 p->p_stops |= S_PT_SCE;
933                                 CTR4(KTR_PTRACE,
934                     "PT_TO_SCE: pid %d, stops = %#x, PC = %#lx, sig = %d",
935                                     p->p_pid, p->p_stops,
936                                     (u_long)(uintfptr_t)addr, data);
937                                 break;
938                         case PT_TO_SCX:
939                                 p->p_stops |= S_PT_SCX;
940                                 CTR4(KTR_PTRACE,
941                     "PT_TO_SCX: pid %d, stops = %#x, PC = %#lx, sig = %d",
942                                     p->p_pid, p->p_stops,
943                                     (u_long)(uintfptr_t)addr, data);
944                                 break;
945                         case PT_SYSCALL:
946                                 p->p_stops |= S_PT_SCE | S_PT_SCX;
947                                 CTR4(KTR_PTRACE,
948                     "PT_SYSCALL: pid %d, stops = %#x, PC = %#lx, sig = %d",
949                                     p->p_pid, p->p_stops,
950                                     (u_long)(uintfptr_t)addr, data);
951                                 break;
952                         case PT_CONTINUE:
953                                 CTR3(KTR_PTRACE,
954                                     "PT_CONTINUE: pid %d, PC = %#lx, sig = %d",
955                                     p->p_pid, (u_long)(uintfptr_t)addr, data);
956                                 break;
957                         }
958                         break;
959                 case PT_DETACH:
960                         /*
961                          * Reset the process parent.
962                          *
963                          * NB: This clears P_TRACED before reparenting
964                          * a detached process back to its original
965                          * parent.  Otherwise the debugee will be set
966                          * as an orphan of the debugger.
967                          */
968                         p->p_flag &= ~(P_TRACED | P_WAITED | P_FOLLOWFORK);
969                         if (p->p_oppid != p->p_pptr->p_pid) {
970                                 PROC_LOCK(p->p_pptr);
971                                 sigqueue_take(p->p_ksi);
972                                 PROC_UNLOCK(p->p_pptr);
973
974                                 pp = proc_realparent(p);
975                                 proc_reparent(p, pp);
976                                 if (pp == initproc)
977                                         p->p_sigparent = SIGCHLD;
978                                 CTR3(KTR_PTRACE,
979                             "PT_DETACH: pid %d reparented to pid %d, sig %d",
980                                     p->p_pid, pp->p_pid, data);
981                         } else
982                                 CTR2(KTR_PTRACE, "PT_DETACH: pid %d, sig %d",
983                                     p->p_pid, data);
984                         p->p_oppid = 0;
985
986                         /* should we send SIGCHLD? */
987                         /* childproc_continued(p); */
988                         break;
989                 }
990
991         sendsig:
992                 if (proctree_locked) {
993                         sx_xunlock(&proctree_lock);
994                         proctree_locked = 0;
995                 }
996                 p->p_xstat = data;
997                 p->p_xthread = NULL;
998                 if ((p->p_flag & (P_STOPPED_SIG | P_STOPPED_TRACE)) != 0) {
999                         /* deliver or queue signal */
1000                         td2->td_dbgflags &= ~TDB_XSIG;
1001                         td2->td_xsig = data;
1002
1003                         if (req == PT_DETACH) {
1004                                 struct thread *td3;
1005                                 FOREACH_THREAD_IN_PROC(p, td3) {
1006                                         td3->td_dbgflags &= ~TDB_SUSPEND; 
1007                                 }
1008                         }
1009                         /*
1010                          * unsuspend all threads, to not let a thread run,
1011                          * you should use PT_SUSPEND to suspend it before
1012                          * continuing process.
1013                          */
1014                         PROC_SLOCK(p);
1015                         p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG|P_WAITED);
1016                         thread_unsuspend(p);
1017                         PROC_SUNLOCK(p);
1018                 } else {
1019                         if (data)
1020                                 kern_psignal(p, data);
1021                 }
1022                 break;
1023
1024         case PT_WRITE_I:
1025         case PT_WRITE_D:
1026                 td2->td_dbgflags |= TDB_USERWR;
1027                 write = 1;
1028                 /* FALLTHROUGH */
1029         case PT_READ_I:
1030         case PT_READ_D:
1031                 PROC_UNLOCK(p);
1032                 tmp = 0;
1033                 /* write = 0 set above */
1034                 iov.iov_base = write ? (caddr_t)&data : (caddr_t)&tmp;
1035                 iov.iov_len = sizeof(int);
1036                 uio.uio_iov = &iov;
1037                 uio.uio_iovcnt = 1;
1038                 uio.uio_offset = (off_t)(uintptr_t)addr;
1039                 uio.uio_resid = sizeof(int);
1040                 uio.uio_segflg = UIO_SYSSPACE;  /* i.e.: the uap */
1041                 uio.uio_rw = write ? UIO_WRITE : UIO_READ;
1042                 uio.uio_td = td;
1043                 error = proc_rwmem(p, &uio);
1044                 if (uio.uio_resid != 0) {
1045                         /*
1046                          * XXX proc_rwmem() doesn't currently return ENOSPC,
1047                          * so I think write() can bogusly return 0.
1048                          * XXX what happens for short writes?  We don't want
1049                          * to write partial data.
1050                          * XXX proc_rwmem() returns EPERM for other invalid
1051                          * addresses.  Convert this to EINVAL.  Does this
1052                          * clobber returns of EPERM for other reasons?
1053                          */
1054                         if (error == 0 || error == ENOSPC || error == EPERM)
1055                                 error = EINVAL; /* EOF */
1056                 }
1057                 if (!write)
1058                         td->td_retval[0] = tmp;
1059                 if (error == 0) {
1060                         if (write)
1061                                 CTR3(KTR_PTRACE, "PT_WRITE: pid %d: %p <= %#x",
1062                                     p->p_pid, addr, data);
1063                         else
1064                                 CTR3(KTR_PTRACE, "PT_READ: pid %d: %p >= %#x",
1065                                     p->p_pid, addr, tmp);
1066                 }
1067                 PROC_LOCK(p);
1068                 break;
1069
1070         case PT_IO:
1071 #ifdef COMPAT_FREEBSD32
1072                 if (wrap32) {
1073                         piod32 = addr;
1074                         iov.iov_base = (void *)(uintptr_t)piod32->piod_addr;
1075                         iov.iov_len = piod32->piod_len;
1076                         uio.uio_offset = (off_t)(uintptr_t)piod32->piod_offs;
1077                         uio.uio_resid = piod32->piod_len;
1078                 } else
1079 #endif
1080                 {
1081                         piod = addr;
1082                         iov.iov_base = piod->piod_addr;
1083                         iov.iov_len = piod->piod_len;
1084                         uio.uio_offset = (off_t)(uintptr_t)piod->piod_offs;
1085                         uio.uio_resid = piod->piod_len;
1086                 }
1087                 uio.uio_iov = &iov;
1088                 uio.uio_iovcnt = 1;
1089                 uio.uio_segflg = UIO_USERSPACE;
1090                 uio.uio_td = td;
1091 #ifdef COMPAT_FREEBSD32
1092                 tmp = wrap32 ? piod32->piod_op : piod->piod_op;
1093 #else
1094                 tmp = piod->piod_op;
1095 #endif
1096                 switch (tmp) {
1097                 case PIOD_READ_D:
1098                 case PIOD_READ_I:
1099                         CTR3(KTR_PTRACE, "PT_IO: pid %d: READ (%p, %#x)",
1100                             p->p_pid, (uintptr_t)uio.uio_offset, uio.uio_resid);
1101                         uio.uio_rw = UIO_READ;
1102                         break;
1103                 case PIOD_WRITE_D:
1104                 case PIOD_WRITE_I:
1105                         CTR3(KTR_PTRACE, "PT_IO: pid %d: WRITE (%p, %#x)",
1106                             p->p_pid, (uintptr_t)uio.uio_offset, uio.uio_resid);
1107                         td2->td_dbgflags |= TDB_USERWR;
1108                         uio.uio_rw = UIO_WRITE;
1109                         break;
1110                 default:
1111                         error = EINVAL;
1112                         goto out;
1113                 }
1114                 PROC_UNLOCK(p);
1115                 error = proc_rwmem(p, &uio);
1116 #ifdef COMPAT_FREEBSD32
1117                 if (wrap32)
1118                         piod32->piod_len -= uio.uio_resid;
1119                 else
1120 #endif
1121                         piod->piod_len -= uio.uio_resid;
1122                 PROC_LOCK(p);
1123                 break;
1124
1125         case PT_KILL:
1126                 CTR1(KTR_PTRACE, "PT_KILL: pid %d", p->p_pid);
1127                 data = SIGKILL;
1128                 goto sendsig;   /* in PT_CONTINUE above */
1129
1130         case PT_SETREGS:
1131                 CTR2(KTR_PTRACE, "PT_SETREGS: tid %d (pid %d)", td2->td_tid,
1132                     p->p_pid);
1133                 td2->td_dbgflags |= TDB_USERWR;
1134                 error = PROC_WRITE(regs, td2, addr);
1135                 break;
1136
1137         case PT_GETREGS:
1138                 CTR2(KTR_PTRACE, "PT_GETREGS: tid %d (pid %d)", td2->td_tid,
1139                     p->p_pid);
1140                 error = PROC_READ(regs, td2, addr);
1141                 break;
1142
1143         case PT_SETFPREGS:
1144                 CTR2(KTR_PTRACE, "PT_SETFPREGS: tid %d (pid %d)", td2->td_tid,
1145                     p->p_pid);
1146                 td2->td_dbgflags |= TDB_USERWR;
1147                 error = PROC_WRITE(fpregs, td2, addr);
1148                 break;
1149
1150         case PT_GETFPREGS:
1151                 CTR2(KTR_PTRACE, "PT_GETFPREGS: tid %d (pid %d)", td2->td_tid,
1152                     p->p_pid);
1153                 error = PROC_READ(fpregs, td2, addr);
1154                 break;
1155
1156         case PT_SETDBREGS:
1157                 CTR2(KTR_PTRACE, "PT_SETDBREGS: tid %d (pid %d)", td2->td_tid,
1158                     p->p_pid);
1159                 td2->td_dbgflags |= TDB_USERWR;
1160                 error = PROC_WRITE(dbregs, td2, addr);
1161                 break;
1162
1163         case PT_GETDBREGS:
1164                 CTR2(KTR_PTRACE, "PT_GETDBREGS: tid %d (pid %d)", td2->td_tid,
1165                     p->p_pid);
1166                 error = PROC_READ(dbregs, td2, addr);
1167                 break;
1168
1169         case PT_LWPINFO:
1170                 if (data <= 0 ||
1171 #ifdef COMPAT_FREEBSD32
1172                     (!wrap32 && data > sizeof(*pl)) ||
1173                     (wrap32 && data > sizeof(*pl32))) {
1174 #else
1175                     data > sizeof(*pl)) {
1176 #endif
1177                         error = EINVAL;
1178                         break;
1179                 }
1180 #ifdef COMPAT_FREEBSD32
1181                 if (wrap32) {
1182                         pl = &plr;
1183                         pl32 = addr;
1184                 } else
1185 #endif
1186                 pl = addr;
1187                 pl->pl_lwpid = td2->td_tid;
1188                 pl->pl_event = PL_EVENT_NONE;
1189                 pl->pl_flags = 0;
1190                 if (td2->td_dbgflags & TDB_XSIG) {
1191                         pl->pl_event = PL_EVENT_SIGNAL;
1192                         if (td2->td_dbgksi.ksi_signo != 0 &&
1193 #ifdef COMPAT_FREEBSD32
1194                             ((!wrap32 && data >= offsetof(struct ptrace_lwpinfo,
1195                             pl_siginfo) + sizeof(pl->pl_siginfo)) ||
1196                             (wrap32 && data >= offsetof(struct ptrace_lwpinfo32,
1197                             pl_siginfo) + sizeof(struct siginfo32)))
1198 #else
1199                             data >= offsetof(struct ptrace_lwpinfo, pl_siginfo)
1200                             + sizeof(pl->pl_siginfo)
1201 #endif
1202                         ){
1203                                 pl->pl_flags |= PL_FLAG_SI;
1204                                 pl->pl_siginfo = td2->td_dbgksi.ksi_info;
1205                         }
1206                 }
1207                 if ((pl->pl_flags & PL_FLAG_SI) == 0)
1208                         bzero(&pl->pl_siginfo, sizeof(pl->pl_siginfo));
1209                 if (td2->td_dbgflags & TDB_SCE)
1210                         pl->pl_flags |= PL_FLAG_SCE;
1211                 else if (td2->td_dbgflags & TDB_SCX)
1212                         pl->pl_flags |= PL_FLAG_SCX;
1213                 if (td2->td_dbgflags & TDB_EXEC)
1214                         pl->pl_flags |= PL_FLAG_EXEC;
1215                 if (td2->td_dbgflags & TDB_FORK) {
1216                         pl->pl_flags |= PL_FLAG_FORKED;
1217                         pl->pl_child_pid = td2->td_dbg_forked;
1218                 }
1219                 if (td2->td_dbgflags & TDB_CHILD)
1220                         pl->pl_flags |= PL_FLAG_CHILD;
1221                 pl->pl_sigmask = td2->td_sigmask;
1222                 pl->pl_siglist = td2->td_siglist;
1223                 strcpy(pl->pl_tdname, td2->td_name);
1224                 if ((td2->td_dbgflags & (TDB_SCE | TDB_SCX)) != 0) {
1225                         pl->pl_syscall_code = td2->td_dbg_sc_code;
1226                         pl->pl_syscall_narg = td2->td_dbg_sc_narg;
1227                 } else {
1228                         pl->pl_syscall_code = 0;
1229                         pl->pl_syscall_narg = 0;
1230                 }
1231 #ifdef COMPAT_FREEBSD32
1232                 if (wrap32)
1233                         ptrace_lwpinfo_to32(pl, pl32);
1234 #endif
1235                 CTR6(KTR_PTRACE,
1236     "PT_LWPINFO: tid %d (pid %d) event %d flags %#x child pid %d syscall %d",
1237                     td2->td_tid, p->p_pid, pl->pl_event, pl->pl_flags,
1238                     pl->pl_child_pid, pl->pl_syscall_code);
1239                 break;
1240
1241         case PT_GETNUMLWPS:
1242                 CTR2(KTR_PTRACE, "PT_GETNUMLWPS: pid %d: %d threads", p->p_pid,
1243                     p->p_numthreads);
1244                 td->td_retval[0] = p->p_numthreads;
1245                 break;
1246
1247         case PT_GETLWPLIST:
1248                 CTR3(KTR_PTRACE, "PT_GETLWPLIST: pid %d: data %d, actual %d",
1249                     p->p_pid, data, p->p_numthreads);
1250                 if (data <= 0) {
1251                         error = EINVAL;
1252                         break;
1253                 }
1254                 num = imin(p->p_numthreads, data);
1255                 PROC_UNLOCK(p);
1256                 buf = malloc(num * sizeof(lwpid_t), M_TEMP, M_WAITOK);
1257                 tmp = 0;
1258                 PROC_LOCK(p);
1259                 FOREACH_THREAD_IN_PROC(p, td2) {
1260                         if (tmp >= num)
1261                                 break;
1262                         buf[tmp++] = td2->td_tid;
1263                 }
1264                 PROC_UNLOCK(p);
1265                 error = copyout(buf, addr, tmp * sizeof(lwpid_t));
1266                 free(buf, M_TEMP);
1267                 if (!error)
1268                         td->td_retval[0] = tmp;
1269                 PROC_LOCK(p);
1270                 break;
1271
1272         case PT_VM_TIMESTAMP:
1273                 CTR2(KTR_PTRACE, "PT_VM_TIMESTAMP: pid %d: timestamp %d",
1274                     p->p_pid, p->p_vmspace->vm_map.timestamp);
1275                 td->td_retval[0] = p->p_vmspace->vm_map.timestamp;
1276                 break;
1277
1278         case PT_VM_ENTRY:
1279                 PROC_UNLOCK(p);
1280 #ifdef COMPAT_FREEBSD32
1281                 if (wrap32)
1282                         error = ptrace_vm_entry32(td, p, addr);
1283                 else
1284 #endif
1285                 error = ptrace_vm_entry(td, p, addr);
1286                 PROC_LOCK(p);
1287                 break;
1288
1289         default:
1290 #ifdef __HAVE_PTRACE_MACHDEP
1291                 if (req >= PT_FIRSTMACH) {
1292                         PROC_UNLOCK(p);
1293                         error = cpu_ptrace(td2, req, addr, data);
1294                         PROC_LOCK(p);
1295                 } else
1296 #endif
1297                         /* Unknown request. */
1298                         error = EINVAL;
1299                 break;
1300         }
1301
1302 out:
1303         /* Drop our hold on this process now that the request has completed. */
1304         _PRELE(p);
1305 fail:
1306         PROC_UNLOCK(p);
1307         if (proctree_locked)
1308                 sx_xunlock(&proctree_lock);
1309         return (error);
1310 }
1311 #undef PROC_READ
1312 #undef PROC_WRITE
1313
1314 /*
1315  * Stop a process because of a debugging event;
1316  * stay stopped until p->p_step is cleared
1317  * (cleared by PIOCCONT in procfs).
1318  */
1319 void
1320 stopevent(struct proc *p, unsigned int event, unsigned int val)
1321 {
1322
1323         PROC_LOCK_ASSERT(p, MA_OWNED);
1324         p->p_step = 1;
1325         CTR3(KTR_PTRACE, "stopevent: pid %d event %u val %u", p->p_pid, event,
1326             val);
1327         do {
1328                 p->p_xstat = val;
1329                 p->p_xthread = NULL;
1330                 p->p_stype = event;     /* Which event caused the stop? */
1331                 wakeup(&p->p_stype);    /* Wake up any PIOCWAIT'ing procs */
1332                 msleep(&p->p_step, &p->p_mtx, PWAIT, "stopevent", 0);
1333         } while (p->p_step);
1334 }
1335
1336 static int
1337 protect_setchild(struct thread *td, struct proc *p, int flags)
1338 {
1339
1340         PROC_LOCK_ASSERT(p, MA_OWNED);
1341         if (p->p_flag & P_SYSTEM || p_cansched(td, p) != 0)
1342                 return (0);
1343         if (flags & PPROT_SET) {
1344                 p->p_flag |= P_PROTECTED;
1345                 if (flags & PPROT_INHERIT)
1346                         p->p_flag2 |= P2_INHERIT_PROTECTED;
1347         } else {
1348                 p->p_flag &= ~P_PROTECTED;
1349                 p->p_flag2 &= ~P2_INHERIT_PROTECTED;
1350         }
1351         return (1);
1352 }
1353
1354 static int
1355 protect_setchildren(struct thread *td, struct proc *top, int flags)
1356 {
1357         struct proc *p;
1358         int ret;
1359
1360         p = top;
1361         ret = 0;
1362         sx_assert(&proctree_lock, SX_LOCKED);
1363         for (;;) {
1364                 ret |= protect_setchild(td, p, flags);
1365                 PROC_UNLOCK(p);
1366                 /*
1367                  * If this process has children, descend to them next,
1368                  * otherwise do any siblings, and if done with this level,
1369                  * follow back up the tree (but not past top).
1370                  */
1371                 if (!LIST_EMPTY(&p->p_children))
1372                         p = LIST_FIRST(&p->p_children);
1373                 else for (;;) {
1374                         if (p == top) {
1375                                 PROC_LOCK(p);
1376                                 return (ret);
1377                         }
1378                         if (LIST_NEXT(p, p_sibling)) {
1379                                 p = LIST_NEXT(p, p_sibling);
1380                                 break;
1381                         }
1382                         p = p->p_pptr;
1383                 }
1384                 PROC_LOCK(p);
1385         }
1386 }
1387
1388 static int
1389 protect_set(struct thread *td, struct proc *p, int flags)
1390 {
1391         int error, ret;
1392
1393         switch (PPROT_OP(flags)) {
1394         case PPROT_SET:
1395         case PPROT_CLEAR:
1396                 break;
1397         default:
1398                 return (EINVAL);
1399         }
1400
1401         if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0)
1402                 return (EINVAL);
1403
1404         error = priv_check(td, PRIV_VM_MADV_PROTECT);
1405         if (error)
1406                 return (error);
1407
1408         if (flags & PPROT_DESCEND)
1409                 ret = protect_setchildren(td, p, flags);
1410         else
1411                 ret = protect_setchild(td, p, flags);
1412         if (ret == 0)
1413                 return (EPERM);
1414         return (0);
1415 }
1416
1417 #ifndef _SYS_SYSPROTO_H_
1418 struct procctl_args {
1419         idtype_t idtype;
1420         id_t    id;
1421         int     com;
1422         void    *data;
1423 };
1424 #endif
1425 /* ARGSUSED */
1426 int
1427 sys_procctl(struct thread *td, struct procctl_args *uap)
1428 {
1429         int error, flags;
1430         void *data;
1431
1432         switch (uap->com) {
1433         case PROC_SPROTECT:
1434                 error = copyin(uap->data, &flags, sizeof(flags));
1435                 if (error)
1436                         return (error);
1437                 data = &flags;
1438                 break;
1439         default:
1440                 return (EINVAL);
1441         }
1442
1443         return (kern_procctl(td, uap->idtype, uap->id, uap->com, data));
1444 }
1445
1446 static int
1447 kern_procctl_single(struct thread *td, struct proc *p, int com, void *data)
1448 {
1449
1450         PROC_LOCK_ASSERT(p, MA_OWNED);
1451         switch (com) {
1452         case PROC_SPROTECT:
1453                 return (protect_set(td, p, *(int *)data));
1454         default:
1455                 return (EINVAL);
1456         }
1457 }
1458
1459 int
1460 kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
1461 {
1462         struct pgrp *pg;
1463         struct proc *p;
1464         int error, first_error, ok;
1465
1466         sx_slock(&proctree_lock);
1467         switch (idtype) {
1468         case P_PID:
1469                 p = pfind(id);
1470                 if (p == NULL) {
1471                         error = ESRCH;
1472                         break;
1473                 }
1474                 if (p->p_state == PRS_NEW)
1475                         error = ESRCH;
1476                 else
1477                         error = p_cansee(td, p);
1478                 if (error == 0)
1479                         error = kern_procctl_single(td, p, com, data);
1480                 PROC_UNLOCK(p);
1481                 break;
1482         case P_PGID:
1483                 /*
1484                  * Attempt to apply the operation to all members of the
1485                  * group.  Ignore processes in the group that can't be
1486                  * seen.  Ignore errors so long as at least one process is
1487                  * able to complete the request successfully.
1488                  */
1489                 pg = pgfind(id);
1490                 if (pg == NULL) {
1491                         error = ESRCH;
1492                         break;
1493                 }
1494                 PGRP_UNLOCK(pg);
1495                 ok = 0;
1496                 first_error = 0;
1497                 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
1498                         PROC_LOCK(p);
1499                         if (p->p_state == PRS_NEW || p_cansee(td, p) != 0) {
1500                                 PROC_UNLOCK(p);
1501                                 continue;
1502                         }
1503                         error = kern_procctl_single(td, p, com, data);
1504                         PROC_UNLOCK(p);
1505                         if (error == 0)
1506                                 ok = 1;
1507                         else if (first_error == 0)
1508                                 first_error = error;
1509                 }
1510                 if (ok)
1511                         error = 0;
1512                 else if (first_error != 0)
1513                         error = first_error;
1514                 else
1515                         /*
1516                          * Was not able to see any processes in the
1517                          * process group.
1518                          */
1519                         error = ESRCH;
1520                 break;
1521         default:
1522                 error = EINVAL;
1523                 break;
1524         }
1525         sx_sunlock(&proctree_lock);
1526         return (error);
1527 }