]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/sys_process.c
Unbreak building kernels with COMPAT_32 enabled. The actual support
[FreeBSD/FreeBSD.git] / sys / kern / sys_process.c
1 /*-
2  * Copyright (c) 1994, Sean Eric Fagan
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by Sean Eric Fagan.
16  * 4. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include "opt_compat.h"
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/syscallsubr.h>
42 #include <sys/sysent.h>
43 #include <sys/sysproto.h>
44 #include <sys/proc.h>
45 #include <sys/vnode.h>
46 #include <sys/ptrace.h>
47 #include <sys/sx.h>
48 #include <sys/malloc.h>
49 #include <sys/signalvar.h>
50
51 #include <machine/reg.h>
52
53 #include <security/audit/audit.h>
54
55 #include <vm/vm.h>
56 #include <vm/pmap.h>
57 #include <vm/vm_extern.h>
58 #include <vm/vm_map.h>
59 #include <vm/vm_kern.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_pager.h>
63 #include <vm/vm_param.h>
64
65 #ifdef COMPAT_IA32
66 #include <sys/procfs.h>
67 #include <machine/fpu.h>
68 #include <compat/ia32/ia32_reg.h>
69
/*
 * 32-bit layout of struct ptrace_io_desc, used for PT_IO requests
 * issued by 32-bit (ia32) processes: the pointer members are narrowed
 * to 32-bit integers.
 */
struct ptrace_io_desc32 {
	int		piod_op;	/* I/O operation code */
	u_int32_t	piod_offs;	/* child offset (32-bit pointer) */
	u_int32_t	piod_addr;	/* parent buffer (32-bit pointer) */
	u_int32_t	piod_len;	/* requested transfer length */
};
76
/*
 * 32-bit layout of struct ptrace_vm_entry, used for PT_VM_ENTRY
 * requests issued by 32-bit (ia32) processes: pointer and address
 * members are narrowed to 32-bit integers.
 */
struct ptrace_vm_entry32 {
	uint32_t	pve_cookie;	/* map-entry iteration cookie */
	uint32_t	pve_start;	/* start address of the entry */
	uint32_t	pve_end;	/* last valid address of the entry */
	uint32_t	pve_offset;	/* offset into the backing object */
	u_int		pve_prot;	/* protection of the entry */
	u_int		pve_pathlen;	/* in: buffer size; out: path length */
	uint32_t	pve_path;	/* path buffer (32-bit pointer) */
};
86
87 #endif
88
89 /*
90  * Functions implemented using PROC_ACTION():
91  *
92  * proc_read_regs(proc, regs)
93  *      Get the current user-visible register set from the process
94  *      and copy it into the regs structure (<machine/reg.h>).
95  *      The process is stopped at the time read_regs is called.
96  *
97  * proc_write_regs(proc, regs)
98  *      Update the current register set from the passed in regs
99  *      structure.  Take care to avoid clobbering special CPU
100  *      registers or privileged bits in the PSL.
101  *      Depending on the architecture this may have fix-up work to do,
102  *      especially if the IAR or PCW are modified.
103  *      The process is stopped at the time write_regs is called.
104  *
105  * proc_read_fpregs, proc_write_fpregs
106  *      deal with the floating point register set, otherwise as above.
107  *
108  * proc_read_dbregs, proc_write_dbregs
109  *      deal with the processor debug register set, otherwise as above.
110  *
111  * proc_sstep(proc)
112  *      Arrange for the process to trap after executing a single instruction.
113  */
114
/*
 * Perform "action" on a stopped thread's process and return its errno
 * result from the enclosing function.  The caller must hold the
 * process lock; if the process is not resident in memory (P_INMEM
 * clear) the action is skipped and EIO is returned, since the register
 * state cannot be accessed.  Note: this macro contains a "return".
 */
#define PROC_ACTION(action) do {                                        \
        int error;                                                      \
                                                                        \
        PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);                        \
        if ((td->td_proc->p_flag & P_INMEM) == 0)                       \
                error = EIO;                                            \
        else                                                            \
                error = (action);                                       \
        return (error);                                                 \
} while(0)
125
126 int
127 proc_read_regs(struct thread *td, struct reg *regs)
128 {
129
130         PROC_ACTION(fill_regs(td, regs));
131 }
132
133 int
134 proc_write_regs(struct thread *td, struct reg *regs)
135 {
136
137         PROC_ACTION(set_regs(td, regs));
138 }
139
140 int
141 proc_read_dbregs(struct thread *td, struct dbreg *dbregs)
142 {
143
144         PROC_ACTION(fill_dbregs(td, dbregs));
145 }
146
147 int
148 proc_write_dbregs(struct thread *td, struct dbreg *dbregs)
149 {
150
151         PROC_ACTION(set_dbregs(td, dbregs));
152 }
153
154 /*
155  * Ptrace doesn't support fpregs at all, and there are no security holes
156  * or translations for fpregs, so we can just copy them.
157  */
158 int
159 proc_read_fpregs(struct thread *td, struct fpreg *fpregs)
160 {
161
162         PROC_ACTION(fill_fpregs(td, fpregs));
163 }
164
165 int
166 proc_write_fpregs(struct thread *td, struct fpreg *fpregs)
167 {
168
169         PROC_ACTION(set_fpregs(td, fpregs));
170 }
171
172 #ifdef COMPAT_IA32
173 /* For 32 bit binaries, we need to expose the 32 bit regs layouts. */
174 int
175 proc_read_regs32(struct thread *td, struct reg32 *regs32)
176 {
177
178         PROC_ACTION(fill_regs32(td, regs32));
179 }
180
181 int
182 proc_write_regs32(struct thread *td, struct reg32 *regs32)
183 {
184
185         PROC_ACTION(set_regs32(td, regs32));
186 }
187
188 int
189 proc_read_dbregs32(struct thread *td, struct dbreg32 *dbregs32)
190 {
191
192         PROC_ACTION(fill_dbregs32(td, dbregs32));
193 }
194
195 int
196 proc_write_dbregs32(struct thread *td, struct dbreg32 *dbregs32)
197 {
198
199         PROC_ACTION(set_dbregs32(td, dbregs32));
200 }
201
202 int
203 proc_read_fpregs32(struct thread *td, struct fpreg32 *fpregs32)
204 {
205
206         PROC_ACTION(fill_fpregs32(td, fpregs32));
207 }
208
209 int
210 proc_write_fpregs32(struct thread *td, struct fpreg32 *fpregs32)
211 {
212
213         PROC_ACTION(set_fpregs32(td, fpregs32));
214 }
215 #endif
216
217 int
218 proc_sstep(struct thread *td)
219 {
220
221         PROC_ACTION(ptrace_single_step(td));
222 }
223
/*
 * Transfer data between the kernel (described by "uio") and the
 * address space of process "p", one page at a time.  Backs the
 * PT_READ_*, PT_WRITE_* and PT_IO requests.  The caller must hold the
 * process (p_lock) so the vmspace cannot be torn down mid-operation.
 * Returns 0 or an errno; a short transfer leaves uio->uio_resid
 * non-zero.
 */
int
proc_rwmem(struct proc *p, struct uio *uio)
{
	vm_map_t map;
	vm_object_t backing_object, object;
	vm_offset_t pageno;		/* page number */
	vm_prot_t reqprot;
	int error, writing;

	/*
	 * Assert that someone has locked this vmspace.  (Should be
	 * curthread but we can't assert that.)  This keeps the process
	 * from exiting out from under us until this operation completes.
	 */
	KASSERT(p->p_lock >= 1, ("%s: process %p (pid %d) not held", __func__,
	    p, p->p_pid));

	/*
	 * The map we want...
	 */
	map = &p->p_vmspace->vm_map;

	/*
	 * For writes, also request VM_PROT_COPY so the fault below can
	 * give us a private (copy-on-write) page to scribble on.
	 */
	writing = uio->uio_rw == UIO_WRITE;
	reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ;

	/*
	 * Only map in one page at a time.  We don't have to, but it
	 * makes things easier.  This way is trivial - right?
	 */
	do {
		vm_map_t tmap;
		vm_offset_t uva;
		int page_offset;		/* offset into page */
		vm_map_entry_t out_entry;
		vm_prot_t out_prot;
		boolean_t wired;
		vm_pindex_t pindex;
		u_int len;
		vm_page_t m;

		object = NULL;

		uva = (vm_offset_t)uio->uio_offset;

		/*
		 * Get the page number of this segment.
		 */
		pageno = trunc_page(uva);
		page_offset = uva - pageno;

		/*
		 * How many bytes to copy (bounded by the end of this
		 * page and by what is left of the request).
		 */
		len = min(PAGE_SIZE - page_offset, uio->uio_resid);

		/*
		 * Fault the page on behalf of the process
		 */
		error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL);
		if (error) {
			/* Map the KERN_* result onto an errno. */
			if (error == KERN_RESOURCE_SHORTAGE)
				error = ENOMEM;
			else
				error = EFAULT;
			break;
		}

		/*
		 * Now we need to get the page.  out_entry and wired
		 * aren't used.  One would think the vm code
		 * would be a *bit* nicer...  We use tmap because
		 * vm_map_lookup() can change the map argument.
		 */
		tmap = map;
		error = vm_map_lookup(&tmap, pageno, reqprot, &out_entry,
		    &object, &pindex, &out_prot, &wired);
		if (error) {
			error = EFAULT;
			break;
		}
		VM_OBJECT_LOCK(object);
		while ((m = vm_page_lookup(object, pindex)) == NULL &&
		    !writing &&
		    (backing_object = object->backing_object) != NULL) {
			/*
			 * Allow fallback to backing objects if we are reading.
			 * Lock the backing object before dropping the current
			 * one so the chain cannot change underneath us.
			 */
			VM_OBJECT_LOCK(backing_object);
			pindex += OFF_TO_IDX(object->backing_object_offset);
			VM_OBJECT_UNLOCK(object);
			object = backing_object;
		}
		if (writing && m != NULL) {
			/* The page is about to be modified. */
			vm_page_dirty(m);
			vm_pager_page_unswapped(m);
		}
		VM_OBJECT_UNLOCK(object);
		if (m == NULL) {
			vm_map_lookup_done(tmap, out_entry);
			error = EFAULT;
			break;
		}

		/*
		 * Hold the page in memory.
		 */
		vm_page_lock_queues();
		vm_page_hold(m);
		vm_page_unlock_queues();

		/*
		 * We're done with tmap now.
		 */
		vm_map_lookup_done(tmap, out_entry);

		/*
		 * Now do the i/o move.
		 */
		error = uiomove_fromphys(&m, page_offset, len, uio);

		/* Make the I-cache coherent for breakpoints. */
		if (!error && writing && (out_prot & VM_PROT_EXECUTE))
			vm_sync_icache(map, uva, len);

		/*
		 * Release the page.
		 */
		vm_page_lock_queues();
		vm_page_unhold(m);
		vm_page_unlock_queues();

	} while (error == 0 && uio->uio_resid > 0);

	return (error);
}
359
/*
 * PT_VM_ENTRY handler: report the next memory map entry of process "p"
 * to the tracer.  pve->pve_cookie carries the iteration state (the
 * previously returned map entry, or NULL to start at the beginning);
 * submap entries are skipped.  If pve->pve_pathlen is non-zero, also
 * attempt to copy out the path of the vnode backing the entry.
 * Returns ENOENT when the map is exhausted, EINVAL if the cookie entry
 * has disappeared, or an errno from the path copyout.
 */
static int
ptrace_vm_entry(struct thread *td, struct proc *p, struct ptrace_vm_entry *pve)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t obj, tobj, lobj;
	struct vnode *vp;
	char *freepath, *fullpath;
	u_int pathlen;
	int error, vfslocked;

	map = &p->p_vmspace->vm_map;
	entry = map->header.next;
	if (pve->pve_cookie != NULL) {
		/* Re-find the cookie entry; fail if it has gone away. */
		while (entry != &map->header && entry != pve->pve_cookie)
			entry = entry->next;
		if (entry != pve->pve_cookie)
			return (EINVAL);
		entry = entry->next;
	}
	/* Skip submap entries; they are not reported. */
	while (entry != &map->header && (entry->eflags & MAP_ENTRY_IS_SUB_MAP))
		entry = entry->next;
	if (entry == &map->header)
		return (ENOENT);

	/* We got an entry. */
	pve->pve_cookie = entry;
	pve->pve_start = entry->start;
	pve->pve_end = entry->end - 1;
	pve->pve_offset = entry->offset;
	pve->pve_prot = entry->protection;

	/* Backing object's path needed? */
	if (pve->pve_pathlen == 0)
		return (0);

	pathlen = pve->pve_pathlen;
	pve->pve_pathlen = 0;

	obj = entry->object.vm_object;
	if (obj == NULL)
		return (0);

	/*
	 * Walk down the shadow chain to the bottom-most object (the
	 * one that may be vnode-backed), accumulating the offset along
	 * the way.  Locks are handed over so that on loop exit the
	 * lock on "lobj" (and the one on "obj", if distinct) is held.
	 */
	VM_OBJECT_LOCK(obj);
	for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
		if (tobj != obj)
			VM_OBJECT_LOCK(tobj);
		if (lobj != obj)
			VM_OBJECT_UNLOCK(lobj);
		lobj = tobj;
		pve->pve_offset += tobj->backing_object_offset;
	}
	if (lobj != NULL) {
		vp = (lobj->type == OBJT_VNODE) ? lobj->handle : NULL;
		/* Take a reference so the path lookup can run unlocked. */
		if (vp != NULL)
			vref(vp);
		if (lobj != obj)
			VM_OBJECT_UNLOCK(lobj);
		VM_OBJECT_UNLOCK(obj);
	} else
		vp = NULL;

	if (vp == NULL)
		return (0);

	freepath = NULL;
	fullpath = NULL;
	vn_fullpath(td, vp, &fullpath, &freepath);
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vrele(vp);
	VFS_UNLOCK_GIANT(vfslocked);

	error = 0;
	if (fullpath != NULL) {
		/* Report the full length, including the terminating NUL. */
		pve->pve_pathlen = strlen(fullpath) + 1;
		if (pve->pve_pathlen <= pathlen) {
			error = copyout(fullpath, pve->pve_path,
			    pve->pve_pathlen);
		} else
			error = ENAMETOOLONG;
	}
	if (freepath != NULL)
		free(freepath, M_TEMP);
	return (error);
}
445
446 /*
447  * Process debugging system call.
448  */
#ifndef _SYS_SYSPROTO_H_
struct ptrace_args {
	int	req;		/* request code (PT_*) */
	pid_t	pid;		/* target process (or thread) id */
	caddr_t	addr;		/* request-specific address argument */
	int	data;		/* request-specific data argument */
};
#endif
457
#ifdef COMPAT_IA32
/*
 * This CPP subterfuge is to try and reduce the number of ifdefs in
 * the body of the code.
 *   COPYIN(uap->addr, &r.reg, sizeof r.reg);
 * becomes either:
 *   copyin(uap->addr, &r.reg, sizeof r.reg);
 * or
 *   copyin(uap->addr, &r.reg32, sizeof r.reg32);
 * .. except this is done at runtime.
 *
 * Both macros rely on an int "wrap32" being in scope at the use site,
 * and on "32" being pastable onto both the union member name and the
 * sizeof operand to produce the 32-bit variants.
 */
#define COPYIN(u, k, s)         wrap32 ? \
        copyin(u, k ## 32, s ## 32) : \
        copyin(u, k, s)
#define COPYOUT(k, u, s)        wrap32 ? \
        copyout(k ## 32, u, s ## 32) : \
        copyout(k, u, s)
#else
#define COPYIN(u, k, s)         copyin(u, k, s)
#define COPYOUT(k, u, s)        copyout(k, u, s)
#endif
/*
 * ptrace(2) system call entry point: copy request-specific arguments
 * in from userland, hand off to kern_ptrace(), and copy results back
 * out.  For 32-bit (ia32) callers the COPYIN/COPYOUT macros
 * transparently use the 32-bit layouts of the staged structures.
 */
int
ptrace(struct thread *td, struct ptrace_args *uap)
{
	/*
	 * XXX this obfuscation is to reduce stack usage, but the register
	 * structs may be too large to put on the stack anyway.
	 */
	union {
		struct ptrace_io_desc piod;
		struct ptrace_lwpinfo pl;
		struct ptrace_vm_entry pve;
		struct dbreg dbreg;
		struct fpreg fpreg;
		struct reg reg;
#ifdef COMPAT_IA32
		struct dbreg32 dbreg32;
		struct fpreg32 fpreg32;
		struct reg32 reg32;
		struct ptrace_io_desc32 piod32;
		struct ptrace_vm_entry32 pve32;
#endif
	} r;
	void *addr;
	int error = 0;
#ifdef COMPAT_IA32
	int wrap32 = 0;		/* referenced by COPYIN()/COPYOUT() */

	if (SV_CURPROC_FLAG(SV_ILP32))
		wrap32 = 1;
#endif
	AUDIT_ARG_PID(uap->pid);
	AUDIT_ARG_CMD(uap->req);
	AUDIT_ARG_VALUE(uap->data);
	/*
	 * Requests with structured arguments are staged through "r";
	 * all other requests pass uap->addr through unchanged.
	 */
	addr = &r;
	switch (uap->req) {
	case PT_GETREGS:
	case PT_GETFPREGS:
	case PT_GETDBREGS:
	case PT_LWPINFO:
		/* Output-only requests: nothing to copy in. */
		break;
	case PT_SETREGS:
		error = COPYIN(uap->addr, &r.reg, sizeof r.reg);
		break;
	case PT_SETFPREGS:
		error = COPYIN(uap->addr, &r.fpreg, sizeof r.fpreg);
		break;
	case PT_SETDBREGS:
		error = COPYIN(uap->addr, &r.dbreg, sizeof r.dbreg);
		break;
	case PT_IO:
		error = COPYIN(uap->addr, &r.piod, sizeof r.piod);
		break;
	case PT_VM_ENTRY:
		error = COPYIN(uap->addr, &r.pve, sizeof r.pve);
		break;
	default:
		addr = uap->addr;
		break;
	}
	if (error)
		return (error);

	error = kern_ptrace(td, uap->req, uap->pid, addr, uap->data);
	if (error)
		return (error);

	/* Copy results of the staged requests back to userland. */
	switch (uap->req) {
	case PT_VM_ENTRY:
		error = COPYOUT(&r.pve, uap->addr, sizeof r.pve);
		break;
	case PT_IO:
		error = COPYOUT(&r.piod, uap->addr, sizeof r.piod);
		break;
	case PT_GETREGS:
		error = COPYOUT(&r.reg, uap->addr, sizeof r.reg);
		break;
	case PT_GETFPREGS:
		error = COPYOUT(&r.fpreg, uap->addr, sizeof r.fpreg);
		break;
	case PT_GETDBREGS:
		error = COPYOUT(&r.dbreg, uap->addr, sizeof r.dbreg);
		break;
	case PT_LWPINFO:
		/* Copied as-is with the caller-supplied length. */
		error = copyout(&r.pl, uap->addr, uap->data);
		break;
	}

	return (error);
}
568 #undef COPYIN
569 #undef COPYOUT
570
#ifdef COMPAT_IA32
/*
 *   PROC_READ(regs, td2, addr);
 * becomes either:
 *   proc_read_regs(td2, addr);
 * or
 *   proc_read_regs32(td2, addr);
 * .. except this is done at runtime.  There is an additional
 * complication in that PROC_WRITE disallows 32 bit consumers
 * from writing to 64 bit address space targets.
 *
 * Both macros rely on an int "wrap32" (caller is 32-bit) being in
 * scope; PROC_WRITE additionally relies on "safe" (target is also
 * 32-bit), yielding EINVAL when a 32-bit tracer writes to a 64-bit
 * target.
 */
#define PROC_READ(w, t, a)      wrap32 ? \
        proc_read_ ## w ## 32(t, a) : \
        proc_read_ ## w (t, a)
#define PROC_WRITE(w, t, a)     wrap32 ? \
        (safe ? proc_write_ ## w ## 32(t, a) : EINVAL ) : \
        proc_write_ ## w (t, a)
#else
#define PROC_READ(w, t, a)      proc_read_ ## w (t, a)
#define PROC_WRITE(w, t, a)     proc_write_ ## w (t, a)
#endif
592
593 int
594 kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
595 {
596         struct iovec iov;
597         struct uio uio;
598         struct proc *curp, *p, *pp;
599         struct thread *td2 = NULL;
600         struct ptrace_io_desc *piod = NULL;
601         struct ptrace_lwpinfo *pl;
602         int error, write, tmp, num;
603         int proctree_locked = 0;
604         lwpid_t tid = 0, *buf;
605 #ifdef COMPAT_IA32
606         int wrap32 = 0, safe = 0;
607         struct ptrace_io_desc32 *piod32 = NULL;
608 #endif
609
610         curp = td->td_proc;
611
612         /* Lock proctree before locking the process. */
613         switch (req) {
614         case PT_TRACE_ME:
615         case PT_ATTACH:
616         case PT_STEP:
617         case PT_CONTINUE:
618         case PT_TO_SCE:
619         case PT_TO_SCX:
620         case PT_SYSCALL:
621         case PT_DETACH:
622                 sx_xlock(&proctree_lock);
623                 proctree_locked = 1;
624                 break;
625         default:
626                 break;
627         }
628
629         write = 0;
630         if (req == PT_TRACE_ME) {
631                 p = td->td_proc;
632                 PROC_LOCK(p);
633         } else {
634                 if (pid <= PID_MAX) {
635                         if ((p = pfind(pid)) == NULL) {
636                                 if (proctree_locked)
637                                         sx_xunlock(&proctree_lock);
638                                 return (ESRCH);
639                         }
640                 } else {
641                         /* this is slow, should be optimized */
642                         sx_slock(&allproc_lock);
643                         FOREACH_PROC_IN_SYSTEM(p) {
644                                 PROC_LOCK(p);
645                                 FOREACH_THREAD_IN_PROC(p, td2) {
646                                         if (td2->td_tid == pid)
647                                                 break;
648                                 }
649                                 if (td2 != NULL)
650                                         break; /* proc lock held */
651                                 PROC_UNLOCK(p);
652                         }
653                         sx_sunlock(&allproc_lock);
654                         if (p == NULL) {
655                                 if (proctree_locked)
656                                         sx_xunlock(&proctree_lock);
657                                 return (ESRCH);
658                         }
659                         tid = pid;
660                         pid = p->p_pid;
661                 }
662         }
663         AUDIT_ARG_PROCESS(p);
664
665         if ((p->p_flag & P_WEXIT) != 0) {
666                 error = ESRCH;
667                 goto fail;
668         }
669         if ((error = p_cansee(td, p)) != 0)
670                 goto fail;
671
672         if ((error = p_candebug(td, p)) != 0)
673                 goto fail;
674
675         /*
676          * System processes can't be debugged.
677          */
678         if ((p->p_flag & P_SYSTEM) != 0) {
679                 error = EINVAL;
680                 goto fail;
681         }
682
683         if (tid == 0) {
684                 if ((p->p_flag & P_STOPPED_TRACE) != 0) {
685                         KASSERT(p->p_xthread != NULL, ("NULL p_xthread"));
686                         td2 = p->p_xthread;
687                 } else {
688                         td2 = FIRST_THREAD_IN_PROC(p);
689                 }
690                 tid = td2->td_tid;
691         }
692
693 #ifdef COMPAT_IA32
694         /*
695          * Test if we're a 32 bit client and what the target is.
696          * Set the wrap controls accordingly.
697          */
698         if (SV_CURPROC_FLAG(SV_ILP32)) {
699                 if (td2->td_proc->p_sysent->sv_flags & SV_ILP32)
700                         safe = 1;
701                 wrap32 = 1;
702         }
703 #endif
704         /*
705          * Permissions check
706          */
707         switch (req) {
708         case PT_TRACE_ME:
709                 /* Always legal. */
710                 break;
711
712         case PT_ATTACH:
713                 /* Self */
714                 if (p->p_pid == td->td_proc->p_pid) {
715                         error = EINVAL;
716                         goto fail;
717                 }
718
719                 /* Already traced */
720                 if (p->p_flag & P_TRACED) {
721                         error = EBUSY;
722                         goto fail;
723                 }
724
725                 /* Can't trace an ancestor if you're being traced. */
726                 if (curp->p_flag & P_TRACED) {
727                         for (pp = curp->p_pptr; pp != NULL; pp = pp->p_pptr) {
728                                 if (pp == p) {
729                                         error = EINVAL;
730                                         goto fail;
731                                 }
732                         }
733                 }
734
735
736                 /* OK */
737                 break;
738
739         case PT_CLEARSTEP:
740                 /* Allow thread to clear single step for itself */
741                 if (td->td_tid == tid)
742                         break;
743
744                 /* FALLTHROUGH */
745         default:
746                 /* not being traced... */
747                 if ((p->p_flag & P_TRACED) == 0) {
748                         error = EPERM;
749                         goto fail;
750                 }
751
752                 /* not being traced by YOU */
753                 if (p->p_pptr != td->td_proc) {
754                         error = EBUSY;
755                         goto fail;
756                 }
757
758                 /* not currently stopped */
759                 if ((p->p_flag & (P_STOPPED_SIG | P_STOPPED_TRACE)) == 0 ||
760                     p->p_suspcount != p->p_numthreads  ||
761                     (p->p_flag & P_WAITED) == 0) {
762                         error = EBUSY;
763                         goto fail;
764                 }
765
766                 if ((p->p_flag & P_STOPPED_TRACE) == 0) {
767                         static int count = 0;
768                         if (count++ == 0)
769                                 printf("P_STOPPED_TRACE not set.\n");
770                 }
771
772                 /* OK */
773                 break;
774         }
775
776         /* Keep this process around until we finish this request. */
777         _PHOLD(p);
778
779 #ifdef FIX_SSTEP
780         /*
781          * Single step fixup ala procfs
782          */
783         FIX_SSTEP(td2);
784 #endif
785
786         /*
787          * Actually do the requests
788          */
789
790         td->td_retval[0] = 0;
791
792         switch (req) {
793         case PT_TRACE_ME:
794                 /* set my trace flag and "owner" so it can read/write me */
795                 p->p_flag |= P_TRACED;
796                 p->p_oppid = p->p_pptr->p_pid;
797                 break;
798
799         case PT_ATTACH:
800                 /* security check done above */
801                 p->p_flag |= P_TRACED;
802                 p->p_oppid = p->p_pptr->p_pid;
803                 if (p->p_pptr != td->td_proc)
804                         proc_reparent(p, td->td_proc);
805                 data = SIGSTOP;
806                 goto sendsig;   /* in PT_CONTINUE below */
807
808         case PT_CLEARSTEP:
809                 error = ptrace_clear_single_step(td2);
810                 break;
811
812         case PT_SETSTEP:
813                 error = ptrace_single_step(td2);
814                 break;
815
816         case PT_SUSPEND:
817                 td2->td_dbgflags |= TDB_SUSPEND;
818                 thread_lock(td2);
819                 td2->td_flags |= TDF_NEEDSUSPCHK;
820                 thread_unlock(td2);
821                 break;
822
823         case PT_RESUME:
824                 td2->td_dbgflags &= ~TDB_SUSPEND;
825                 break;
826
827         case PT_STEP:
828         case PT_CONTINUE:
829         case PT_TO_SCE:
830         case PT_TO_SCX:
831         case PT_SYSCALL:
832         case PT_DETACH:
833                 /* Zero means do not send any signal */
834                 if (data < 0 || data > _SIG_MAXSIG) {
835                         error = EINVAL;
836                         break;
837                 }
838
839                 switch (req) {
840                 case PT_STEP:
841                         error = ptrace_single_step(td2);
842                         if (error)
843                                 goto out;
844                         break;
845                 case PT_TO_SCE:
846                         p->p_stops |= S_PT_SCE;
847                         break;
848                 case PT_TO_SCX:
849                         p->p_stops |= S_PT_SCX;
850                         break;
851                 case PT_SYSCALL:
852                         p->p_stops |= S_PT_SCE | S_PT_SCX;
853                         break;
854                 }
855
856                 if (addr != (void *)1) {
857                         error = ptrace_set_pc(td2, (u_long)(uintfptr_t)addr);
858                         if (error)
859                                 break;
860                 }
861
862                 if (req == PT_DETACH) {
863                         /* reset process parent */
864                         if (p->p_oppid != p->p_pptr->p_pid) {
865                                 struct proc *pp;
866
867                                 PROC_LOCK(p->p_pptr);
868                                 sigqueue_take(p->p_ksi);
869                                 PROC_UNLOCK(p->p_pptr);
870
871                                 PROC_UNLOCK(p);
872                                 pp = pfind(p->p_oppid);
873                                 if (pp == NULL)
874                                         pp = initproc;
875                                 else
876                                         PROC_UNLOCK(pp);
877                                 PROC_LOCK(p);
878                                 proc_reparent(p, pp);
879                                 if (pp == initproc)
880                                         p->p_sigparent = SIGCHLD;
881                         }
882                         p->p_flag &= ~(P_TRACED | P_WAITED);
883                         p->p_oppid = 0;
884
885                         /* should we send SIGCHLD? */
886                         /* childproc_continued(p); */
887                 }
888
889         sendsig:
890                 if (proctree_locked) {
891                         sx_xunlock(&proctree_lock);
892                         proctree_locked = 0;
893                 }
894                 p->p_xstat = data;
895                 p->p_xthread = NULL;
896                 if ((p->p_flag & (P_STOPPED_SIG | P_STOPPED_TRACE)) != 0) {
897                         /* deliver or queue signal */
898                         td2->td_dbgflags &= ~TDB_XSIG;
899                         td2->td_xsig = data;
900
901                         if (req == PT_DETACH) {
902                                 struct thread *td3;
903                                 FOREACH_THREAD_IN_PROC(p, td3) {
904                                         td3->td_dbgflags &= ~TDB_SUSPEND; 
905                                 }
906                         }
907                         /*
908                          * unsuspend all threads, to not let a thread run,
909                          * you should use PT_SUSPEND to suspend it before
910                          * continuing process.
911                          */
912                         PROC_SLOCK(p);
913                         p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG|P_WAITED);
914                         thread_unsuspend(p);
915                         PROC_SUNLOCK(p);
916                 } else {
917                         if (data)
918                                 psignal(p, data);
919                 }
920                 break;
921
922         case PT_WRITE_I:
923         case PT_WRITE_D:
924                 td2->td_dbgflags |= TDB_USERWR;
925                 write = 1;
926                 /* FALLTHROUGH */
927         case PT_READ_I:
928         case PT_READ_D:
929                 PROC_UNLOCK(p);
930                 tmp = 0;
931                 /* write = 0 set above */
932                 iov.iov_base = write ? (caddr_t)&data : (caddr_t)&tmp;
933                 iov.iov_len = sizeof(int);
934                 uio.uio_iov = &iov;
935                 uio.uio_iovcnt = 1;
936                 uio.uio_offset = (off_t)(uintptr_t)addr;
937                 uio.uio_resid = sizeof(int);
938                 uio.uio_segflg = UIO_SYSSPACE;  /* i.e.: the uap */
939                 uio.uio_rw = write ? UIO_WRITE : UIO_READ;
940                 uio.uio_td = td;
941                 error = proc_rwmem(p, &uio);
942                 if (uio.uio_resid != 0) {
943                         /*
944                          * XXX proc_rwmem() doesn't currently return ENOSPC,
945                          * so I think write() can bogusly return 0.
946                          * XXX what happens for short writes?  We don't want
947                          * to write partial data.
948                          * XXX proc_rwmem() returns EPERM for other invalid
949                          * addresses.  Convert this to EINVAL.  Does this
950                          * clobber returns of EPERM for other reasons?
951                          */
952                         if (error == 0 || error == ENOSPC || error == EPERM)
953                                 error = EINVAL; /* EOF */
954                 }
955                 if (!write)
956                         td->td_retval[0] = tmp;
957                 PROC_LOCK(p);
958                 break;
959
960         case PT_IO:
961 #ifdef COMPAT_IA32
962                 if (wrap32) {
963                         piod32 = addr;
964                         iov.iov_base = (void *)(uintptr_t)piod32->piod_addr;
965                         iov.iov_len = piod32->piod_len;
966                         uio.uio_offset = (off_t)(uintptr_t)piod32->piod_offs;
967                         uio.uio_resid = piod32->piod_len;
968                 } else
969 #endif
970                 {
971                         piod = addr;
972                         iov.iov_base = piod->piod_addr;
973                         iov.iov_len = piod->piod_len;
974                         uio.uio_offset = (off_t)(uintptr_t)piod->piod_offs;
975                         uio.uio_resid = piod->piod_len;
976                 }
977                 uio.uio_iov = &iov;
978                 uio.uio_iovcnt = 1;
979                 uio.uio_segflg = UIO_USERSPACE;
980                 uio.uio_td = td;
981 #ifdef COMPAT_IA32
982                 tmp = wrap32 ? piod32->piod_op : piod->piod_op;
983 #else
984                 tmp = piod->piod_op;
985 #endif
986                 switch (tmp) {
987                 case PIOD_READ_D:
988                 case PIOD_READ_I:
989                         uio.uio_rw = UIO_READ;
990                         break;
991                 case PIOD_WRITE_D:
992                 case PIOD_WRITE_I:
993                         td2->td_dbgflags |= TDB_USERWR;
994                         uio.uio_rw = UIO_WRITE;
995                         break;
996                 default:
997                         error = EINVAL;
998                         goto out;
999                 }
1000                 PROC_UNLOCK(p);
1001                 error = proc_rwmem(p, &uio);
1002 #ifdef COMPAT_IA32
1003                 if (wrap32)
1004                         piod32->piod_len -= uio.uio_resid;
1005                 else
1006 #endif
1007                         piod->piod_len -= uio.uio_resid;
1008                 PROC_LOCK(p);
1009                 break;
1010
1011         case PT_KILL:
1012                 data = SIGKILL;
1013                 goto sendsig;   /* in PT_CONTINUE above */
1014
1015         case PT_SETREGS:
1016                 td2->td_dbgflags |= TDB_USERWR;
1017                 error = PROC_WRITE(regs, td2, addr);
1018                 break;
1019
1020         case PT_GETREGS:
1021                 error = PROC_READ(regs, td2, addr);
1022                 break;
1023
1024         case PT_SETFPREGS:
1025                 td2->td_dbgflags |= TDB_USERWR;
1026                 error = PROC_WRITE(fpregs, td2, addr);
1027                 break;
1028
1029         case PT_GETFPREGS:
1030                 error = PROC_READ(fpregs, td2, addr);
1031                 break;
1032
1033         case PT_SETDBREGS:
1034                 td2->td_dbgflags |= TDB_USERWR;
1035                 error = PROC_WRITE(dbregs, td2, addr);
1036                 break;
1037
1038         case PT_GETDBREGS:
1039                 error = PROC_READ(dbregs, td2, addr);
1040                 break;
1041
1042         case PT_LWPINFO:
1043                 if (data <= 0 || data > sizeof(*pl)) {
1044                         error = EINVAL;
1045                         break;
1046                 }
1047                 pl = addr;
1048                 pl->pl_lwpid = td2->td_tid;
1049                 if (td2->td_dbgflags & TDB_XSIG)
1050                         pl->pl_event = PL_EVENT_SIGNAL;
1051                 else
1052                         pl->pl_event = 0;
1053                 pl->pl_flags = 0;
1054                 pl->pl_sigmask = td2->td_sigmask;
1055                 pl->pl_siglist = td2->td_siglist;
1056                 break;
1057
1058         case PT_GETNUMLWPS:
1059                 td->td_retval[0] = p->p_numthreads;
1060                 break;
1061
1062         case PT_GETLWPLIST:
1063                 if (data <= 0) {
1064                         error = EINVAL;
1065                         break;
1066                 }
1067                 num = imin(p->p_numthreads, data);
1068                 PROC_UNLOCK(p);
1069                 buf = malloc(num * sizeof(lwpid_t), M_TEMP, M_WAITOK);
1070                 tmp = 0;
1071                 PROC_LOCK(p);
1072                 FOREACH_THREAD_IN_PROC(p, td2) {
1073                         if (tmp >= num)
1074                                 break;
1075                         buf[tmp++] = td2->td_tid;
1076                 }
1077                 PROC_UNLOCK(p);
1078                 error = copyout(buf, addr, tmp * sizeof(lwpid_t));
1079                 free(buf, M_TEMP);
1080                 if (!error)
1081                         td->td_retval[0] = tmp;
1082                 PROC_LOCK(p);
1083                 break;
1084
1085         case PT_VM_TIMESTAMP:
1086                 td->td_retval[0] = p->p_vmspace->vm_map.timestamp;
1087                 break;
1088
1089         case PT_VM_ENTRY:
1090 #ifdef COMPAT_IA32
1091                 /* XXX to be implemented. */
1092                 if (wrap32) {
1093                         error = EDOOFUS;
1094                         break;
1095                 }
1096 #endif
1097                 PROC_UNLOCK(p);
1098                 error = ptrace_vm_entry(td, p, addr);
1099                 PROC_LOCK(p);
1100                 break;
1101
1102         default:
1103 #ifdef __HAVE_PTRACE_MACHDEP
1104                 if (req >= PT_FIRSTMACH) {
1105                         PROC_UNLOCK(p);
1106                         error = cpu_ptrace(td2, req, addr, data);
1107                         PROC_LOCK(p);
1108                 } else
1109 #endif
1110                         /* Unknown request. */
1111                         error = EINVAL;
1112                 break;
1113         }
1114
1115 out:
1116         /* Drop our hold on this process now that the request has completed. */
1117         _PRELE(p);
1118 fail:
1119         PROC_UNLOCK(p);
1120         if (proctree_locked)
1121                 sx_xunlock(&proctree_lock);
1122         return (error);
1123 }
1124 #undef PROC_READ
1125 #undef PROC_WRITE
1126
/*
 * Stop a process because of a debugging event;
 * stay stopped until p->p_step is cleared
 * (cleared by PIOCCONT in procfs).
 *
 * The caller must hold the process lock (p_mtx); it is dropped
 * atomically by msleep() while sleeping and reacquired on wakeup.
 * 'event' identifies which debugging event caused the stop and
 * 'val' is the associated status value published in p_xstat.
 */
void
stopevent(struct proc *p, unsigned int event, unsigned int val)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);	/* required for msleep() on p_mtx below */
	p->p_step = 1;
	do {
		/*
		 * Re-publish the event details on every pass through the
		 * loop before waking waiters, so state remains consistent
		 * even if we iterate again after an early wakeup with
		 * p_step still set.
		 */
		p->p_xstat = val;
		p->p_xthread = NULL;
		p->p_stype = event;	/* Which event caused the stop? */
		wakeup(&p->p_stype);	/* Wake up any PIOCWAIT'ing procs */
		msleep(&p->p_step, &p->p_mtx, PWAIT, "stopevent", 0);
	} while (p->p_step);	/* loop until a PIOCCONT clears p_step */
}
1145 }