2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93
37 #include "opt_ktrace.h"
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/fcntl.h>
43 #include <sys/kernel.h>
44 #include <sys/kthread.h>
46 #include <sys/mutex.h>
47 #include <sys/malloc.h>
48 #include <sys/namei.h>
50 #include <sys/unistd.h>
51 #include <sys/vnode.h>
52 #include <sys/ktrace.h>
55 #include <sys/sysctl.h>
56 #include <sys/syslog.h>
57 #include <sys/sysproto.h>
/*
 * NOTE(review): this file is a corrupted excerpt of FreeBSD's kern_ktrace.c.
 * Every line carries a stray leading number from the extraction and many
 * original lines are missing, so the code cannot compile as-is.  Comments
 * below describe apparent intent; confirm against the original source.
 */
/* Malloc type used for all ktrace allocations (requests and copy buffers). */
59 static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");
/* Default size of the preallocated request pool; overridable at build time. */
63 #ifndef KTRACE_REQUEST_POOL
64 #define KTRACE_REQUEST_POOL 100
/*
 * Members of struct ktr_request (the struct's opening line is missing from
 * this excerpt): record header, credential to write with, per-record payload
 * union, and STAILQ linkage for the todo/free lists.
 */
68 struct ktr_header ktr_header;
69 struct ucred *ktr_cred;
72 struct ktr_syscall ktr_syscall;
73 struct ktr_sysret ktr_sysret;
74 struct ktr_genio ktr_genio;
75 struct ktr_psig ktr_psig;
76 struct ktr_csw ktr_csw;
79 STAILQ_ENTRY(ktr_request) ktr_list;
/*
 * Fixed payload length for each record type, indexed by ktr_type.
 * KTR_SYSCALL uses offsetof() because its argument array is variable-length
 * and carried separately in ktr_buffer.
 */
82 static int data_lengths[] = {
84 offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */
85 sizeof(struct ktr_sysret), /* KTR_SYSRET */
87 sizeof(struct ktr_genio), /* KTR_GENIO */
88 sizeof(struct ktr_psig), /* KTR_PSIG */
89 sizeof(struct ktr_csw), /* KTR_CSW */
/* Queue of requests awaiting the writer thread, and the free-object pool. */
93 static STAILQ_HEAD(, ktr_request) ktr_todo;
94 static STAILQ_HEAD(, ktr_request) ktr_free;
/* Current pool size; tunable via kern.ktrace_request_pool. */
96 static uint ktr_requestpool = KTRACE_REQUEST_POOL;
97 TUNABLE_INT("kern.ktrace_request_pool", &ktr_requestpool);
/* One-shot flag for the "out of request objects" console message. */
99 static int print_message = 1;
/* Protects the request lists, pool size, and per-process trace state. */
100 struct mtx ktrace_mtx;
/* Counts pending requests; the writer thread sleeps on this. */
101 static struct sema ktrace_sema;
/* Forward declarations for the static helpers below. */
103 static void ktrace_init(void *dummy);
104 static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
105 static uint ktrace_resize_pool(uint newsize);
106 static struct ktr_request *ktr_getrequest(int type);
107 static void ktr_submitrequest(struct ktr_request *req);
108 static void ktr_freerequest(struct ktr_request *req);
109 static void ktr_loop(void *dummy);
110 static void ktr_writerequest(struct ktr_request *req);
111 static int ktrcanset(struct thread *,struct proc *);
112 static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
113 static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
/*
 * ktrace_init() - SYSINIT hook that brings up the ktrace subsystem:
 * initializes the mutex, semaphore, and the todo/free queues, preallocates
 * ktr_requestpool request objects, and starts the writer kthread (ktr_loop).
 * NOTE(review): excerpt is missing lines here (return type, loop counter
 * declaration, closing braces) — confirm against original kern_ktrace.c.
 */
116 ktrace_init(void *dummy)
118 	struct ktr_request *req;
121 	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
122 	sema_init(&ktrace_sema, 0, "ktrace");
123 	STAILQ_INIT(&ktr_todo);
124 	STAILQ_INIT(&ktr_free);
	/* Prime the free pool; M_WAITOK is safe at SYSINIT time. */
125 	for (i = 0; i < ktr_requestpool; i++) {
126 		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
127 		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	/* Spawn the asynchronous trace-record writer thread. */
129 	kthread_create(ktr_loop, NULL, NULL, RFHIGHPID, "ktrace");
131 SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);
/*
 * Sysctl handler for kern.ktrace_request_pool: reads report the current
 * pool size; writes resize the pool via ktrace_resize_pool() and return
 * the previous size (EINVAL-style mismatch handling appears below, but
 * this excerpt is missing lines — confirm against original kern_ktrace.c).
 */
134 sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
137 	uint newsize, oldsize, wantsize;
140 	/* Handle easy read-only case first to avoid warnings from GCC. */
142 		mtx_lock(&ktrace_mtx);
143 		oldsize = ktr_requestpool;
144 		mtx_unlock(&ktrace_mtx);
145 		return (SYSCTL_OUT(req, &oldsize, sizeof(uint)));
	/* Write path: pull in the requested size, then resize under the lock. */
148 	error = SYSCTL_IN(req, &wantsize, sizeof(uint));
153 	mtx_lock(&ktrace_mtx);
154 	oldsize = ktr_requestpool;
155 	newsize = ktrace_resize_pool(wantsize);
156 	mtx_unlock(&ktrace_mtx);
	/* Report the pre-resize size back to the caller. */
158 	error = SYSCTL_OUT(req, &oldsize, sizeof(uint));
	/* A partial resize (e.g. shrink blocked by in-use requests) is an error. */
161 	if (newsize != wantsize)
165 SYSCTL_PROC(_kern, OID_AUTO, ktrace_request_pool, CTLTYPE_UINT|CTLFLAG_RW,
166     &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", "");
/*
 * ktrace_resize_pool() - grow or shrink the free-request pool toward
 * newsize.  Caller must hold ktrace_mtx; the lock is dropped around each
 * malloc()/free() and reacquired.  Returns the resulting pool size, which
 * may be larger than newsize if the free list runs dry while shrinking.
 * NOTE(review): excerpt is missing lines (return type, some braces,
 * malloc flags continuation) — confirm against original kern_ktrace.c.
 */
169 ktrace_resize_pool(uint newsize)
171 	struct ktr_request *req;
173 	mtx_assert(&ktrace_mtx, MA_OWNED);
175 	if (newsize == ktr_requestpool)
177 	if (newsize < ktr_requestpool)
178 		/* Shrink pool down to newsize if possible. */
179 		while (ktr_requestpool > newsize) {
180 			req = STAILQ_FIRST(&ktr_free);
			/* Free list empty: remaining objects are in flight; stop. */
182 				return (ktr_requestpool);
183 			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			/* Drop the mutex while calling free(). */
185 			mtx_unlock(&ktrace_mtx);
187 			mtx_lock(&ktrace_mtx);
190 		/* Grow pool up to newsize. */
191 		while (ktr_requestpool < newsize) {
			/* Drop the mutex while sleeping in malloc(). */
192 			mtx_unlock(&ktrace_mtx);
193 			req = malloc(sizeof(struct ktr_request), M_KTRACE,
195 			mtx_lock(&ktrace_mtx);
196 			STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
199 	return (ktr_requestpool);
/*
 * ktr_getrequest() - allocate and partially initialize a trace request of
 * the given type for the current process.  Returns NULL (in code elided
 * from this excerpt) when tracing is not enabled for this type or the
 * free pool is exhausted.  On success the record header is stamped with
 * type, trace vnode, time, pid, command name, and a held credential.
 * NOTE(review): excerpt is missing lines (NULL checks, returns, braces).
 */
202 static struct ktr_request *
203 ktr_getrequest(int type)
205 	struct ktr_request *req;
206 	struct thread *td = curthread;
207 	struct proc *p = td->td_proc;
211 	mtx_lock(&ktrace_mtx);
	/* Bail out quickly if this event type is not being traced. */
212 	if (!KTRCHECK(td, type)) {
213 		mtx_unlock(&ktrace_mtx);
217 	req = STAILQ_FIRST(&ktr_free);
219 		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
220 		req->ktr_header.ktr_type = type;
221 		KASSERT(p->p_tracep != NULL, ("ktrace: no trace vnode"));
		/* Snapshot the trace vnode under the lock. */
222 		req->ktr_vp = p->p_tracep;
224 		mtx_unlock(&ktrace_mtx);
225 		microtime(&req->ktr_header.ktr_time);
226 		req->ktr_header.ktr_pid = p->p_pid;
227 		bcopy(p->p_comm, req->ktr_header.ktr_comm, MAXCOMLEN + 1);
		/* Hold the caller's credential for the eventual VOP_WRITE(). */
228 		req->ktr_cred = crhold(td->td_ucred);
229 		req->ktr_header.ktr_buffer = NULL;
230 		req->ktr_header.ktr_len = 0;
231 		req->ktr_synchronous = 0;
	/* Pool exhausted: warn once (print_message gating elided in excerpt). */
235 		mtx_unlock(&ktrace_mtx);
237 			printf("Out of ktrace request objects.\n");
/*
 * ktr_submitrequest() - queue a completed request for the writer thread
 * and post the semaphore.  Asynchronous requests are then handled entirely
 * by ktr_loop(); synchronous ones (see comment below) are written here by
 * the submitting thread once the writer reaches them, to preserve record
 * order.  NOTE(review): excerpt is missing lines (braces, wakeup of the
 * writer after the synchronous write) — confirm against the original.
 */
244 ktr_submitrequest(struct ktr_request *req)
247 	mtx_lock(&ktrace_mtx);
248 	STAILQ_INSERT_TAIL(&ktr_todo, req, ktr_list);
249 	sema_post(&ktrace_sema);
250 	if (req->ktr_synchronous) {
252 		 * For a synchronous request, we wait for the ktrace thread
253 		 * to get to our item in the todo list and wake us up.  Then
254 		 * we write the request out ourselves and wake the ktrace
257 		msleep(req, &ktrace_mtx, curthread->td_priority, "ktrsync", 0);
258 		mtx_unlock(&ktrace_mtx);
259 		ktr_writerequest(req);
260 		mtx_lock(&ktrace_mtx);
263 	mtx_unlock(&ktrace_mtx);
	/* Clear the recursion guard set when tracing of this event began. */
264 	curthread->td_inktrace = 0;
/*
 * ktr_freerequest() - release a request's credential reference and return
 * the object to the free pool.  (Freeing of any attached buffer happens
 * elsewhere; lines elided from this excerpt.)
 */
268 ktr_freerequest(struct ktr_request *req)
271 	crfree(req->ktr_cred);
275 	mtx_lock(&ktrace_mtx);
276 	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
277 	mtx_unlock(&ktrace_mtx);
/*
 * ktr_loop() - main loop of the ktrace writer kthread.  Waits on the
 * semaphore, pops the head of the todo queue, and either hands a
 * synchronous request back to its submitter (wakeup/msleep handshake)
 * or writes an asynchronous one itself with the request's cached
 * credential temporarily installed as td_ucred.
 * NOTE(review): excerpt is missing lines (td/ucred locals, wakeup calls,
 * credential restore, loop braces) — confirm against the original.
 */
281 ktr_loop(void *dummy)
283 	struct ktr_request *req;
287 	/* Only cache these values once. */
291 		sema_wait(&ktrace_sema);
292 		mtx_lock(&ktrace_mtx);
293 		req = STAILQ_FIRST(&ktr_todo);
294 		STAILQ_REMOVE_HEAD(&ktr_todo, ktr_list);
295 		KASSERT(req != NULL, ("got a NULL request"));
296 		if (req->ktr_synchronous) {
			/* Wake the submitter and wait for it to finish writing. */
298 			msleep(req, &ktrace_mtx, curthread->td_priority,
300 			mtx_unlock(&ktrace_mtx);
302 			mtx_unlock(&ktrace_mtx);
304 			 * It is not enough just to pass the cached cred
305 			 * to the VOP's in ktr_writerequest().  Some VFS
306 			 * operations use curthread->td_ucred, so we need
307 			 * to modify our thread's credentials as well.
310 			td->td_ucred = req->ktr_cred;
311 			ktr_writerequest(req);
314 		ktr_freerequest(req);
/*
 * ktrsyscall() - emit a KTR_SYSCALL record: the syscall code, argument
 * count, and a malloc'd copy of the register_t argument array attached
 * as the record's variable-length buffer.  K&R-style definition; the
 * parameter declarations are missing from this excerpt.
 */
322 ktrsyscall(code, narg, args)
326 	struct ktr_request *req;
327 	struct ktr_syscall *ktp;
330 	req = ktr_getrequest(KTR_SYSCALL);
333 	ktp = &req->ktr_data.ktr_syscall;
334 	ktp->ktr_code = code;
335 	ktp->ktr_narg = narg;
	/* Arguments are copied so the record survives after the caller returns. */
336 	buflen = sizeof(register_t) * narg;
338 		req->ktr_header.ktr_buffer = malloc(buflen, M_KTRACE, M_WAITOK);
339 		bcopy(args, req->ktr_header.ktr_buffer, buflen);
340 		req->ktr_header.ktr_len = buflen;
342 	ktr_submitrequest(req);
/*
 * ktrsysret() - emit a KTR_SYSRET record capturing the syscall code, its
 * error status, and the primary return value.  K&R-style definition; the
 * parameter declarations are missing from this excerpt.
 */
349 ktrsysret(code, error, retval)
353 	struct ktr_request *req;
354 	struct ktr_sysret *ktp;
356 	req = ktr_getrequest(KTR_SYSRET);
359 	ktp = &req->ktr_data.ktr_sysret;
360 	ktp->ktr_code = code;
361 	ktp->ktr_error = error;
362 	ktp->ktr_retval = retval;		/* what about val2 ? */
363 	ktr_submitrequest(req);
/*
 * Body of ktrnamei() (the function header line is missing from this
 * excerpt): emit a KTR_NAMEI record carrying a malloc'd copy of the
 * looked-up pathname, without a NUL terminator (length is recorded in
 * ktr_len instead).
 */
370 	struct ktr_request *req;
373 	req = ktr_getrequest(KTR_NAMEI);
376 	namelen = strlen(path);
378 		req->ktr_header.ktr_len = namelen;
379 		req->ktr_header.ktr_buffer = malloc(namelen, M_KTRACE,
381 		bcopy(path, req->ktr_header.ktr_buffer, namelen);
383 	ktr_submitrequest(req);
387  * Since the uio may not stay valid, we can not hand off this request to
388  * the thread and need to process it synchronously.  However, we wish to
389  * keep the relative order of records in a trace file correct, so we
390  * do put this request on the queue (if it isn't empty) and then block.
391  * The ktrace thread wakes us back up when it is time for this event to
392  * be posted and blocks until we have completed writing out the event
393  * and woken it back up.
/*
 * ktrgenio() - emit a KTR_GENIO record for a read/write on fd.  The uio
 * itself is attached as the record buffer (not copied), which is why the
 * request must be synchronous.  K&R-style definition; parameter
 * declarations and some assignments are missing from this excerpt.
 */
396 ktrgenio(fd, rw, uio, error)
402 	struct ktr_request *req;
403 	struct ktr_genio *ktg;
407 	req = ktr_getrequest(KTR_GENIO);
410 	ktg = &req->ktr_data.ktr_genio;
	/* Hand the live uio to the writer; see the block comment above. */
413 	req->ktr_header.ktr_buffer = uio;
	/* Force the uio into write mode so ktr_writerequest() can flush it. */
415 	uio->uio_rw = UIO_WRITE;
416 	req->ktr_synchronous = 1;
417 	ktr_submitrequest(req);
/*
 * ktrpsig() - emit a KTR_PSIG record for signal delivery (signal number,
 * handler, mask, code).  K&R-style definition; parameter declarations and
 * most field assignments are missing from this excerpt.
 */
421 ktrpsig(sig, action, mask, code)
427 	struct ktr_request *req;
430 	req = ktr_getrequest(KTR_PSIG);
433 	kp = &req->ktr_data.ktr_psig;
434 	kp->signo = (char)sig;
438 	ktr_submitrequest(req);
/*
 * Body of ktrcsw() (function header missing from this excerpt): emit a
 * KTR_CSW context-switch record; the out/user field assignments are elided.
 */
445 	struct ktr_request *req;
448 	req = ktr_getrequest(KTR_CSW);
451 	kc = &req->ktr_data.ktr_csw;
454 	ktr_submitrequest(req);
458 /* Interface and common routines */
463 #ifndef _SYS_SYSPROTO_H_
/*
 * Body of the ktrace(2) syscall (header and several control-flow lines are
 * missing from this excerpt).  Apparent flow: for non-CLEAR ops, open the
 * trace file (refusing symlinks and non-regular files); KTROP_CLEARFILE
 * detaches the vnode from every process the caller may control; otherwise
 * apply set/clear of `facs` to a pid (or, for negative pid, every member
 * of a process group), optionally descending to children.
 */
475 	register struct ktrace_args *uap;
478 	register struct vnode *vp = NULL;
479 	register struct proc *p;
	/* Callers may never set KTRFAC_ROOT themselves; it is kernel-managed. */
481 	int facs = uap->facs & ~KTRFAC_ROOT;
482 	int ops = KTROP(uap->ops);
483 	int descend = uap->ops & KTRFLAG_DESCEND;
485 	int flags, error = 0;
489 	if (ops != KTROP_CLEAR) {
491 		 * an operation which requires a file argument.
493 		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td);
494 		flags = FREAD | FWRITE | O_NOFOLLOW;
495 		error = vn_open(&nd, &flags, 0);
500 		NDFREE(&nd, NDF_ONLY_PNBUF);
502 		VOP_UNLOCK(vp, 0, td);
		/* Only regular files may be trace targets. */
503 		if (vp->v_type != VREG) {
504 			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
510 	 * Clear all uses of the tracefile.
512 	if (ops == KTROP_CLEARFILE) {
513 		sx_slock(&allproc_lock);
514 		LIST_FOREACH(p, &allproc, p_list) {
516 			if (p->p_tracep == vp) {
517 				if (ktrcanset(td, p)) {
518 					mtx_lock(&ktrace_mtx);
521 					mtx_unlock(&ktrace_mtx);
523 					(void) vn_close(vp, FREAD|FWRITE,
532 		sx_sunlock(&allproc_lock);
536 	 * need something to (un)trace (XXX - why is this here?)
	/* Negative pid: operate on every member of that process group. */
549 		sx_slock(&proctree_lock);
550 		pg = pgfind(-uap->pid);
552 			sx_sunlock(&proctree_lock);
557 		 * ktrops() may call vrele(). Lock pg_members
558 		 * by the proctree_lock rather than pg_mtx.
561 		LIST_FOREACH(p, &pg->pg_members, p_pglist)
563 				ret |= ktrsetchildren(td, p, ops, facs, vp);
565 				ret |= ktrops(td, p, ops, facs, vp);
566 		sx_sunlock(&proctree_lock);
	/* Positive pid: single process (pfind elided), descend if requested. */
577 		/* XXX: UNLOCK above has a race */
579 			ret |= ktrsetchildren(td, p, ops, facs, vp);
581 			ret |= ktrops(td, p, ops, facs, vp);
	/* Drop our reference on the trace file on the way out. */
587 		(void) vn_close(vp, FWRITE, td->td_ucred, td);
/*
 * Body of the utrace(2) syscall (header missing from this excerpt): emit a
 * user-supplied KTR_USER record.  The length is bounded by KTR_USER_MAXLEN,
 * the data is copied in from userspace, and the request is dropped (freed)
 * if the copyin fails.
 */
602 	register struct utrace_args *uap;
606 	struct ktr_request *req;
	/* Reject oversized user records up front (EINVAL return elided). */
609 	if (uap->len > KTR_USER_MAXLEN)
611 	req = ktr_getrequest(KTR_USER);
614 	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
	/* copyin() returns 0 on success. */
615 	if (!copyin(uap->addr, cp, uap->len)) {
616 		req->ktr_header.ktr_buffer = cp;
617 		req->ktr_header.ktr_len = uap->len;
618 		ktr_submitrequest(req);
620 		ktr_freerequest(req);
/*
 * ktrops() - apply a set or clear of trace facilities `facs` to a single
 * process, after a permission check.  KTROP_SET installs vp as the trace
 * vnode (stashing any previous one in tracevp for release after the lock
 * is dropped) and tags KTRFAC_ROOT when root is the tracer; clearing all
 * facilities detaches the vnode.  K&R-style definition; parameter
 * declarations, vref/vrele and return are missing from this excerpt.
 */
631 ktrops(td, p, ops, facs, vp)
637 	struct vnode *tracevp = NULL;
640 	if (!ktrcanset(td, p)) {
644 	mtx_lock(&ktrace_mtx);
645 	if (ops == KTROP_SET) {
646 		if (p->p_tracep != vp) {
648 			 * if trace file already in use, relinquish below
650 			tracevp = p->p_tracep;
654 		p->p_traceflag |= facs;
		/* Remember that root enabled tracing; only root may change it. */
655 		if (td->td_ucred->cr_uid == 0)
656 			p->p_traceflag |= KTRFAC_ROOT;
		/* KTROP_CLEAR path: strip facs; detach vnode when none remain. */
659 		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
660 			/* no more tracing */
662 			tracevp = p->p_tracep;
666 	mtx_unlock(&ktrace_mtx);
/*
 * ktrsetchildren() - apply ktrops() to `top` and every descendant, using
 * an iterative pre-order walk under proctree_lock (children first, then
 * siblings, backing up toward top).  Returns the OR of the ktrops()
 * results.  K&R-style definition; parameter declarations, the loop
 * skeleton, and the back-up-the-tree branch are missing from this excerpt.
 */
675 ktrsetchildren(td, top, ops, facs, vp)
681 	register struct proc *p;
682 	register int ret = 0;
685 	sx_slock(&proctree_lock);
687 		ret |= ktrops(td, p, ops, facs, vp);
689 		 * If this process has children, descend to them next,
690 		 * otherwise do any siblings, and if done with this level,
691 		 * follow back up the tree (but not past top).
693 		if (!LIST_EMPTY(&p->p_children))
694 			p = LIST_FIRST(&p->p_children);
697 				sx_sunlock(&proctree_lock);
700 			if (LIST_NEXT(p, p_sibling)) {
701 				p = LIST_NEXT(p, p_sibling);
/*
 * ktr_writerequest() - flush one trace record to its vnode.  Builds an
 * iovec of {header, fixed-size payload, optional variable buffer} and
 * appends it with VOP_WRITE; a KTR_GENIO record's live uio is written in
 * a second VOP_WRITE.  On any write error, tracing on that vnode is torn
 * down: it is detached from every process and every pending request, with
 * all vrele() calls deferred past the list walks.
 * NOTE(review): excerpt is missing lines (locals such as vp/mp/auio/td,
 * NULL-vp early return, conditionals, braces) — confirm against original.
 */
711 ktr_writerequest(struct ktr_request *req)
713 	struct ktr_header *kth;
715 	struct uio *uio = NULL;
720 	struct iovec aiov[3];
722 	int datalen, buflen, vrele_count;
727 	 * If vp is NULL, the vp has been cleared out from under this
728 	 * request, so just drop it.
732 	kth = &req->ktr_header;
733 	datalen = data_lengths[kth->ktr_type];
734 	buflen = kth->ktr_len;
735 	cred = req->ktr_cred;
	/* iovec 0 is always the fixed record header. */
737 	auio.uio_iov = &aiov[0];
739 	auio.uio_segflg = UIO_SYSSPACE;
740 	auio.uio_rw = UIO_WRITE;
741 	aiov[0].iov_base = (caddr_t)kth;
742 	aiov[0].iov_len = sizeof(struct ktr_header);
743 	auio.uio_resid = sizeof(struct ktr_header);
	/* iovec 1: the type's fixed payload, if any; counted into ktr_len. */
747 		aiov[1].iov_base = (caddr_t)&req->ktr_data;
748 		aiov[1].iov_len = datalen;
749 		auio.uio_resid += datalen;
751 		kth->ktr_len += datalen;
	/* Next iovec: the variable-length buffer for non-GENIO records. */
754 		KASSERT(kth->ktr_buffer != NULL, ("ktrace: nothing to write"));
755 		aiov[auio.uio_iovcnt].iov_base = kth->ktr_buffer;
756 		aiov[auio.uio_iovcnt].iov_len = buflen;
757 		auio.uio_resid += buflen;
	/* KTR_GENIO stores a live uio in ktr_buffer instead of a byte buffer. */
760 		uio = kth->ktr_buffer;
761 		KASSERT((uio == NULL) ^ (kth->ktr_type == KTR_GENIO),
762 		    ("ktrace: uio and genio mismatch"));
764 			kth->ktr_len += uio->uio_resid;
766 	vn_start_write(vp, &mp, V_WAIT);
767 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
768 	(void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
769 	error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	/* Second write for the GENIO payload carried in the uio. */
770 	if (error == 0 && uio != NULL) {
771 		(void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
772 		error = VOP_WRITE(vp, uio, IO_UNIT | IO_APPEND, cred);
774 	VOP_UNLOCK(vp, 0, td);
775 	vn_finished_write(mp);
777 		free(kth->ktr_buffer, M_KTRACE);
782 	 * If error encountered, give up tracing on this vnode.  We defer
783 	 * all the vrele()'s on the vnode until after we are finished walking
784 	 * the various lists to avoid needlessly holding locks.
786 	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
790 	 * First, clear this vnode from being used by any processes in the
792 	 * XXX - If one process gets an EPERM writing to the vnode, should
793 	 *       we really do this?  Other processes might have suitable
794 	 *       credentials for the operation.
796 	sx_slock(&allproc_lock);
797 	LIST_FOREACH(p, &allproc, p_list) {
799 		if (p->p_tracep == vp) {
800 			mtx_lock(&ktrace_mtx);
803 			mtx_unlock(&ktrace_mtx);
808 	sx_sunlock(&allproc_lock);
810 	 * Second, clear this vnode from any pending requests.
812 	mtx_lock(&ktrace_mtx);
813 	STAILQ_FOREACH(req, &ktr_todo, ktr_list) {
814 		if (req->ktr_vp == vp) {
819 	mtx_unlock(&ktrace_mtx);
	/* Finally drop all the references accumulated above. */
821 	while (vrele_count-- > 0)
827 * Return true if caller has permission to set the ktracing state
828 * of target. Essentially, the target can't possess any
829 * more permissions than the caller. KTRFAC_ROOT signifies that
830 * root previously set the tracing status on the target process, and
831 * so, only root may further change it.
834 ktrcanset(td, targetp)
836 struct proc *targetp;
839 PROC_LOCK_ASSERT(targetp, MA_OWNED);
840 if (targetp->p_traceflag & KTRFAC_ROOT &&
841 suser_cred(td->td_ucred, PRISON_ROOT))
844 if (p_candebug(td, targetp) != 0)