From 49ed550123352266e1416e93c50d8b7a3b217eb8 Mon Sep 17 00:00:00 2001 From: rwatson Date: Mon, 28 Feb 2011 23:28:35 +0000 Subject: [PATCH] Merge userspace DTrace support from head to stable/8: r209721: Merge from vendor-sys/opensolaris: * add fasttrap files r209731: Introduce USD_{SET,GET}{BASE,LIMIT}. These help setting up the user segment descriptor hi and lo values. Idea from Solaris. Reviewed by: kib r209763: Fix style issues with the previous commit, namely use-tab-instead-of-space and don't use underscores in macro variables. Pointed out by: bde r210292: Fix typo in comment. r210357: MFamd64: Add USD_GETBASE(), USD_SETBASE(), USD_GETLIMIT() and USD_SETLIMIT(). r210611: Bump the witness pendlist to 768 to accomodate the increased number of spinlocks. r211553: Add sysname to struct opensolaris_utsname. This is needed by one DTrace test. r211566: Add a sysname char * to struct opensolaris_utsname. r211606: Add the FreeBSD definition for the fasttrap ioctls. r211607: Add a function compatibility function dtrace_instr_size_isa() that on FreeBSD does the same as dtrace_dis_isize(). r211608: Kernel DTrace support for: o uregs (sson@) o ustack (sson@) o /dev/dtrace/helper device (needed for USDT probes) r211610: Add more compatibility structure members needed by the upcoming fasttrap DTrace device. r211611: Destroy the helper device when unloading. r211613: Fix style issues. r211614: Bump KDTRACE_THREAD_ZERO and use M_ZERO as a malloc flag instead of calling bzero. r211615: Remove an elif and add an or-clause. r211616: Add an extra comment to the SDT probes definition. This allows us to get use '-' in probe names, matching the probe names in Solaris. Add userland SDT probes definitions to sys/sdt.h. r211617: Call the systrace_probe_func() when the error value. r211618: Port this to FreeBSD. We miss some suword functions, so we use copyout. r211738: Port the fasttrap provider to FreeBSD. 
This provider is responsible for injecting debugging probes in the userland programs and is the basis for the pid provider and the usdt provider. r211744: MD fasttrap implementation. r211745: Replace a pksignal() call with tdksignal(). Pointed out by: kib r211746: Update for the recent location of the fasttrap code. r211747: Replace structure assignments with explicity memcpy calls. This allows Clang to compile this file: it was using the builtin memcpy and we want to use the memcpy defined in gptboot.c. (Clang can't compile boot2 yet). Submitted by: Dimitry Andric Reviewed by: jhb r211751: Add a trap code for DTrace induced traps. r211752: Add two DTrace trap type values. Used by fasttrap. r211753: Enable fasttrap and make dtraceall depend on fasttrap when building i386 or amd64. r211804: Call the necessary DTrace function pointers when we have different kinds of traps. r211813: Add the necessary DTrace function pointers. r211839: Sync DTrace bits with amd64 and fix the build. r211924: Register an interrupt vector for DTrace return probes. There is some code missing in lapic to make sure that we don't overwrite this entry, but this will be done on a sequent commit. r211925: Replace a memory barrier with a mutex barrier. r211926: Add the path necessary to find fasttrap_isa.h to CFLAGS. r211929: Remove debugging. r212004: When DTrace is enabled, make sure we don't overwrite the IDT_DTRACE_RET entry with an IRQ for some hardware component. Reviewed by: jhb r212093: Make the /dev/dtrace/helper node have the mode 0660. This allows programs that refuse to run as root (pgsql) to install probes when their user is part of the wheel group. r212357: Fix two bugs in DTrace: * when the process exits, remove the associated USDT probes * when the process forks, duplicate the USDT probes. r212465: Avoid a LOR (sleepable after non-sleepable) in fasttrap_tracepoint_enable(). r212494: Revamp locking a bit. 
This fixes three problems: * processes now can't go away while we are inserting probes (fixes a panic) * if a trap happens, we won't be holding the process lock (fixes a hang) * fix a LOR between the process lock and the fasttrap bucket list lock Thanks to kib for pointing some problems. r212568: Bump __FreeBSD_version to reflect the userland DTrace changes Sponsored by: The FreeBSD Foundation Userspace DTrace work by: rpaulo git-svn-id: svn://svn.freebsd.org/base/stable/8@219107 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- sys/amd64/amd64/exception.S | 4 + sys/amd64/amd64/machdep.c | 7 + sys/amd64/amd64/trap.c | 56 + sys/amd64/include/segments.h | 8 + sys/amd64/include/trap.h | 2 + sys/boot/common/ufsread.c | 13 +- .../opensolaris/kern/opensolaris_misc.c | 3 +- sys/cddl/compat/opensolaris/sys/misc.h | 1 + .../opensolaris/uts/common/dtrace/dtrace.c | 66 +- .../opensolaris/uts/common/dtrace/fasttrap.c | 567 +++-- .../opensolaris/uts/common/sys/dtrace.h | 5 + .../opensolaris/uts/common/sys/dtrace_impl.h | 2 +- .../opensolaris/uts/common/sys/fasttrap.h | 5 + .../uts/common/sys/fasttrap_impl.h | 199 ++ .../uts/intel/dtrace/fasttrap_isa.c | 1904 +++++++++++++++++ .../uts/sparc/dtrace/fasttrap_isa.c | 1597 ++++++++++++++ sys/cddl/dev/dtrace/amd64/dtrace_isa.c | 198 +- sys/cddl/dev/dtrace/amd64/instr_size.c | 7 + sys/cddl/dev/dtrace/amd64/regset.h | 127 ++ sys/cddl/dev/dtrace/dtrace_cddl.h | 36 +- sys/cddl/dev/dtrace/dtrace_ioctl.c | 49 + sys/cddl/dev/dtrace/dtrace_load.c | 5 +- sys/cddl/dev/dtrace/dtrace_unload.c | 1 + sys/cddl/dev/dtrace/i386/dtrace_isa.c | 285 +-- sys/cddl/dev/dtrace/i386/instr_size.c | 7 + sys/cddl/dev/dtrace/i386/regset.h | 127 ++ sys/cddl/dev/systrace/systrace.c | 15 +- sys/i386/i386/exception.s | 2 + sys/i386/i386/machdep.c | 11 +- sys/i386/i386/trap.c | 39 + sys/i386/include/segments.h | 8 + sys/i386/include/trap.h | 2 + sys/kern/kern_dtrace.c | 29 +- sys/kern/kern_exec.c | 6 +- sys/kern/kern_exit.c | 2 +- sys/kern/kern_fork.c | 26 +- 
sys/kern/kern_priv.c | 4 +- sys/kern/kern_proc.c | 12 +- sys/kern/kern_sig.c | 6 +- sys/kern/kern_timeout.c | 4 +- sys/kern/subr_trap.c | 4 +- sys/kern/subr_witness.c | 2 +- sys/kern/vfs_cache.c | 32 +- sys/kern/vfs_lookup.c | 4 +- sys/kern/vfs_syscalls.c | 4 +- sys/modules/dtrace/Makefile | 6 +- sys/modules/dtrace/dtrace/Makefile | 1 + sys/modules/dtrace/dtraceall/dtraceall.c | 1 + sys/modules/dtrace/fasttrap/Makefile | 5 +- sys/net/vnet.c | 14 +- sys/opencrypto/deflate.c | 8 +- sys/security/mac/mac_framework.c | 8 +- sys/security/mac/mac_internal.h | 20 +- sys/sys/dtrace_bsd.h | 9 + sys/sys/param.h | 2 +- sys/sys/priv.h | 2 +- sys/sys/sdt.h | 89 +- sys/sys/signal.h | 1 + sys/sys/sysent.h | 3 +- sys/tools/vnode_if.awk | 4 +- sys/x86/x86/local_apic.c | 25 + 61 files changed, 5096 insertions(+), 595 deletions(-) create mode 100644 sys/cddl/contrib/opensolaris/uts/common/sys/fasttrap_impl.h create mode 100644 sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c create mode 100644 sys/cddl/contrib/opensolaris/uts/sparc/dtrace/fasttrap_isa.c create mode 100644 sys/cddl/dev/dtrace/amd64/regset.h create mode 100644 sys/cddl/dev/dtrace/i386/regset.h diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index b0e685ebc..7ffa7a9c3 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -108,6 +108,10 @@ IDTVEC(dbg) TRAP_NOEN(T_TRCTRAP) IDTVEC(bpt) TRAP_NOEN(T_BPTFLT) +#ifdef KDTRACE_HOOKS +IDTVEC(dtrace_ret) + TRAP_NOEN(T_DTRACE_RET) +#endif /* Regular traps; The cpu does not supply tf_err for these. 
*/ #define TRAP(a) \ diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 674363ca1..5025fd49c 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include "opt_msgbuf.h" #include "opt_perfmon.h" #include "opt_sched.h" +#include "opt_kdtrace.h" #include #include @@ -1094,6 +1095,9 @@ extern inthand_t IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(dblfault), +#ifdef KDTRACE_HOOKS + IDTVEC(dtrace_ret), +#endif IDTVEC(fast_syscall), IDTVEC(fast_syscall32); #ifdef DDB @@ -1624,6 +1628,9 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); +#ifdef KDTRACE_HOOKS + setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0); +#endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (long) idt; diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index fe8ee76a9..36f8e0e5a 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -113,6 +113,13 @@ dtrace_doubletrap_func_t dtrace_doubletrap_func; * implementation opaque. */ systrace_probe_func_t systrace_probe_func; + +/* + * These hooks are necessary for the pid, usdt and fasttrap providers. 
+ */ +dtrace_fasttrap_probe_ptr_t dtrace_fasttrap_probe_ptr; +dtrace_pid_probe_ptr_t dtrace_pid_probe_ptr; +dtrace_return_probe_ptr_t dtrace_return_probe_ptr; #endif extern void trap(struct trapframe *frame); @@ -243,6 +250,55 @@ trap(struct trapframe *frame) if (dtrace_trap_func != NULL) if ((*dtrace_trap_func)(frame, type)) goto out; + if (type == T_DTRACE_PROBE || type == T_DTRACE_RET || + type == T_BPTFLT) { + struct reg regs; + + regs.r_r15 = frame->tf_r15; + regs.r_r14 = frame->tf_r14; + regs.r_r13 = frame->tf_r13; + regs.r_r12 = frame->tf_r12; + regs.r_r11 = frame->tf_r11; + regs.r_r10 = frame->tf_r10; + regs.r_r9 = frame->tf_r9; + regs.r_r8 = frame->tf_r8; + regs.r_rdi = frame->tf_rdi; + regs.r_rsi = frame->tf_rsi; + regs.r_rbp = frame->tf_rbp; + regs.r_rbx = frame->tf_rbx; + regs.r_rdx = frame->tf_rdx; + regs.r_rcx = frame->tf_rcx; + regs.r_rax = frame->tf_rax; + regs.r_rip = frame->tf_rip; + regs.r_cs = frame->tf_cs; + regs.r_rflags = frame->tf_rflags; + regs.r_rsp = frame->tf_rsp; + regs.r_ss = frame->tf_ss; + if (frame->tf_flags & TF_HASSEGS) { + regs.r_ds = frame->tf_ds; + regs.r_es = frame->tf_es; + regs.r_fs = frame->tf_fs; + regs.r_gs = frame->tf_gs; + } else { + regs.r_ds = 0; + regs.r_es = 0; + regs.r_fs = 0; + regs.r_gs = 0; + } + if (type == T_DTRACE_PROBE && + dtrace_fasttrap_probe_ptr != NULL && + dtrace_fasttrap_probe_ptr(®s) == 0) + goto out; + if (type == T_BPTFLT && + dtrace_pid_probe_ptr != NULL && + dtrace_pid_probe_ptr(®s) == 0) + goto out; + if (type == T_DTRACE_RET && + dtrace_return_probe_ptr != NULL && + dtrace_return_probe_ptr(®s) == 0) + goto out; + + } #endif if ((frame->tf_rflags & PSL_I) == 0) { diff --git a/sys/amd64/include/segments.h b/sys/amd64/include/segments.h index 3dca80ad5..22dc95ab0 100644 --- a/sys/amd64/include/segments.h +++ b/sys/amd64/include/segments.h @@ -74,6 +74,13 @@ struct user_segment_descriptor { u_int64_t sd_hibase:8; /* segment base address (msb) */ } __packed; +#define USD_GETBASE(sd) 
(((sd)->sd_lobase) | (sd)->sd_hibase << 24) +#define USD_SETBASE(sd, b) (sd)->sd_lobase = (b); \ + (sd)->sd_hibase = ((b) >> 24); +#define USD_GETLIMIT(sd) (((sd)->sd_lolimit) | (sd)->sd_hilimit << 16) +#define USD_SETLIMIT(sd, l) (sd)->sd_lolimit = (l); \ + (sd)->sd_hilimit = ((l) >> 16); + /* * System segment descriptors (128 bit wide) */ @@ -207,6 +214,7 @@ struct region_descriptor { #define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */ #define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. */ #define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ +#define IDT_DTRACE_RET 0x92 /* DTrace pid provider Interrupt Vector */ /* * Entries in the Global Descriptor Table (GDT) diff --git a/sys/amd64/include/trap.h b/sys/amd64/include/trap.h index f0176b2c9..d8e36b5aa 100644 --- a/sys/amd64/include/trap.h +++ b/sys/amd64/include/trap.h @@ -62,6 +62,8 @@ #define T_MCHK 28 /* machine check trap */ #define T_XMMFLT 29 /* SIMD floating-point exception */ #define T_RESERVED 30 /* reserved (unknown) */ +#define T_DTRACE_RET 31 /* DTrace pid return */ +#define T_DTRACE_PROBE 32 /* DTrace fasttrap probe */ /* XXX most of the following codes aren't used, but could be. 
*/ diff --git a/sys/boot/common/ufsread.c b/sys/boot/common/ufsread.c index cd3ba4c5d..5d0e8af56 100644 --- a/sys/boot/common/ufsread.c +++ b/sys/boot/common/ufsread.c @@ -223,14 +223,19 @@ fsread(ino_t inode, void *buf, size_t nbyte) return -1; n = INO_TO_VBO(n, inode); #if defined(UFS1_ONLY) - dp1 = ((struct ufs1_dinode *)blkbuf)[n]; + memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n, + sizeof(struct ufs1_dinode)); #elif defined(UFS2_ONLY) - dp2 = ((struct ufs2_dinode *)blkbuf)[n]; + memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n, + sizeof(struct ufs2_dinode)); #else if (fs->fs_magic == FS_UFS1_MAGIC) - dp1 = ((struct ufs1_dinode *)blkbuf)[n]; + memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n, + sizeof(struct ufs1_dinode)); else - dp2 = ((struct ufs2_dinode *)blkbuf)[n]; + memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n, + sizeof(struct ufs2_dinode)); + #endif inomap = inode; fs_off = 0; diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_misc.c b/sys/cddl/compat/opensolaris/kern/opensolaris_misc.c index 4f46933d0..4ac666dfb 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris_misc.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris_misc.c @@ -38,7 +38,8 @@ __FBSDID("$FreeBSD$"); char hw_serial[11] = "0"; struct opensolaris_utsname utsname = { - .nodename = "unset" + .nodename = "unset", + .sysname = "SunOS" }; int diff --git a/sys/cddl/compat/opensolaris/sys/misc.h b/sys/cddl/compat/opensolaris/sys/misc.h index 0343f2f95..20d335b06 100644 --- a/sys/cddl/compat/opensolaris/sys/misc.h +++ b/sys/cddl/compat/opensolaris/sys/misc.h @@ -46,6 +46,7 @@ #ifdef _KERNEL struct opensolaris_utsname { char *nodename; + char *sysname; }; extern char hw_serial[11]; diff --git a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c index 70282e9a3..f7aeecc13 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c +++ b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c @@ -551,20 +551,16 @@ static 
void dtrace_enabling_provide(dtrace_provider_t *); static int dtrace_enabling_match(dtrace_enabling_t *, int *); static void dtrace_enabling_matchall(void); static dtrace_state_t *dtrace_anon_grab(void); -#if defined(sun) static uint64_t dtrace_helper(int, dtrace_mstate_t *, dtrace_state_t *, uint64_t, uint64_t); static dtrace_helpers_t *dtrace_helpers_create(proc_t *); -#endif static void dtrace_buffer_drop(dtrace_buffer_t *); static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t, dtrace_state_t *, dtrace_mstate_t *); static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, dtrace_optval_t); static int dtrace_ecb_create_enable(dtrace_probe_t *, void *); -#if defined(sun) static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *); -#endif uint16_t dtrace_load16(uintptr_t); uint32_t dtrace_load32(uintptr_t); uint64_t dtrace_load64(uintptr_t); @@ -2784,6 +2780,21 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (dtrace_getreg(lwp->lwp_regs, ndx)); return (0); } +#else + case DIF_VAR_UREGS: { + struct trapframe *tframe; + + if (!dtrace_priv_proc(state)) + return (0); + + if ((tframe = curthread->td_frame) == NULL) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[curcpu].cpuc_dtrace_illval = 0; + return (0); + } + + return (dtrace_getreg(tframe, ndx)); + } #endif case DIF_VAR_CURTHREAD: @@ -2839,7 +2850,6 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } return (mstate->dtms_stackdepth); -#if defined(sun) case DIF_VAR_USTACKDEPTH: if (!dtrace_priv_proc(state)) return (0); @@ -2859,7 +2869,6 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH; } return (mstate->dtms_ustackdepth); -#endif case DIF_VAR_CALLER: if (!dtrace_priv_kernel(state)) @@ -2896,7 +2905,6 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } return (mstate->dtms_caller); 
-#if defined(sun) case DIF_VAR_UCALLER: if (!dtrace_priv_proc(state)) return (0); @@ -2920,7 +2928,6 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } return (mstate->dtms_ucaller); -#endif case DIF_VAR_PROBEPROV: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); @@ -5736,7 +5743,6 @@ dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val) cpu->cpu_dtrace_chilled += val; } -#if defined(sun) static void dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t *buf, uint64_t arg) @@ -5849,7 +5855,6 @@ dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, out: mstate->dtms_scratch_ptr = old; } -#endif /* * If you're looking for the epicenter of DTrace, you just found it. This @@ -6172,7 +6177,6 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, (uint32_t *)arg0); continue; -#if defined(sun) case DTRACEACT_JSTACK: case DTRACEACT_USTACK: if (!dtrace_priv_proc(state)) @@ -6214,7 +6218,6 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); continue; -#endif default: break; @@ -8141,7 +8144,6 @@ dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) dtrace_enabling_matchall(); } -#if defined(sun) static void dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) { @@ -8189,7 +8191,6 @@ dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) dtrace_helper_provider_remove_one(dhp, sec, pid); } } -#endif /* * DTrace Meta Provider-to-Framework API Functions @@ -8729,7 +8730,6 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, return (err); } -#if defined(sun) /* * Validate a DTrace DIF object that it is to be used as a helper. Helpers * are much more constrained than normal DIFOs. 
Specifically, they may @@ -8887,7 +8887,6 @@ dtrace_difo_validate_helper(dtrace_difo_t *dp) return (err); } -#endif /* * Returns 1 if the expression in the DIF object can be cached on a per-thread @@ -9219,7 +9218,6 @@ dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) dtrace_difo_hold(dp); } -#if defined(sun) static dtrace_difo_t * dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { @@ -9263,7 +9261,6 @@ dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) dtrace_difo_init(new, vstate); return (new); } -#endif static void dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) @@ -13791,7 +13788,6 @@ dtrace_anon_property(void) } } -#if defined(sun) /* * DTrace Helper Functions */ @@ -13855,9 +13851,7 @@ dtrace_helper_trace(dtrace_helper_action_t *helper, ((uint64_t *)(uintptr_t)svar->dtsv_data)[curcpu]; } } -#endif -#if defined(sun) static uint64_t dtrace_helper(int which, dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t arg0, uint64_t arg1) @@ -13865,7 +13859,7 @@ dtrace_helper(int which, dtrace_mstate_t *mstate, uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; uint64_t sarg0 = mstate->dtms_arg[0]; uint64_t sarg1 = mstate->dtms_arg[1]; - uint64_t rval; + uint64_t rval = 0; dtrace_helpers_t *helpers = curproc->p_dtrace_helpers; dtrace_helper_action_t *helper; dtrace_vstate_t *vstate; @@ -14056,9 +14050,7 @@ dtrace_helper_destroygen(int gen) return (0); } -#endif -#if defined(sun) static int dtrace_helper_validate(dtrace_helper_action_t *helper) { @@ -14073,9 +14065,7 @@ dtrace_helper_validate(dtrace_helper_action_t *helper) return (err == 0); } -#endif -#if defined(sun) static int dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep) { @@ -14622,12 +14612,17 @@ dtrace_helpers_create(proc_t *p) return (help); } -static void -dtrace_helpers_destroy(void) +#if defined(sun) +static +#endif +void +dtrace_helpers_destroy(proc_t *p) { dtrace_helpers_t *help; dtrace_vstate_t *vstate; +#if defined(sun) proc_t 
*p = curproc; +#endif int i; mutex_enter(&dtrace_lock); @@ -14714,7 +14709,10 @@ dtrace_helpers_destroy(void) mutex_exit(&dtrace_lock); } -static void +#if defined(sun) +static +#endif +void dtrace_helpers_duplicate(proc_t *from, proc_t *to) { dtrace_helpers_t *help, *newhelp; @@ -14795,7 +14793,6 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) if (hasprovs) dtrace_helper_provider_register(to, newhelp, NULL); } -#endif #if defined(sun) /* @@ -16466,6 +16463,7 @@ _fini(void) #else static d_ioctl_t dtrace_ioctl; +static d_ioctl_t dtrace_ioctl_helper; static void dtrace_load(void *); static int dtrace_unload(void); #if __FreeBSD_version < 800039 @@ -16474,6 +16472,7 @@ static struct clonedevs *dtrace_clones; /* Ptr to the array of cloned devices. static eventhandler_tag eh_tag; /* Event handler tag. */ #else static struct cdev *dtrace_dev; +static struct cdev *helper_dev; #endif void dtrace_invop_init(void); @@ -16488,6 +16487,13 @@ static struct cdevsw dtrace_cdevsw = { .d_name = "dtrace", }; +static struct cdevsw helper_cdevsw = { + .d_version = D_VERSION, + .d_flags = D_TRACKCLOSE | D_NEEDMINOR, + .d_ioctl = dtrace_ioctl_helper, + .d_name = "helper", +}; + #include #if __FreeBSD_version < 800039 #include diff --git a/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c b/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c index 45839cbb8..4599a3238 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c +++ b/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c @@ -17,6 +17,10 @@ * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END + * + * Portions Copyright 2010 The FreeBSD Foundation + * + * $FreeBSD$ */ /* @@ -24,7 +28,9 @@ * Use is subject to license terms. 
*/ +#if defined(sun) #pragma ident "%Z%%M% %I% %E% SMI" +#endif #include #include @@ -32,11 +38,15 @@ #include #include #include +#if defined(sun) #include +#endif #include #include #include +#if defined(sun) #include +#endif #include #include #include @@ -44,9 +54,17 @@ #include #include #include -#include #include +#if defined(sun) #include +#endif +#include +#include +#if !defined(sun) +#include +#include +#include +#endif /* * User-Land Trap-Based Tracing @@ -125,11 +143,20 @@ * never hold the provider lock and creation lock simultaneously */ -static dev_info_t *fasttrap_devi; +static d_open_t fasttrap_open; +static d_ioctl_t fasttrap_ioctl; + +static struct cdevsw fasttrap_cdevsw = { + .d_version = D_VERSION, + .d_open = fasttrap_open, + .d_ioctl = fasttrap_ioctl, + .d_name = "fasttrap", +}; +static struct cdev *fasttrap_cdev; static dtrace_meta_provider_id_t fasttrap_meta_id; -static timeout_id_t fasttrap_timeout; -static kmutex_t fasttrap_cleanup_mtx; +static struct callout fasttrap_timeout; +static struct mtx fasttrap_cleanup_mtx; static uint_t fasttrap_cleanup_work; /* @@ -181,6 +208,10 @@ static void fasttrap_proc_release(fasttrap_proc_t *); #define FASTTRAP_PROCS_INDEX(pid) ((pid) & fasttrap_procs.fth_mask) +#if !defined(sun) +static kmutex_t fasttrap_cpuc_pid_lock[MAXCPU]; +#endif + static int fasttrap_highbit(ulong_t i) { @@ -229,6 +260,7 @@ fasttrap_hash_str(const char *p) void fasttrap_sigtrap(proc_t *p, kthread_t *t, uintptr_t pc) { +#if defined(sun) sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); sqp->sq_info.si_signo = SIGTRAP; @@ -241,6 +273,17 @@ fasttrap_sigtrap(proc_t *p, kthread_t *t, uintptr_t pc) if (t != NULL) aston(t); +#else + ksiginfo_t *ksi = kmem_zalloc(sizeof (ksiginfo_t), KM_SLEEP); + + ksiginfo_init(ksi); + ksi->ksi_signo = SIGTRAP; + ksi->ksi_code = TRAP_DTRACE; + ksi->ksi_addr = (caddr_t)pc; + PROC_LOCK(p); + (void) tdksignal(t, SIGTRAP, ksi); + PROC_UNLOCK(p); +#endif } /* @@ -257,9 +300,9 @@ 
fasttrap_mod_barrier(uint64_t gen) fasttrap_mod_gen++; - for (i = 0; i < NCPU; i++) { - mutex_enter(&cpu_core[i].cpuc_pid_lock); - mutex_exit(&cpu_core[i].cpuc_pid_lock); + CPU_FOREACH(i) { + mutex_enter(&fasttrap_cpuc_pid_lock[i]); + mutex_exit(&fasttrap_cpuc_pid_lock[i]); } } @@ -274,16 +317,15 @@ fasttrap_pid_cleanup_cb(void *data) fasttrap_provider_t **fpp, *fp; fasttrap_bucket_t *bucket; dtrace_provider_id_t provid; - int i, later; + int i, later = 0; static volatile int in = 0; ASSERT(in == 0); in = 1; - mutex_enter(&fasttrap_cleanup_mtx); while (fasttrap_cleanup_work) { fasttrap_cleanup_work = 0; - mutex_exit(&fasttrap_cleanup_mtx); + mtx_unlock(&fasttrap_cleanup_mtx); later = 0; @@ -349,10 +391,12 @@ fasttrap_pid_cleanup_cb(void *data) mutex_exit(&bucket->ftb_mtx); } - mutex_enter(&fasttrap_cleanup_mtx); + mtx_lock(&fasttrap_cleanup_mtx); } +#if 0 ASSERT(fasttrap_timeout != 0); +#endif /* * If we were unable to remove a retired provider, try again after @@ -364,14 +408,17 @@ fasttrap_pid_cleanup_cb(void *data) * get a chance to do that work if and when the timeout is reenabled * (if detach fails). 
*/ - if (later > 0 && fasttrap_timeout != (timeout_id_t)1) - fasttrap_timeout = timeout(&fasttrap_pid_cleanup_cb, NULL, hz); + if (later > 0 && callout_active(&fasttrap_timeout)) + callout_reset(&fasttrap_timeout, hz, &fasttrap_pid_cleanup_cb, + NULL); else if (later > 0) fasttrap_cleanup_work = 1; - else - fasttrap_timeout = 0; + else { +#if !defined(sun) + /* Nothing to be done for FreeBSD */ +#endif + } - mutex_exit(&fasttrap_cleanup_mtx); in = 0; } @@ -381,11 +428,11 @@ fasttrap_pid_cleanup_cb(void *data) static void fasttrap_pid_cleanup(void) { - mutex_enter(&fasttrap_cleanup_mtx); + + mtx_lock(&fasttrap_cleanup_mtx); fasttrap_cleanup_work = 1; - if (fasttrap_timeout == 0) - fasttrap_timeout = timeout(&fasttrap_pid_cleanup_cb, NULL, 1); - mutex_exit(&fasttrap_cleanup_mtx); + callout_reset(&fasttrap_timeout, 1, &fasttrap_pid_cleanup_cb, NULL); + mtx_unlock(&fasttrap_cleanup_mtx); } /* @@ -400,9 +447,35 @@ fasttrap_fork(proc_t *p, proc_t *cp) pid_t ppid = p->p_pid; int i; +#if defined(sun) ASSERT(curproc == p); ASSERT(p->p_proc_flag & P_PR_LOCK); +#else + PROC_LOCK_ASSERT(p, MA_OWNED); +#endif +#if defined(sun) ASSERT(p->p_dtrace_count > 0); +#else + if (p->p_dtrace_helpers) { + /* + * dtrace_helpers_duplicate() allocates memory. + */ + _PHOLD(cp); + PROC_UNLOCK(p); + PROC_UNLOCK(cp); + dtrace_helpers_duplicate(p, cp); + PROC_LOCK(cp); + PROC_LOCK(p); + _PRELE(cp); + } + /* + * This check is purposely here instead of in kern_fork.c because, + * for legal resons, we cannot include the dtrace_cddl.h header + * inside kern_fork.c and insert if-clause there. + */ + if (p->p_dtrace_count == 0) + return; +#endif ASSERT(cp->p_dtrace_count == 0); /* @@ -419,9 +492,13 @@ fasttrap_fork(proc_t *p, proc_t *cp) * We don't have to worry about the child process disappearing * because we're in fork(). 
*/ - mutex_enter(&cp->p_lock); +#if defined(sun) + mtx_lock_spin(&cp->p_slock); sprlock_proc(cp); - mutex_exit(&cp->p_lock); + mtx_unlock_spin(&cp->p_slock); +#else + _PHOLD(cp); +#endif /* * Iterate over every tracepoint looking for ones that belong to the @@ -451,8 +528,12 @@ fasttrap_fork(proc_t *p, proc_t *cp) mutex_exit(&bucket->ftb_mtx); } +#if defined(sun) mutex_enter(&cp->p_lock); sprunlock(cp); +#else + _PRELE(cp); +#endif } /* @@ -463,24 +544,30 @@ fasttrap_fork(proc_t *p, proc_t *cp) static void fasttrap_exec_exit(proc_t *p) { +#if defined(sun) ASSERT(p == curproc); - ASSERT(MUTEX_HELD(&p->p_lock)); - - mutex_exit(&p->p_lock); +#endif + PROC_LOCK_ASSERT(p, MA_OWNED); + _PHOLD(p); + PROC_UNLOCK(p); /* * We clean up the pid provider for this process here; user-land * static probes are handled by the meta-provider remove entry point. */ fasttrap_provider_retire(p->p_pid, FASTTRAP_PID_NAME, 0); - - mutex_enter(&p->p_lock); +#if !defined(sun) + if (p->p_dtrace_helpers) + dtrace_helpers_destroy(p); +#endif + PROC_LOCK(p); + _PRELE(p); } /*ARGSUSED*/ static void -fasttrap_pid_provide(void *arg, const dtrace_probedesc_t *desc) +fasttrap_pid_provide(void *arg, dtrace_probedesc_t *desc) { /* * There are no "default" pid probes. @@ -504,7 +591,9 @@ fasttrap_tracepoint_enable(proc_t *p, fasttrap_probe_t *probe, uint_t index) ASSERT(probe->ftp_tps[index].fit_tp->ftt_pid == pid); +#if defined(sun) ASSERT(!(p->p_flag & SVFORK)); +#endif /* * Before we make any modifications, make sure we've imposed a barrier @@ -610,7 +699,9 @@ again: * Increment the count of the number of tracepoints active in * the victim process. 
*/ +#if defined(sun) ASSERT(p->p_proc_flag & P_PR_LOCK); +#endif p->p_dtrace_count++; return (rc); @@ -666,7 +757,7 @@ fasttrap_tracepoint_disable(proc_t *p, fasttrap_probe_t *probe, uint_t index) fasttrap_bucket_t *bucket; fasttrap_provider_t *provider = probe->ftp_prov; fasttrap_tracepoint_t **pp, *tp; - fasttrap_id_t *id, **idp; + fasttrap_id_t *id, **idp = NULL; pid_t pid; uintptr_t pc; @@ -800,7 +891,9 @@ fasttrap_tracepoint_disable(proc_t *p, fasttrap_probe_t *probe, uint_t index) * Decrement the count of the number of tracepoints active * in the victim process. */ +#if defined(sun) ASSERT(p->p_proc_flag & P_PR_LOCK); +#endif p->p_dtrace_count--; } @@ -851,26 +944,31 @@ fasttrap_enable_callbacks(void) static void fasttrap_disable_callbacks(void) { +#if defined(sun) ASSERT(MUTEX_HELD(&cpu_lock)); +#endif + mutex_enter(&fasttrap_count_mtx); ASSERT(fasttrap_pid_count > 0); fasttrap_pid_count--; if (fasttrap_pid_count == 0) { +#if defined(sun) cpu_t *cur, *cpu = CPU; for (cur = cpu->cpu_next_onln; cur != cpu; cur = cur->cpu_next_onln) { rw_enter(&cur->cpu_ft_lock, RW_WRITER); } - +#endif dtrace_pid_probe_ptr = NULL; dtrace_return_probe_ptr = NULL; - +#if defined(sun) for (cur = cpu->cpu_next_onln; cur != cpu; cur = cur->cpu_next_onln) { rw_exit(&cur->cpu_ft_lock); } +#endif } mutex_exit(&fasttrap_count_mtx); } @@ -880,13 +978,16 @@ static void fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; - proc_t *p; + proc_t *p = NULL; int i, rc; + ASSERT(probe != NULL); ASSERT(!probe->ftp_enabled); ASSERT(id == probe->ftp_id); +#if defined(sun) ASSERT(MUTEX_HELD(&cpu_lock)); +#endif /* * Increment the count of enabled probes on this probe's provider; @@ -911,6 +1012,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) * a fork in which the traced process is being born and we're copying * USDT probes. Otherwise, the process is gone so bail. 
*/ +#if defined(sun) if ((p = sprlock(probe->ftp_pid)) == NULL) { if ((curproc->p_flag & SFORKING) == 0) return; @@ -934,12 +1036,23 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) ASSERT(!(p->p_flag & SVFORK)); mutex_exit(&p->p_lock); +#else + if ((p = pfind(probe->ftp_pid)) == NULL) + return; +#endif /* * We have to enable the trap entry point before any user threads have * the chance to execute the trap instruction we're about to place * in their process's text. */ +#ifdef __FreeBSD__ + /* + * pfind() returns a locked process. + */ + _PHOLD(p); + PROC_UNLOCK(p); +#endif fasttrap_enable_callbacks(); /* @@ -967,8 +1080,12 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) i--; } +#if defined(sun) mutex_enter(&p->p_lock); sprunlock(p); +#else + PRELE(p); +#endif /* * Since we're not actually enabling this probe, @@ -978,9 +1095,12 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) return; } } - +#if defined(sun) mutex_enter(&p->p_lock); sprunlock(p); +#else + PRELE(p); +#endif probe->ftp_enabled = 1; } @@ -996,18 +1116,22 @@ fasttrap_pid_disable(void *arg, dtrace_id_t id, void *parg) ASSERT(id == probe->ftp_id); + mutex_enter(&provider->ftp_mtx); + /* * We won't be able to acquire a /proc-esque lock on the process * iff the process is dead and gone. In this case, we rely on the * provider lock as a point of mutual exclusion to prevent other * DTrace consumers from disabling this probe. */ - if ((p = sprlock(probe->ftp_pid)) != NULL) { - ASSERT(!(p->p_flag & SVFORK)); - mutex_exit(&p->p_lock); + if ((p = pfind(probe->ftp_pid)) == NULL) { + mutex_exit(&provider->ftp_mtx); + return; } - - mutex_enter(&provider->ftp_mtx); +#ifdef __FreeBSD__ + _PHOLD(p); + PROC_UNLOCK(p); +#endif /* * Disable all the associated tracepoints (for fully enabled probes). 
@@ -1030,9 +1154,6 @@ fasttrap_pid_disable(void *arg, dtrace_id_t id, void *parg) if (provider->ftp_retired && !provider->ftp_marked) whack = provider->ftp_marked = 1; mutex_exit(&provider->ftp_mtx); - - mutex_enter(&p->p_lock); - sprunlock(p); } else { /* * If the process is dead, we're just waiting for the @@ -1046,12 +1167,17 @@ fasttrap_pid_disable(void *arg, dtrace_id_t id, void *parg) if (whack) fasttrap_pid_cleanup(); +#ifdef __FreeBSD__ + PRELE(p); +#endif if (!probe->ftp_enabled) return; probe->ftp_enabled = 0; +#if defined(sun) ASSERT(MUTEX_HELD(&cpu_lock)); +#endif fasttrap_disable_callbacks(); } @@ -1163,6 +1289,7 @@ fasttrap_proc_lookup(pid_t pid) fasttrap_bucket_t *bucket; fasttrap_proc_t *fprc, *new_fprc; + bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; mutex_enter(&bucket->ftb_mtx); @@ -1189,6 +1316,10 @@ fasttrap_proc_lookup(pid_t pid) new_fprc->ftpc_pid = pid; new_fprc->ftpc_rcount = 1; new_fprc->ftpc_acount = 1; +#if !defined(sun) + mutex_init(&new_fprc->ftpc_mtx, "fasttrap proc mtx", MUTEX_DEFAULT, + NULL); +#endif mutex_enter(&bucket->ftb_mtx); @@ -1311,17 +1442,8 @@ fasttrap_provider_lookup(pid_t pid, const char *name, * Make sure the process exists, isn't a child created as the result * of a vfork(2), and isn't a zombie (but may be in fork). */ - mutex_enter(&pidlock); - if ((p = prfind(pid)) == NULL) { - mutex_exit(&pidlock); - return (NULL); - } - mutex_enter(&p->p_lock); - mutex_exit(&pidlock); - if (p->p_flag & (SVFORK | SEXITING)) { - mutex_exit(&p->p_lock); + if ((p = pfind(pid)) == NULL) return (NULL); - } /* * Increment p_dtrace_probes so that the process knows to inform us @@ -1334,15 +1456,18 @@ fasttrap_provider_lookup(pid_t pid, const char *name, * Grab the credentials for this process so we have * something to pass to dtrace_register(). 
*/ - mutex_enter(&p->p_crlock); - crhold(p->p_cred); - cred = p->p_cred; - mutex_exit(&p->p_crlock); - mutex_exit(&p->p_lock); + PROC_LOCK_ASSERT(p, MA_OWNED); + crhold(p->p_ucred); + cred = p->p_ucred; + PROC_UNLOCK(p); new_fp = kmem_zalloc(sizeof (fasttrap_provider_t), KM_SLEEP); new_fp->ftp_pid = pid; new_fp->ftp_proc = fasttrap_proc_lookup(pid); +#if !defined(sun) + mutex_init(&new_fp->ftp_mtx, "provider mtx", MUTEX_DEFAULT, NULL); + mutex_init(&new_fp->ftp_cmtx, "lock on creating", MUTEX_DEFAULT, NULL); +#endif ASSERT(new_fp->ftp_proc != NULL); @@ -1420,6 +1545,10 @@ fasttrap_provider_free(fasttrap_provider_t *provider) fasttrap_proc_release(provider->ftp_proc); +#if !defined(sun) + mutex_destroy(&provider->ftp_mtx); + mutex_destroy(&provider->ftp_cmtx); +#endif kmem_free(provider, sizeof (fasttrap_provider_t)); /* @@ -1429,17 +1558,14 @@ fasttrap_provider_free(fasttrap_provider_t *provider) * corresponds to this process's hash chain in the provider hash * table. Don't sweat it if we can't find the process. */ - mutex_enter(&pidlock); - if ((p = prfind(pid)) == NULL) { - mutex_exit(&pidlock); + if ((p = pfind(pid)) == NULL) { return; } - mutex_enter(&p->p_lock); - mutex_exit(&pidlock); - p->p_dtrace_probes--; - mutex_exit(&p->p_lock); +#if !defined(sun) + PROC_UNLOCK(p); +#endif } static void @@ -1527,7 +1653,7 @@ fasttrap_add_probe(fasttrap_probe_spec_t *pdata) fasttrap_probe_t *pp; fasttrap_tracepoint_t *tp; char *name; - int i, aframes, whack; + int i, aframes = 0, whack; /* * There needs to be at least one desired trace point. 
@@ -1715,7 +1841,7 @@ fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) */ if (strlen(dhpv->dthpv_provname) + 10 >= sizeof (provider->ftp_name)) { - cmn_err(CE_WARN, "failed to instantiate provider %s: " + printf("failed to instantiate provider %s: " "name too long to accomodate pid", dhpv->dthpv_provname); return (NULL); } @@ -1724,7 +1850,7 @@ fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) * Don't let folks spoof the true pid provider. */ if (strcmp(dhpv->dthpv_provname, FASTTRAP_PID_NAME) == 0) { - cmn_err(CE_WARN, "failed to instantiate provider %s: " + printf("failed to instantiate provider %s: " "%s is an invalid name", dhpv->dthpv_provname, FASTTRAP_PID_NAME); return (NULL); @@ -1747,7 +1873,7 @@ fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) if ((provider = fasttrap_provider_lookup(pid, dhpv->dthpv_provname, &dhpv->dthpv_pattr)) == NULL) { - cmn_err(CE_WARN, "failed to instantiate provider %s for " + printf("failed to instantiate provider %s for " "process %u", dhpv->dthpv_provname, (uint_t)pid); return (NULL); } @@ -1908,15 +2034,21 @@ static dtrace_mops_t fasttrap_mops = { /*ARGSUSED*/ static int -fasttrap_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) +fasttrap_open(struct cdev *dev __unused, int oflags __unused, + int devtype __unused, struct thread *td __unused) { return (0); } /*ARGSUSED*/ static int -fasttrap_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) +fasttrap_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int fflag, + struct thread *td) { +#ifdef notyet + struct kinfo_proc kp; + const cred_t *cr = td->td_ucred; +#endif if (!dtrace_attached()) return (EAGAIN); @@ -1928,9 +2060,13 @@ fasttrap_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) int ret; char *c; +#if defined(sun) if (copyin(&uprobe->ftps_noffs, &noffs, sizeof (uprobe->ftps_noffs))) return (EFAULT); +#else + noffs = uprobe->ftps_noffs; +#endif /* * 
Probes must have at least one tracepoint. @@ -1946,10 +2082,19 @@ fasttrap_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) probe = kmem_alloc(size, KM_SLEEP); +#if defined(sun) if (copyin(uprobe, probe, size) != 0) { kmem_free(probe, size); return (EFAULT); } +#else + memcpy(probe, uprobe, sizeof(*probe)); + if (noffs > 1 && copyin(uprobe + 1, probe + 1, size) != 0) { + kmem_free(probe, size); + return (EFAULT); + } +#endif + /* * Verify that the function and module strings contain no @@ -1969,30 +2114,52 @@ fasttrap_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) } } +#ifdef notyet if (!PRIV_POLICY_CHOICE(cr, PRIV_ALL, B_FALSE)) { proc_t *p; pid_t pid = probe->ftps_pid; +#if defined(sun) mutex_enter(&pidlock); +#endif /* * Report an error if the process doesn't exist * or is actively being birthed. */ - if ((p = prfind(pid)) == NULL || p->p_stat == SIDL) { + p = pfind(pid); + if (p) + fill_kinfo_proc(p, &kp); + if (p == NULL || kp.ki_stat == SIDL) { +#if defined(sun) mutex_exit(&pidlock); +#endif return (ESRCH); } +#if defined(sun) mutex_enter(&p->p_lock); mutex_exit(&pidlock); +#else + PROC_LOCK_ASSERT(p, MA_OWNED); +#endif +#ifdef notyet if ((ret = priv_proc_cred_perm(cr, p, NULL, VREAD | VWRITE)) != 0) { +#if defined(sun) mutex_exit(&p->p_lock); +#else + PROC_UNLOCK(p); +#endif return (ret); } - +#endif /* notyet */ +#if defined(sun) mutex_exit(&p->p_lock); +#else + PROC_UNLOCK(p); +#endif } +#endif /* notyet */ ret = fasttrap_add_probe(probe); err: @@ -2004,35 +2171,62 @@ err: fasttrap_instr_query_t instr; fasttrap_tracepoint_t *tp; uint_t index; +#if defined(sun) int ret; +#endif +#if defined(sun) if (copyin((void *)arg, &instr, sizeof (instr)) != 0) return (EFAULT); +#endif +#ifdef notyet if (!PRIV_POLICY_CHOICE(cr, PRIV_ALL, B_FALSE)) { proc_t *p; pid_t pid = instr.ftiq_pid; +#if defined(sun) mutex_enter(&pidlock); +#endif /* * Report an error if the process doesn't exist * or is actively being birthed. 
*/ - if ((p = prfind(pid)) == NULL || p->p_stat == SIDL) { + p = pfind(pid); + if (p) + fill_kinfo_proc(p, &kp); + if (p == NULL || kp.ki_stat == SIDL) { +#if defined(sun) mutex_exit(&pidlock); +#endif return (ESRCH); } +#if defined(sun) mutex_enter(&p->p_lock); mutex_exit(&pidlock); +#else + PROC_LOCK_ASSERT(p, MA_OWNED); +#endif +#ifdef notyet if ((ret = priv_proc_cred_perm(cr, p, NULL, VREAD)) != 0) { +#if defined(sun) mutex_exit(&p->p_lock); +#else + PROC_UNLOCK(p); +#endif return (ret); } +#endif /* notyet */ +#if defined(sun) mutex_exit(&p->p_lock); +#else + PROC_UNLOCK(p); +#endif } +#endif /* notyet */ index = FASTTRAP_TPOINTS_INDEX(instr.ftiq_pid, instr.ftiq_pc); @@ -2065,84 +2259,45 @@ err: return (EINVAL); } -static struct cb_ops fasttrap_cb_ops = { - fasttrap_open, /* open */ - nodev, /* close */ - nulldev, /* strategy */ - nulldev, /* print */ - nodev, /* dump */ - nodev, /* read */ - nodev, /* write */ - fasttrap_ioctl, /* ioctl */ - nodev, /* devmap */ - nodev, /* mmap */ - nodev, /* segmap */ - nochpoll, /* poll */ - ddi_prop_op, /* cb_prop_op */ - 0, /* streamtab */ - D_NEW | D_MP /* Driver compatibility flag */ -}; - -/*ARGSUSED*/ -static int -fasttrap_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) -{ - int error; - - switch (infocmd) { - case DDI_INFO_DEVT2DEVINFO: - *result = (void *)fasttrap_devi; - error = DDI_SUCCESS; - break; - case DDI_INFO_DEVT2INSTANCE: - *result = (void *)0; - error = DDI_SUCCESS; - break; - default: - error = DDI_FAILURE; - } - return (error); -} - static int -fasttrap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) +fasttrap_load(void) { ulong_t nent; + int i; - switch (cmd) { - case DDI_ATTACH: - break; - case DDI_RESUME: - return (DDI_SUCCESS); - default: - return (DDI_FAILURE); - } + /* Create the /dev/dtrace/fasttrap entry. 
*/ + fasttrap_cdev = make_dev(&fasttrap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, + "dtrace/fasttrap"); - if (ddi_create_minor_node(devi, "fasttrap", S_IFCHR, 0, - DDI_PSEUDO, NULL) == DDI_FAILURE) { - ddi_remove_minor_node(devi, NULL); - return (DDI_FAILURE); - } - - ddi_report_dev(devi); - fasttrap_devi = devi; + mtx_init(&fasttrap_cleanup_mtx, "fasttrap clean", "dtrace", MTX_DEF); + callout_init_mtx(&fasttrap_timeout, &fasttrap_cleanup_mtx, 0); + mutex_init(&fasttrap_count_mtx, "fasttrap count mtx", MUTEX_DEFAULT, + NULL); /* * Install our hooks into fork(2), exec(2), and exit(2). */ - dtrace_fasttrap_fork_ptr = &fasttrap_fork; - dtrace_fasttrap_exit_ptr = &fasttrap_exec_exit; - dtrace_fasttrap_exec_ptr = &fasttrap_exec_exit; + dtrace_fasttrap_fork = &fasttrap_fork; + dtrace_fasttrap_exit = &fasttrap_exec_exit; + dtrace_fasttrap_exec = &fasttrap_exec_exit; +#if defined(sun) fasttrap_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, "fasttrap-max-probes", FASTTRAP_MAX_DEFAULT); +#else + fasttrap_max = FASTTRAP_MAX_DEFAULT; +#endif fasttrap_total = 0; /* * Conjure up the tracepoints hashtable... */ +#if defined(sun) nent = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, "fasttrap-hash-size", FASTTRAP_TPOINTS_DEFAULT_SIZE); +#else + nent = FASTTRAP_TPOINTS_DEFAULT_SIZE; +#endif if (nent == 0 || nent > 0x1000000) nent = FASTTRAP_TPOINTS_DEFAULT_SIZE; @@ -2155,6 +2310,11 @@ fasttrap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) fasttrap_tpoints.fth_mask = fasttrap_tpoints.fth_nent - 1; fasttrap_tpoints.fth_table = kmem_zalloc(fasttrap_tpoints.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); +#if !defined(sun) + for (i = 0; i < fasttrap_tpoints.fth_nent; i++) + mutex_init(&fasttrap_tpoints.fth_table[i].ftb_mtx, + "tracepoints bucket mtx", MUTEX_DEFAULT, NULL); +#endif /* * ... and the providers hash table... 
@@ -2168,6 +2328,11 @@ fasttrap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) fasttrap_provs.fth_mask = fasttrap_provs.fth_nent - 1; fasttrap_provs.fth_table = kmem_zalloc(fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); +#if !defined(sun) + for (i = 0; i < fasttrap_provs.fth_nent; i++) + mutex_init(&fasttrap_provs.fth_table[i].ftb_mtx, + "providers bucket mtx", MUTEX_DEFAULT, NULL); +#endif /* * ... and the procs hash table. @@ -2181,27 +2346,27 @@ fasttrap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) fasttrap_procs.fth_mask = fasttrap_procs.fth_nent - 1; fasttrap_procs.fth_table = kmem_zalloc(fasttrap_procs.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); +#if !defined(sun) + for (i = 0; i < fasttrap_procs.fth_nent; i++) + mutex_init(&fasttrap_procs.fth_table[i].ftb_mtx, + "processes bucket mtx", MUTEX_DEFAULT, NULL); + + CPU_FOREACH(i) { + mutex_init(&fasttrap_cpuc_pid_lock[i], "fasttrap barrier", + MUTEX_DEFAULT, NULL); + } +#endif (void) dtrace_meta_register("fasttrap", &fasttrap_mops, NULL, &fasttrap_meta_id); - return (DDI_SUCCESS); + return (0); } static int -fasttrap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) +fasttrap_unload(void) { int i, fail = 0; - timeout_id_t tmp; - - switch (cmd) { - case DDI_DETACH: - break; - case DDI_SUSPEND: - return (DDI_SUCCESS); - default: - return (DDI_FAILURE); - } /* * Unregister the meta-provider to make sure no new fasttrap- @@ -2212,28 +2377,16 @@ fasttrap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) */ if (fasttrap_meta_id != DTRACE_METAPROVNONE && dtrace_meta_unregister(fasttrap_meta_id) != 0) - return (DDI_FAILURE); + return (-1); /* * Prevent any new timeouts from running by setting fasttrap_timeout * to a non-zero value, and wait for the current timeout to complete. 
*/ - mutex_enter(&fasttrap_cleanup_mtx); + mtx_lock(&fasttrap_cleanup_mtx); fasttrap_cleanup_work = 0; - - while (fasttrap_timeout != (timeout_id_t)1) { - tmp = fasttrap_timeout; - fasttrap_timeout = (timeout_id_t)1; - - if (tmp != 0) { - mutex_exit(&fasttrap_cleanup_mtx); - (void) untimeout(tmp); - mutex_enter(&fasttrap_cleanup_mtx); - } - } - - fasttrap_cleanup_work = 0; - mutex_exit(&fasttrap_cleanup_mtx); + callout_drain(&fasttrap_timeout); + mtx_unlock(&fasttrap_cleanup_mtx); /* * Iterate over all of our providers. If there's still a process @@ -2275,10 +2428,10 @@ fasttrap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) * and start a new timeout if any work has accumulated while * we've been unsuccessfully trying to detach. */ - mutex_enter(&fasttrap_cleanup_mtx); - fasttrap_timeout = 0; + mtx_lock(&fasttrap_cleanup_mtx); work = fasttrap_cleanup_work; - mutex_exit(&fasttrap_cleanup_mtx); + callout_drain(&fasttrap_timeout); + mtx_unlock(&fasttrap_cleanup_mtx); if (work) fasttrap_pid_cleanup(); @@ -2286,7 +2439,7 @@ fasttrap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) (void) dtrace_meta_register("fasttrap", &fasttrap_mops, NULL, &fasttrap_meta_id); - return (DDI_FAILURE); + return (-1); } #ifdef DEBUG @@ -2314,63 +2467,55 @@ fasttrap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) * be executing code in fasttrap_fork(). Similarly for p_dtrace_probes * and fasttrap_exec() and fasttrap_exit(). 
*/ - ASSERT(dtrace_fasttrap_fork_ptr == &fasttrap_fork); - dtrace_fasttrap_fork_ptr = NULL; + ASSERT(dtrace_fasttrap_fork == &fasttrap_fork); + dtrace_fasttrap_fork = NULL; - ASSERT(dtrace_fasttrap_exec_ptr == &fasttrap_exec_exit); - dtrace_fasttrap_exec_ptr = NULL; + ASSERT(dtrace_fasttrap_exec == &fasttrap_exec_exit); + dtrace_fasttrap_exec = NULL; - ASSERT(dtrace_fasttrap_exit_ptr == &fasttrap_exec_exit); - dtrace_fasttrap_exit_ptr = NULL; + ASSERT(dtrace_fasttrap_exit == &fasttrap_exec_exit); + dtrace_fasttrap_exit = NULL; - ddi_remove_minor_node(devi, NULL); +#if !defined(sun) + destroy_dev(fasttrap_cdev); + mutex_destroy(&fasttrap_count_mtx); + CPU_FOREACH(i) { + mutex_destroy(&fasttrap_cpuc_pid_lock[i]); + } +#endif - return (DDI_SUCCESS); + return (0); } -static struct dev_ops fasttrap_ops = { - DEVO_REV, /* devo_rev */ - 0, /* refcnt */ - fasttrap_info, /* get_dev_info */ - nulldev, /* identify */ - nulldev, /* probe */ - fasttrap_attach, /* attach */ - fasttrap_detach, /* detach */ - nodev, /* reset */ - &fasttrap_cb_ops, /* driver operations */ - NULL, /* bus operations */ - nodev /* dev power */ -}; +/* ARGSUSED */ +static int +fasttrap_modevent(module_t mod __unused, int type, void *data __unused) +{ + int error = 0; -/* - * Module linkage information for the kernel. 
- */ -static struct modldrv modldrv = { - &mod_driverops, /* module type (this is a pseudo driver) */ - "Fasttrap Tracing", /* name of module */ - &fasttrap_ops, /* driver ops */ -}; + switch (type) { + case MOD_LOAD: + break; -static struct modlinkage modlinkage = { - MODREV_1, - (void *)&modldrv, - NULL -}; + case MOD_UNLOAD: + break; -int -_init(void) -{ - return (mod_install(&modlinkage)); -} + case MOD_SHUTDOWN: + break; -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); + default: + error = EOPNOTSUPP; + break; + } + return (error); } -int -_fini(void) -{ - return (mod_remove(&modlinkage)); -} +SYSINIT(fasttrap_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fasttrap_load, + NULL); +SYSUNINIT(fasttrap_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, + fasttrap_unload, NULL); + +DEV_MODULE(fasttrap, fasttrap_modevent, NULL); +MODULE_VERSION(fasttrap, 1); +MODULE_DEPEND(fasttrap, dtrace, 1, 1, 1); +MODULE_DEPEND(fasttrap, opensolaris, 1, 1, 1); diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h index 1e20f5665..3bd880361 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h @@ -2289,6 +2289,11 @@ extern int dtrace_blksuword32(uintptr_t, uint32_t *, int); extern void dtrace_getfsr(uint64_t *); #endif +#if !defined(sun) +extern void dtrace_helpers_duplicate(proc_t *, proc_t *); +extern void dtrace_helpers_destroy(proc_t *); +#endif + #define DTRACE_CPUFLAG_ISSET(flag) \ (cpu_core[curcpu].cpuc_dtrace_flags & (flag)) diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h index 369d41a35..6870a0b2b 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h @@ -1268,7 +1268,7 @@ extern void dtrace_copyout(uintptr_t, uintptr_t, size_t, volatile uint16_t 
*); extern void dtrace_copyoutstr(uintptr_t, uintptr_t, size_t, volatile uint16_t *); extern void dtrace_getpcstack(pc_t *, int, int, uint32_t *); -extern ulong_t dtrace_getreg(struct regs *, uint_t); +extern ulong_t dtrace_getreg(struct trapframe *, uint_t); extern int dtrace_getstackdepth(int); extern void dtrace_getupcstack(uint64_t *, int); extern void dtrace_getufpstack(uint64_t *, uint64_t *, int); diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/fasttrap.h b/sys/cddl/contrib/opensolaris/uts/common/sys/fasttrap.h index 7f803144b..58967fd6c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/fasttrap.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/fasttrap.h @@ -37,9 +37,14 @@ extern "C" { #endif +#if defined(sun) #define FASTTRAPIOC (('m' << 24) | ('r' << 16) | ('f' << 8)) #define FASTTRAPIOC_MAKEPROBE (FASTTRAPIOC | 1) #define FASTTRAPIOC_GETINSTR (FASTTRAPIOC | 2) +#else +#define FASTTRAPIOC_MAKEPROBE _IOW('f', 1, fasttrap_probe_spec_t) +#define FASTTRAPIOC_GETINSTR _IOWR('f', 2, uint8_t) +#endif typedef enum fasttrap_probe_type { DTFTP_NONE = 0, diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/fasttrap_impl.h b/sys/cddl/contrib/opensolaris/uts/common/sys/fasttrap_impl.h new file mode 100644 index 000000000..a4e51fd04 --- /dev/null +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/fasttrap_impl.h @@ -0,0 +1,199 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _FASTTRAP_IMPL_H +#define _FASTTRAP_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Fasttrap Providers, Probes and Tracepoints + * + * Each Solaris process can have multiple providers -- the pid provider as + * well as any number of user-level statically defined tracing (USDT) + * providers. Those providers are each represented by a fasttrap_provider_t. + * All providers for a given process have a pointer to a shared + * fasttrap_proc_t. The fasttrap_proc_t has two states: active or defunct. + * When the count of active providers goes to zero it becomes defunct; a + * provider drops its active count when it is removed individually or as part + * of a mass removal when a process exits or performs an exec. + * + * Each probe is represented by a fasttrap_probe_t which has a pointer to + * its associated provider as well as a list of fasttrap_id_tp_t structures + * which are tuples combining a fasttrap_id_t and a fasttrap_tracepoint_t. + * A fasttrap_tracepoint_t represents the actual point of instrumentation + * and it contains two lists of fasttrap_id_t structures (to be fired pre- + * and post-instruction emulation) that identify the probes attached to the + * tracepoint. Tracepoints also have a pointer to the fasttrap_proc_t for the + * process they trace which is used when looking up a tracepoint both when a + * probe fires and when enabling and disabling probes. 
+ * + * It's important to note that probes are preallocated with the necessary + * number of tracepoints, but that tracepoints can be shared by probes and + * swapped between probes. If a probe's preallocated tracepoint is enabled + * (and, therefore, the associated probe is enabled), and that probe is + * then disabled, ownership of that tracepoint may be exchanged for an + * unused tracepoint belonging to another probe that was attached to the + * enabled tracepoint. + */ + +typedef struct fasttrap_proc { + pid_t ftpc_pid; /* process ID for this proc */ + uint64_t ftpc_acount; /* count of active providers */ + uint64_t ftpc_rcount; /* count of extant providers */ + kmutex_t ftpc_mtx; /* lock on all but acount */ + struct fasttrap_proc *ftpc_next; /* next proc in hash chain */ +} fasttrap_proc_t; + +typedef struct fasttrap_provider { + pid_t ftp_pid; /* process ID for this prov */ + char ftp_name[DTRACE_PROVNAMELEN]; /* prov name (w/o the pid) */ + dtrace_provider_id_t ftp_provid; /* DTrace provider handle */ + uint_t ftp_marked; /* mark for possible removal */ + uint_t ftp_retired; /* mark when retired */ + kmutex_t ftp_mtx; /* provider lock */ + kmutex_t ftp_cmtx; /* lock on creating probes */ + uint64_t ftp_rcount; /* enabled probes ref count */ + uint64_t ftp_ccount; /* consumers creating probes */ + uint64_t ftp_mcount; /* meta provider count */ + fasttrap_proc_t *ftp_proc; /* shared proc for all provs */ + struct fasttrap_provider *ftp_next; /* next prov in hash chain */ +} fasttrap_provider_t; + +typedef struct fasttrap_id fasttrap_id_t; +typedef struct fasttrap_probe fasttrap_probe_t; +typedef struct fasttrap_tracepoint fasttrap_tracepoint_t; + +struct fasttrap_id { + fasttrap_probe_t *fti_probe; /* referring probe */ + fasttrap_id_t *fti_next; /* enabled probe list on tp */ + fasttrap_probe_type_t fti_ptype; /* probe type */ +}; + +typedef struct fasttrap_id_tp { + fasttrap_id_t fit_id; + fasttrap_tracepoint_t *fit_tp; +} fasttrap_id_tp_t; + +struct
fasttrap_probe { + dtrace_id_t ftp_id; /* DTrace probe identifier */ + pid_t ftp_pid; /* pid for this probe */ + fasttrap_provider_t *ftp_prov; /* this probe's provider */ + uintptr_t ftp_faddr; /* associated function's addr */ + size_t ftp_fsize; /* associated function's size */ + uint64_t ftp_gen; /* modification generation */ + uint64_t ftp_ntps; /* number of tracepoints */ + uint8_t *ftp_argmap; /* native to translated args */ + uint8_t ftp_nargs; /* translated argument count */ + uint8_t ftp_enabled; /* is this probe enabled */ + char *ftp_xtypes; /* translated types index */ + char *ftp_ntypes; /* native types index */ + fasttrap_id_tp_t ftp_tps[1]; /* flexible array */ +}; + +#define FASTTRAP_ID_INDEX(id) \ +((fasttrap_id_tp_t *)(((char *)(id) - offsetof(fasttrap_id_tp_t, fit_id))) - \ +&(id)->fti_probe->ftp_tps[0]) + +struct fasttrap_tracepoint { + fasttrap_proc_t *ftt_proc; /* associated process struct */ + uintptr_t ftt_pc; /* address of tracepoint */ + pid_t ftt_pid; /* pid of tracepoint */ + fasttrap_machtp_t ftt_mtp; /* ISA-specific portion */ + fasttrap_id_t *ftt_ids; /* NULL-terminated list */ + fasttrap_id_t *ftt_retids; /* NULL-terminated list */ + fasttrap_tracepoint_t *ftt_next; /* link in global hash */ +}; + +typedef struct fasttrap_bucket { + kmutex_t ftb_mtx; /* bucket lock */ + void *ftb_data; /* data payload */ + + uint8_t ftb_pad[64 - sizeof (kmutex_t) - sizeof (void *)]; +} fasttrap_bucket_t; + +typedef struct fasttrap_hash { + ulong_t fth_nent; /* power-of-2 num. of entries */ + ulong_t fth_mask; /* fth_nent - 1 */ + fasttrap_bucket_t *fth_table; /* array of buckets */ +} fasttrap_hash_t; + +/* + * If at some future point these assembly functions become observable by + * DTrace, then these defines should become separate functions so that the + * fasttrap provider doesn't trigger probes during internal operations. 
+ */ +#define fasttrap_copyout copyout +#define fasttrap_fuword32 fuword32 +#define fasttrap_suword32(_k, _u) copyout((_k), (_u), sizeof(uint32_t)) +#define fasttrap_suword64(_k, _u) copyout((_k), (_u), sizeof(uint64_t)) + +#ifdef __amd64__ +#define fasttrap_fulword fuword64 +#define fasttrap_sulword fasttrap_suword64 +#else +#define fasttrap_fulword fuword32 +#define fasttrap_sulword fasttrap_suword32 +#endif + +extern void fasttrap_sigtrap(proc_t *, kthread_t *, uintptr_t); + +extern dtrace_id_t fasttrap_probe_id; +extern fasttrap_hash_t fasttrap_tpoints; + +#define FASTTRAP_TPOINTS_INDEX(pid, pc) \ + (((pc) / sizeof (fasttrap_instr_t) + (pid)) & fasttrap_tpoints.fth_mask) + +/* + * Must be implemented by fasttrap_isa.c + */ +extern int fasttrap_tracepoint_init(proc_t *, fasttrap_tracepoint_t *, + uintptr_t, fasttrap_probe_type_t); +extern int fasttrap_tracepoint_install(proc_t *, fasttrap_tracepoint_t *); +extern int fasttrap_tracepoint_remove(proc_t *, fasttrap_tracepoint_t *); + +struct reg; +extern int fasttrap_pid_probe(struct reg *); +extern int fasttrap_return_probe(struct reg *); + +extern uint64_t fasttrap_pid_getarg(void *, dtrace_id_t, void *, int, int); +extern uint64_t fasttrap_usdt_getarg(void *, dtrace_id_t, void *, int, int); + +#ifdef __cplusplus +} +#endif + +#endif /* _FASTTRAP_IMPL_H */ diff --git a/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c b/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c new file mode 100644 index 000000000..85d9649f6 --- /dev/null +++ b/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c @@ -0,0 +1,1904 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Portions Copyright 2010 The FreeBSD Foundation + * + * $FreeBSD$ + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(sun) +#pragma ident "%Z%%M% %I% %E% SMI" +#endif + +#include +#include +#include +#include +#include +#if defined(sun) +#include +#include +#include +#include +#else +#include +#include +#include +#include +#include +#include +#include +#include +#endif +#include +#if defined(sun) +#include +#include +#else +#include + +static int +proc_ops(int op, proc_t *p, void *kaddr, off_t uaddr, size_t len) +{ + struct iovec iov; + struct uio uio; + + iov.iov_base = kaddr; + iov.iov_len = len; + uio.uio_offset = uaddr; + uio.uio_iov = &iov; + uio.uio_resid = len; + uio.uio_iovcnt = 1; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_td = curthread; + uio.uio_rw = op; + PHOLD(p); + if (proc_rwmem(p, &uio) < 0) { + PRELE(p); + return (-1); + } + PRELE(p); + + return (0); +} + +static int +uread(proc_t *p, void *kaddr, size_t len, uintptr_t uaddr) +{ + + return (proc_ops(UIO_READ, p, kaddr, uaddr, len)); +} + +static int +uwrite(proc_t *p, void *kaddr, size_t len, uintptr_t uaddr) +{ + + return (proc_ops(UIO_WRITE, p, kaddr, uaddr, len)); +} +#endif +#ifdef __i386__ +#define r_rax r_eax +#define r_rbx r_ebx +#define r_rip r_eip +#define r_rflags r_eflags +#define r_rsp r_esp +#endif + +/* + * Lossless User-Land Tracing on x86 + * --------------------------------- + * + * The execution of most instructions is 
not dependent on the address; for + * these instructions it is sufficient to copy them into the user process's + * address space and execute them. To effectively single-step an instruction + * in user-land, we copy out the following sequence of instructions to scratch + * space in the user thread's ulwp_t structure. + * + * We then set the program counter (%eip or %rip) to point to this scratch + * space. Once execution resumes, the original instruction is executed and + * then control flow is redirected to what was originally the subsequent + * instruction. If the kernel attempts to deliver a signal while single- + * stepping, the signal is deferred and the program counter is moved into the + * second sequence of instructions. The second sequence ends in a trap into + * the kernel where the deferred signal is then properly handled and delivered. + * + * For instructions whose execution is position dependent, we perform simple + * emulation. These instructions are limited to control transfer + * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle + * of %rip-relative addressing that means that almost any instruction can be + * position dependent. For all the details on how we emulate generic + * instructions including %rip-relative instructions, see the code in + * fasttrap_pid_probe() below where we handle instructions of type + * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
+ */ + +#define FASTTRAP_MODRM_MOD(modrm) (((modrm) >> 6) & 0x3) +#define FASTTRAP_MODRM_REG(modrm) (((modrm) >> 3) & 0x7) +#define FASTTRAP_MODRM_RM(modrm) ((modrm) & 0x7) +#define FASTTRAP_MODRM(mod, reg, rm) (((mod) << 6) | ((reg) << 3) | (rm)) + +#define FASTTRAP_SIB_SCALE(sib) (((sib) >> 6) & 0x3) +#define FASTTRAP_SIB_INDEX(sib) (((sib) >> 3) & 0x7) +#define FASTTRAP_SIB_BASE(sib) ((sib) & 0x7) + +#define FASTTRAP_REX_W(rex) (((rex) >> 3) & 1) +#define FASTTRAP_REX_R(rex) (((rex) >> 2) & 1) +#define FASTTRAP_REX_X(rex) (((rex) >> 1) & 1) +#define FASTTRAP_REX_B(rex) ((rex) & 1) +#define FASTTRAP_REX(w, r, x, b) \ + (0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b)) + +/* + * Single-byte op-codes. + */ +#define FASTTRAP_PUSHL_EBP 0x55 + +#define FASTTRAP_JO 0x70 +#define FASTTRAP_JNO 0x71 +#define FASTTRAP_JB 0x72 +#define FASTTRAP_JAE 0x73 +#define FASTTRAP_JE 0x74 +#define FASTTRAP_JNE 0x75 +#define FASTTRAP_JBE 0x76 +#define FASTTRAP_JA 0x77 +#define FASTTRAP_JS 0x78 +#define FASTTRAP_JNS 0x79 +#define FASTTRAP_JP 0x7a +#define FASTTRAP_JNP 0x7b +#define FASTTRAP_JL 0x7c +#define FASTTRAP_JGE 0x7d +#define FASTTRAP_JLE 0x7e +#define FASTTRAP_JG 0x7f + +#define FASTTRAP_NOP 0x90 + +#define FASTTRAP_MOV_EAX 0xb8 +#define FASTTRAP_MOV_ECX 0xb9 + +#define FASTTRAP_RET16 0xc2 +#define FASTTRAP_RET 0xc3 + +#define FASTTRAP_LOOPNZ 0xe0 +#define FASTTRAP_LOOPZ 0xe1 +#define FASTTRAP_LOOP 0xe2 +#define FASTTRAP_JCXZ 0xe3 + +#define FASTTRAP_CALL 0xe8 +#define FASTTRAP_JMP32 0xe9 +#define FASTTRAP_JMP8 0xeb + +#define FASTTRAP_INT3 0xcc +#define FASTTRAP_INT 0xcd + +#define FASTTRAP_2_BYTE_OP 0x0f +#define FASTTRAP_GROUP5_OP 0xff + +/* + * Two-byte op-codes (second byte only). 
+ */ +#define FASTTRAP_0F_JO 0x80 +#define FASTTRAP_0F_JNO 0x81 +#define FASTTRAP_0F_JB 0x82 +#define FASTTRAP_0F_JAE 0x83 +#define FASTTRAP_0F_JE 0x84 +#define FASTTRAP_0F_JNE 0x85 +#define FASTTRAP_0F_JBE 0x86 +#define FASTTRAP_0F_JA 0x87 +#define FASTTRAP_0F_JS 0x88 +#define FASTTRAP_0F_JNS 0x89 +#define FASTTRAP_0F_JP 0x8a +#define FASTTRAP_0F_JNP 0x8b +#define FASTTRAP_0F_JL 0x8c +#define FASTTRAP_0F_JGE 0x8d +#define FASTTRAP_0F_JLE 0x8e +#define FASTTRAP_0F_JG 0x8f + +#define FASTTRAP_EFLAGS_OF 0x800 +#define FASTTRAP_EFLAGS_DF 0x400 +#define FASTTRAP_EFLAGS_SF 0x080 +#define FASTTRAP_EFLAGS_ZF 0x040 +#define FASTTRAP_EFLAGS_AF 0x010 +#define FASTTRAP_EFLAGS_PF 0x004 +#define FASTTRAP_EFLAGS_CF 0x001 + +/* + * Instruction prefixes. + */ +#define FASTTRAP_PREFIX_OPERAND 0x66 +#define FASTTRAP_PREFIX_ADDRESS 0x67 +#define FASTTRAP_PREFIX_CS 0x2E +#define FASTTRAP_PREFIX_DS 0x3E +#define FASTTRAP_PREFIX_ES 0x26 +#define FASTTRAP_PREFIX_FS 0x64 +#define FASTTRAP_PREFIX_GS 0x65 +#define FASTTRAP_PREFIX_SS 0x36 +#define FASTTRAP_PREFIX_LOCK 0xF0 +#define FASTTRAP_PREFIX_REP 0xF3 +#define FASTTRAP_PREFIX_REPNE 0xF2 + +#define FASTTRAP_NOREG 0xff + +/* + * Map between instruction register encodings and the kernel constants which + * correspond to indicies into struct regs. + */ +#ifdef __amd64 +static const uint8_t regmap[16] = { + REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, + REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, +}; +#else +static const uint8_t regmap[8] = { + EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI +}; +#endif + +static ulong_t fasttrap_getreg(struct reg *, uint_t); + +static uint64_t +fasttrap_anarg(struct reg *rp, int function_entry, int argno) +{ + uint64_t value = 0; + int shift = function_entry ? 1 : 0; + +#ifdef __amd64 + if (curproc->p_model == DATAMODEL_LP64) { + uintptr_t *stack; + + /* + * In 64-bit mode, the first six arguments are stored in + * registers. 
+ */ + if (argno < 6) + return ((&rp->r_rdi)[argno]); + + stack = (uintptr_t *)rp->r_rsp; + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + value = dtrace_fulword(&stack[argno - 6 + shift]); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); + } else { +#endif +#ifdef __i386 + uint32_t *stack = (uint32_t *)rp->r_esp; + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + value = dtrace_fuword32(&stack[argno + shift]); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); +#endif +#ifdef __amd64 + } +#endif + + return (value); +} + +/*ARGSUSED*/ +int +fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, + fasttrap_probe_type_t type) +{ + uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10]; + size_t len = FASTTRAP_MAX_INSTR_SIZE; + size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET)); + uint_t start = 0; + int rmindex, size; + uint8_t seg, rex = 0; + + /* + * Read the instruction at the given address out of the process's + * address space. We don't have to worry about a debugger + * changing this instruction before we overwrite it with our trap + * instruction since P_PR_LOCK is set. Since instructions can span + * pages, we potentially read the instruction in two parts. If the + * second part fails, we just zero out that part of the instruction. + */ + if (uread(p, &instr[0], first, pc) != 0) + return (-1); + if (len > first && + uread(p, &instr[first], len - first, pc + first) != 0) { + bzero(&instr[first], len - first); + len = first; + } + + /* + * If the disassembly fails, then we have a malformed instruction. + */ + if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0) + return (-1); + + /* + * Make sure the disassembler isn't completely broken. 
+ */ + ASSERT(-1 <= rmindex && rmindex < size); + + /* + * If the computed size is greater than the number of bytes read, + * then it was a malformed instruction possibly because it fell on a + * page boundary and the subsequent page was missing or because of + * some malicious user. + */ + if (size > len) + return (-1); + + tp->ftt_size = (uint8_t)size; + tp->ftt_segment = FASTTRAP_SEG_NONE; + + /* + * Find the start of the instruction's opcode by processing any + * legacy prefixes. + */ + for (;;) { + seg = 0; + switch (instr[start]) { + case FASTTRAP_PREFIX_SS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_GS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_FS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_ES: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_DS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_CS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_OPERAND: + case FASTTRAP_PREFIX_ADDRESS: + case FASTTRAP_PREFIX_LOCK: + case FASTTRAP_PREFIX_REP: + case FASTTRAP_PREFIX_REPNE: + if (seg != 0) { + /* + * It's illegal for an instruction to specify + * two segment prefixes -- give up on this + * illegal instruction. + */ + if (tp->ftt_segment != FASTTRAP_SEG_NONE) + return (-1); + + tp->ftt_segment = seg; + } + start++; + continue; + } + break; + } + +#ifdef __amd64 + /* + * Identify the REX prefix on 64-bit processes. + */ + if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40) + rex = instr[start++]; +#endif + + /* + * Now that we're pretty sure that the instruction is okay, copy the + * valid part to the tracepoint. 
+ */ + bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE); + + tp->ftt_type = FASTTRAP_T_COMMON; + if (instr[start] == FASTTRAP_2_BYTE_OP) { + switch (instr[start + 1]) { + case FASTTRAP_0F_JO: + case FASTTRAP_0F_JNO: + case FASTTRAP_0F_JB: + case FASTTRAP_0F_JAE: + case FASTTRAP_0F_JE: + case FASTTRAP_0F_JNE: + case FASTTRAP_0F_JBE: + case FASTTRAP_0F_JA: + case FASTTRAP_0F_JS: + case FASTTRAP_0F_JNS: + case FASTTRAP_0F_JP: + case FASTTRAP_0F_JNP: + case FASTTRAP_0F_JL: + case FASTTRAP_0F_JGE: + case FASTTRAP_0F_JLE: + case FASTTRAP_0F_JG: + tp->ftt_type = FASTTRAP_T_JCC; + tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO; + tp->ftt_dest = pc + tp->ftt_size + + /* LINTED - alignment */ + *(int32_t *)&instr[start + 2]; + break; + } + } else if (instr[start] == FASTTRAP_GROUP5_OP) { + uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]); + uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]); + uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]); + + if (reg == 2 || reg == 4) { + uint_t i, sz; + + if (reg == 2) + tp->ftt_type = FASTTRAP_T_CALL; + else + tp->ftt_type = FASTTRAP_T_JMP; + + if (mod == 3) + tp->ftt_code = 2; + else + tp->ftt_code = 1; + + ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); + + /* + * See AMD x86-64 Architecture Programmer's Manual + * Volume 3, Section 1.2.7, Table 1-12, and + * Appendix A.3.1, Table A-15. + */ + if (mod != 3 && rm == 4) { + uint8_t sib = instr[start + 2]; + uint_t index = FASTTRAP_SIB_INDEX(sib); + uint_t base = FASTTRAP_SIB_BASE(sib); + + tp->ftt_scale = FASTTRAP_SIB_SCALE(sib); + + tp->ftt_index = (index == 4) ? + FASTTRAP_NOREG : + regmap[index | (FASTTRAP_REX_X(rex) << 3)]; + tp->ftt_base = (mod == 0 && base == 5) ? + FASTTRAP_NOREG : + regmap[base | (FASTTRAP_REX_B(rex) << 3)]; + + i = 3; + sz = mod == 1 ? 1 : 4; + } else { + /* + * In 64-bit mode, mod == 0 and r/m == 5 + * denotes %rip-relative addressing; in 32-bit + * mode, the base register isn't used. In both + * modes, there is a 32-bit operand. 
+ */ + if (mod == 0 && rm == 5) { +#ifdef __amd64 + if (p->p_model == DATAMODEL_LP64) + tp->ftt_base = REG_RIP; + else +#endif + tp->ftt_base = FASTTRAP_NOREG; + sz = 4; + } else { + uint8_t base = rm | + (FASTTRAP_REX_B(rex) << 3); + + tp->ftt_base = regmap[base]; + sz = mod == 1 ? 1 : mod == 2 ? 4 : 0; + } + tp->ftt_index = FASTTRAP_NOREG; + i = 2; + } + + if (sz == 1) { + tp->ftt_dest = *(int8_t *)&instr[start + i]; + } else if (sz == 4) { + /* LINTED - alignment */ + tp->ftt_dest = *(int32_t *)&instr[start + i]; + } else { + tp->ftt_dest = 0; + } + } + } else { + switch (instr[start]) { + case FASTTRAP_RET: + tp->ftt_type = FASTTRAP_T_RET; + break; + + case FASTTRAP_RET16: + tp->ftt_type = FASTTRAP_T_RET16; + /* LINTED - alignment */ + tp->ftt_dest = *(uint16_t *)&instr[start + 1]; + break; + + case FASTTRAP_JO: + case FASTTRAP_JNO: + case FASTTRAP_JB: + case FASTTRAP_JAE: + case FASTTRAP_JE: + case FASTTRAP_JNE: + case FASTTRAP_JBE: + case FASTTRAP_JA: + case FASTTRAP_JS: + case FASTTRAP_JNS: + case FASTTRAP_JP: + case FASTTRAP_JNP: + case FASTTRAP_JL: + case FASTTRAP_JGE: + case FASTTRAP_JLE: + case FASTTRAP_JG: + tp->ftt_type = FASTTRAP_T_JCC; + tp->ftt_code = instr[start]; + tp->ftt_dest = pc + tp->ftt_size + + (int8_t)instr[start + 1]; + break; + + case FASTTRAP_LOOPNZ: + case FASTTRAP_LOOPZ: + case FASTTRAP_LOOP: + tp->ftt_type = FASTTRAP_T_LOOP; + tp->ftt_code = instr[start]; + tp->ftt_dest = pc + tp->ftt_size + + (int8_t)instr[start + 1]; + break; + + case FASTTRAP_JCXZ: + tp->ftt_type = FASTTRAP_T_JCXZ; + tp->ftt_dest = pc + tp->ftt_size + + (int8_t)instr[start + 1]; + break; + + case FASTTRAP_CALL: + tp->ftt_type = FASTTRAP_T_CALL; + tp->ftt_dest = pc + tp->ftt_size + + /* LINTED - alignment */ + *(int32_t *)&instr[start + 1]; + tp->ftt_code = 0; + break; + + case FASTTRAP_JMP32: + tp->ftt_type = FASTTRAP_T_JMP; + tp->ftt_dest = pc + tp->ftt_size + + /* LINTED - alignment */ + *(int32_t *)&instr[start + 1]; + break; + case FASTTRAP_JMP8: + 
tp->ftt_type = FASTTRAP_T_JMP; + tp->ftt_dest = pc + tp->ftt_size + + (int8_t)instr[start + 1]; + break; + + case FASTTRAP_PUSHL_EBP: + if (start == 0) + tp->ftt_type = FASTTRAP_T_PUSHL_EBP; + break; + + case FASTTRAP_NOP: +#ifdef __amd64 + ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); + + /* + * On amd64 we have to be careful not to confuse a nop + * (actually xchgl %eax, %eax) with an instruction using + * the same opcode, but that does something different + * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax). + */ + if (FASTTRAP_REX_B(rex) == 0) +#endif + tp->ftt_type = FASTTRAP_T_NOP; + break; + + case FASTTRAP_INT3: + /* + * The pid provider shares the int3 trap with debugger + * breakpoints so we can't instrument them. + */ + ASSERT(instr[start] == FASTTRAP_INSTR); + return (-1); + + case FASTTRAP_INT: + /* + * Interrupts seem like they could be traced with + * no negative implications, but it's possible that + * a thread could be redirected by the trap handling + * code which would eventually return to the + * instruction after the interrupt. If the interrupt + * were in our scratch space, the subsequent + * instruction might be overwritten before we return. + * Accordingly we refuse to instrument any interrupt. + */ + return (-1); + } + } + +#ifdef __amd64 + if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) { + /* + * If the process is 64-bit and the instruction type is still + * FASTTRAP_T_COMMON -- meaning we're going to copy it out an + * execute it -- we need to watch for %rip-relative + * addressing mode. See the portion of fasttrap_pid_probe() + * below where we handle tracepoints with type + * FASTTRAP_T_COMMON for how we emulate instructions that + * employ %rip-relative addressing. 
+ */ + if (rmindex != -1) { + uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]); + uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]); + uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]); + + ASSERT(rmindex > start); + + if (mod == 0 && rm == 5) { + /* + * We need to be sure to avoid other + * registers used by this instruction. While + * the reg field may determine the op code + * rather than denoting a register, assuming + * that it denotes a register is always safe. + * We leave the REX field intact and use + * whatever value's there for simplicity. + */ + if (reg != 0) { + tp->ftt_ripmode = FASTTRAP_RIP_1 | + (FASTTRAP_RIP_X * + FASTTRAP_REX_B(rex)); + rm = 0; + } else { + tp->ftt_ripmode = FASTTRAP_RIP_2 | + (FASTTRAP_RIP_X * + FASTTRAP_REX_B(rex)); + rm = 1; + } + + tp->ftt_modrm = tp->ftt_instr[rmindex]; + tp->ftt_instr[rmindex] = + FASTTRAP_MODRM(2, reg, rm); + } + } + } +#endif + + return (0); +} + +int +fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp) +{ + fasttrap_instr_t instr = FASTTRAP_INSTR; + + if (uwrite(p, &instr, 1, tp->ftt_pc) != 0) + return (-1); + + return (0); +} + +int +fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp) +{ + uint8_t instr; + + /* + * Distinguish between read or write failures and a changed + * instruction. 
+ */ + if (uread(p, &instr, 1, tp->ftt_pc) != 0) + return (0); + if (instr != FASTTRAP_INSTR) + return (0); + if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0) + return (-1); + + return (0); +} + +#ifdef __amd64 +static uintptr_t +fasttrap_fulword_noerr(const void *uaddr) +{ + uintptr_t ret; + + if ((ret = fasttrap_fulword(uaddr)) != -1) + return (ret); + + return (0); +} +#endif + +#ifdef __i386__ +static uint32_t +fasttrap_fuword32_noerr(const void *uaddr) +{ + uint32_t ret; + + if ((ret = fasttrap_fuword32(uaddr)) != -1) + return (ret); + + return (0); +} +#endif + +static void +fasttrap_return_common(struct reg *rp, uintptr_t pc, pid_t pid, + uintptr_t new_pc) +{ + fasttrap_tracepoint_t *tp; + fasttrap_bucket_t *bucket; + fasttrap_id_t *id; +#if defined(sun) + kmutex_t *pid_mtx; +#endif + +#if defined(sun) + pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; + mutex_enter(pid_mtx); +#endif + bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; + + for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { + if (pid == tp->ftt_pid && pc == tp->ftt_pc && + tp->ftt_proc->ftpc_acount != 0) + break; + } + + /* + * Don't sweat it if we can't find the tracepoint again; unlike + * when we're in fasttrap_pid_probe(), finding the tracepoint here + * is not essential to the correct execution of the process. + */ + if (tp == NULL) { +#if defined(sun) + mutex_exit(pid_mtx); +#endif + return; + } + + for (id = tp->ftt_retids; id != NULL; id = id->fti_next) { + /* + * If there's a branch that could act as a return site, we + * need to trace it, and check here if the program counter is + * external to the function. 
+ */ + if (tp->ftt_type != FASTTRAP_T_RET && + tp->ftt_type != FASTTRAP_T_RET16 && + new_pc - id->fti_probe->ftp_faddr < + id->fti_probe->ftp_fsize) + continue; + + dtrace_probe(id->fti_probe->ftp_id, + pc - id->fti_probe->ftp_faddr, + rp->r_rax, rp->r_rbx, 0, 0); + } + +#if defined(sun) + mutex_exit(pid_mtx); +#endif +} + +static void +fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr) +{ +#if defined(sun) + sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); + + sqp->sq_info.si_signo = SIGSEGV; + sqp->sq_info.si_code = SEGV_MAPERR; + sqp->sq_info.si_addr = (caddr_t)addr; + + mutex_enter(&p->p_lock); + sigaddqa(p, t, sqp); + mutex_exit(&p->p_lock); + + if (t != NULL) + aston(t); +#else + ksiginfo_t *ksi = kmem_zalloc(sizeof (ksiginfo_t), KM_SLEEP); + + ksiginfo_init(ksi); + ksi->ksi_signo = SIGSEGV; + ksi->ksi_code = SEGV_MAPERR; + ksi->ksi_addr = (caddr_t)addr; + (void) tdksignal(t, SIGSEGV, ksi); +#endif +} + +#ifdef __amd64 +static void +fasttrap_usdt_args64(fasttrap_probe_t *probe, struct reg *rp, int argc, + uintptr_t *argv) +{ + int i, x, cap = MIN(argc, probe->ftp_nargs); + uintptr_t *stack = (uintptr_t *)rp->r_rsp; + + for (i = 0; i < cap; i++) { + x = probe->ftp_argmap[i]; + + if (x < 6) + argv[i] = (&rp->r_rdi)[x]; + else + argv[i] = fasttrap_fulword_noerr(&stack[x]); + } + + for (; i < argc; i++) { + argv[i] = 0; + } +} +#endif + +#ifdef __i386__ +static void +fasttrap_usdt_args32(fasttrap_probe_t *probe, struct reg *rp, int argc, + uint32_t *argv) +{ + int i, x, cap = MIN(argc, probe->ftp_nargs); + uint32_t *stack = (uint32_t *)rp->r_rsp; + + for (i = 0; i < cap; i++) { + x = probe->ftp_argmap[i]; + + argv[i] = fasttrap_fuword32_noerr(&stack[x]); + } + + for (; i < argc; i++) { + argv[i] = 0; + } +} +#endif + +static int +fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct reg *rp, uintptr_t *addr) +{ + proc_t *p = curproc; +#ifdef __i386__ + struct segment_descriptor *desc; +#else + struct user_segment_descriptor *desc; +#endif + 
uint16_t sel = 0, ndx, type; + uintptr_t limit; + + switch (tp->ftt_segment) { + case FASTTRAP_SEG_CS: + sel = rp->r_cs; + break; + case FASTTRAP_SEG_DS: + sel = rp->r_ds; + break; + case FASTTRAP_SEG_ES: + sel = rp->r_es; + break; + case FASTTRAP_SEG_FS: + sel = rp->r_fs; + break; + case FASTTRAP_SEG_GS: + sel = rp->r_gs; + break; + case FASTTRAP_SEG_SS: + sel = rp->r_ss; + break; + } + + /* + * Make sure the given segment register specifies a user priority + * selector rather than a kernel selector. + */ + if (ISPL(sel) != SEL_UPL) + return (-1); + + ndx = IDXSEL(sel); + + /* + * Check the bounds and grab the descriptor out of the specified + * descriptor table. + */ + if (ISLDT(sel)) { +#ifdef __i386__ + if (ndx > p->p_md.md_ldt->ldt_len) + return (-1); + + desc = (struct segment_descriptor *) + p->p_md.md_ldt[ndx].ldt_base; +#else + if (ndx > max_ldt_segment) + return (-1); + + desc = (struct user_segment_descriptor *) + p->p_md.md_ldt[ndx].ldt_base; +#endif + + } else { + if (ndx >= NGDT) + return (-1); + +#ifdef __i386__ + desc = &gdt[ndx].sd; +#else + desc = &gdt[ndx]; +#endif + } + + /* + * The descriptor must have user privilege level and it must be + * present in memory. + */ + if (desc->sd_dpl != SEL_UPL || desc->sd_p != 1) + return (-1); + + type = desc->sd_type; + + /* + * If the S bit in the type field is not set, this descriptor can + * only be used in system context. + */ + if ((type & 0x10) != 0x10) + return (-1); + + limit = USD_GETLIMIT(desc) * (desc->sd_gran ? PAGESIZE : 1); + + if (tp->ftt_segment == FASTTRAP_SEG_CS) { + /* + * The code/data bit and readable bit must both be set. + */ + if ((type & 0xa) != 0xa) + return (-1); + + if (*addr > limit) + return (-1); + } else { + /* + * The code/data bit must be clear. + */ + if ((type & 0x8) != 0) + return (-1); + + /* + * If the expand-down bit is clear, we just check the limit as + * it would naturally be applied. Otherwise, we need to check + * that the address is the range [limit + 1 .. 
0xffff] or + * [limit + 1 ... 0xffffffff] depending on if the default + * operand size bit is set. + */ + if ((type & 0x4) == 0) { + if (*addr > limit) + return (-1); + } else if (desc->sd_def32) { + if (*addr < limit + 1 || 0xffff < *addr) + return (-1); + } else { + if (*addr < limit + 1 || 0xffffffff < *addr) + return (-1); + } + } + + *addr += USD_GETBASE(desc); + + return (0); +} + +int +fasttrap_pid_probe(struct reg *rp) +{ + proc_t *p = curproc; + uintptr_t pc = rp->r_rip - 1; + uintptr_t new_pc = 0; + fasttrap_bucket_t *bucket; +#if defined(sun) + kmutex_t *pid_mtx; +#endif + fasttrap_tracepoint_t *tp, tp_local; + pid_t pid; + dtrace_icookie_t cookie; + uint_t is_enabled = 0; + + /* + * It's possible that a user (in a veritable orgy of bad planning) + * could redirect this thread's flow of control before it reached the + * return probe fasttrap. In this case we need to kill the process + * since it's in a unrecoverable state. + */ + if (curthread->t_dtrace_step) { + ASSERT(curthread->t_dtrace_on); + fasttrap_sigtrap(p, curthread, pc); + return (0); + } + + /* + * Clear all user tracing flags. + */ + curthread->t_dtrace_ft = 0; + curthread->t_dtrace_pc = 0; + curthread->t_dtrace_npc = 0; + curthread->t_dtrace_scrpc = 0; + curthread->t_dtrace_astpc = 0; +#ifdef __amd64 + curthread->t_dtrace_regv = 0; +#endif + +#if defined(sun) + /* + * Treat a child created by a call to vfork(2) as if it were its + * parent. We know that there's only one thread of control in such a + * process: this one. + */ + while (p->p_flag & SVFORK) { + p = p->p_parent; + } +#endif + + PROC_LOCK(p); + pid = p->p_pid; +#if defined(sun) + pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; + mutex_enter(pid_mtx); +#endif + bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; + + /* + * Lookup the tracepoint that the process just hit. 
+ */ + for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { + if (pid == tp->ftt_pid && pc == tp->ftt_pc && + tp->ftt_proc->ftpc_acount != 0) + break; + } + + /* + * If we couldn't find a matching tracepoint, either a tracepoint has + * been inserted without using the pid ioctl interface (see + * fasttrap_ioctl), or somehow we have mislaid this tracepoint. + */ + if (tp == NULL) { +#if defined(sun) + mutex_exit(pid_mtx); +#endif + PROC_UNLOCK(p); + return (-1); + } + + /* + * Set the program counter to the address of the traced instruction + * so that it looks right in ustack() output. + */ + rp->r_rip = pc; + + if (tp->ftt_ids != NULL) { + fasttrap_id_t *id; + +#ifdef __amd64 + if (p->p_model == DATAMODEL_LP64) { + for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { + fasttrap_probe_t *probe = id->fti_probe; + + if (id->fti_ptype == DTFTP_ENTRY) { + /* + * We note that this was an entry + * probe to help ustack() find the + * first caller. + */ + cookie = dtrace_interrupt_disable(); + DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); + dtrace_probe(probe->ftp_id, rp->r_rdi, + rp->r_rsi, rp->r_rdx, rp->r_rcx, + rp->r_r8); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); + dtrace_interrupt_enable(cookie); + } else if (id->fti_ptype == DTFTP_IS_ENABLED) { + /* + * Note that in this case, we don't + * call dtrace_probe() since it's only + * an artificial probe meant to change + * the flow of control so that it + * encounters the true probe. + */ + is_enabled = 1; + } else if (probe->ftp_argmap == NULL) { + dtrace_probe(probe->ftp_id, rp->r_rdi, + rp->r_rsi, rp->r_rdx, rp->r_rcx, + rp->r_r8); + } else { + uintptr_t t[5]; + + fasttrap_usdt_args64(probe, rp, + sizeof (t) / sizeof (t[0]), t); + + dtrace_probe(probe->ftp_id, t[0], t[1], + t[2], t[3], t[4]); + } + } + } else { +#else /* __amd64 */ + uintptr_t s0, s1, s2, s3, s4, s5; + uint32_t *stack = (uint32_t *)rp->r_esp; + + /* + * In 32-bit mode, all arguments are passed on the + * stack. 
If this is a function entry probe, we need + * to skip the first entry on the stack as it + * represents the return address rather than a + * parameter to the function. + */ + s0 = fasttrap_fuword32_noerr(&stack[0]); + s1 = fasttrap_fuword32_noerr(&stack[1]); + s2 = fasttrap_fuword32_noerr(&stack[2]); + s3 = fasttrap_fuword32_noerr(&stack[3]); + s4 = fasttrap_fuword32_noerr(&stack[4]); + s5 = fasttrap_fuword32_noerr(&stack[5]); + + for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { + fasttrap_probe_t *probe = id->fti_probe; + + if (id->fti_ptype == DTFTP_ENTRY) { + /* + * We note that this was an entry + * probe to help ustack() find the + * first caller. + */ + cookie = dtrace_interrupt_disable(); + DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); + dtrace_probe(probe->ftp_id, s1, s2, + s3, s4, s5); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); + dtrace_interrupt_enable(cookie); + } else if (id->fti_ptype == DTFTP_IS_ENABLED) { + /* + * Note that in this case, we don't + * call dtrace_probe() since it's only + * an artificial probe meant to change + * the flow of control so that it + * encounters the true probe. + */ + is_enabled = 1; + } else if (probe->ftp_argmap == NULL) { + dtrace_probe(probe->ftp_id, s0, s1, + s2, s3, s4); + } else { + uint32_t t[5]; + + fasttrap_usdt_args32(probe, rp, + sizeof (t) / sizeof (t[0]), t); + + dtrace_probe(probe->ftp_id, t[0], t[1], + t[2], t[3], t[4]); + } + } +#endif /* __amd64 */ +#ifdef __amd64 + } +#endif + } + + /* + * We're about to do a bunch of work so we cache a local copy of + * the tracepoint to emulate the instruction, and then find the + * tracepoint again later if we need to light up any return probes. + */ + tp_local = *tp; + PROC_UNLOCK(p); +#if defined(sun) + mutex_exit(pid_mtx); +#endif + tp = &tp_local; + + /* + * Set the program counter to appear as though the traced instruction + * had completely executed. This ensures that fasttrap_getreg() will + * report the expected value for REG_RIP. 
+ */ + rp->r_rip = pc + tp->ftt_size; + + /* + * If there's an is-enabled probe connected to this tracepoint it + * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax' + * instruction that was placed there by DTrace when the binary was + * linked. As this probe is, in fact, enabled, we need to stuff 1 + * into %eax or %rax. Accordingly, we can bypass all the instruction + * emulation logic since we know the inevitable result. It's possible + * that a user could construct a scenario where the 'is-enabled' + * probe was on some other instruction, but that would be a rather + * exotic way to shoot oneself in the foot. + */ + if (is_enabled) { + rp->r_rax = 1; + new_pc = rp->r_rip; + goto done; + } + + /* + * We emulate certain types of instructions to ensure correctness + * (in the case of position dependent instructions) or optimize + * common cases. The rest we have the thread execute back in user- + * land. + */ + switch (tp->ftt_type) { + case FASTTRAP_T_RET: + case FASTTRAP_T_RET16: + { + uintptr_t dst = 0; + uintptr_t addr = 0; + int ret = 0; + + /* + * We have to emulate _every_ facet of the behavior of a ret + * instruction including what happens if the load from %esp + * fails; in that case, we send a SIGSEGV. 
+ */ +#ifdef __amd64 + if (p->p_model == DATAMODEL_NATIVE) { + ret = dst = fasttrap_fulword((void *)rp->r_rsp); + addr = rp->r_rsp + sizeof (uintptr_t); + } else { +#endif +#ifdef __i386__ + uint32_t dst32; + ret = dst32 = fasttrap_fuword32((void *)rp->r_esp); + dst = dst32; + addr = rp->r_esp + sizeof (uint32_t); +#endif +#ifdef __amd64 + } +#endif + + if (ret == -1) { + fasttrap_sigsegv(p, curthread, rp->r_rsp); + new_pc = pc; + break; + } + + if (tp->ftt_type == FASTTRAP_T_RET16) + addr += tp->ftt_dest; + + rp->r_rsp = addr; + new_pc = dst; + break; + } + + case FASTTRAP_T_JCC: + { + uint_t taken = 0; + + switch (tp->ftt_code) { + case FASTTRAP_JO: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) != 0; + break; + case FASTTRAP_JNO: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0; + break; + case FASTTRAP_JB: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0; + break; + case FASTTRAP_JAE: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0; + break; + case FASTTRAP_JE: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; + break; + case FASTTRAP_JNE: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; + break; + case FASTTRAP_JBE: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0 || + (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; + break; + case FASTTRAP_JA: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0 && + (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; + break; + case FASTTRAP_JS: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) != 0; + break; + case FASTTRAP_JNS: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0; + break; + case FASTTRAP_JP: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) != 0; + break; + case FASTTRAP_JNP: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) == 0; + break; + case FASTTRAP_JL: + taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) != + ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); + break; + case FASTTRAP_JGE: + taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == + ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); + break; + case FASTTRAP_JLE: + 
taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 || + ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) != + ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); + break; + case FASTTRAP_JG: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && + ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == + ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); + break; + + } + + if (taken) + new_pc = tp->ftt_dest; + else + new_pc = pc + tp->ftt_size; + break; + } + + case FASTTRAP_T_LOOP: + { + uint_t taken = 0; +#ifdef __amd64 + greg_t cx = rp->r_rcx--; +#else + greg_t cx = rp->r_ecx--; +#endif + + switch (tp->ftt_code) { + case FASTTRAP_LOOPNZ: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && + cx != 0; + break; + case FASTTRAP_LOOPZ: + taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 && + cx != 0; + break; + case FASTTRAP_LOOP: + taken = (cx != 0); + break; + } + + if (taken) + new_pc = tp->ftt_dest; + else + new_pc = pc + tp->ftt_size; + break; + } + + case FASTTRAP_T_JCXZ: + { +#ifdef __amd64 + greg_t cx = rp->r_rcx; +#else + greg_t cx = rp->r_ecx; +#endif + + if (cx == 0) + new_pc = tp->ftt_dest; + else + new_pc = pc + tp->ftt_size; + break; + } + + case FASTTRAP_T_PUSHL_EBP: + { + int ret = 0; + uintptr_t addr = 0; +#ifdef __amd64 + if (p->p_model == DATAMODEL_NATIVE) { + addr = rp->r_rsp - sizeof (uintptr_t); + ret = fasttrap_sulword((void *)addr, &rp->r_rsp); + } else { +#endif +#ifdef __i386__ + addr = rp->r_rsp - sizeof (uint32_t); + ret = fasttrap_suword32((void *)addr, &rp->r_rsp); +#endif +#ifdef __amd64 + } +#endif + + if (ret == -1) { + fasttrap_sigsegv(p, curthread, addr); + new_pc = pc; + break; + } + + rp->r_rsp = addr; + new_pc = pc + tp->ftt_size; + break; + } + + case FASTTRAP_T_NOP: + new_pc = pc + tp->ftt_size; + break; + + case FASTTRAP_T_JMP: + case FASTTRAP_T_CALL: + if (tp->ftt_code == 0) { + new_pc = tp->ftt_dest; + } else { +#ifdef __amd64 + uintptr_t value; +#endif + uintptr_t addr = tp->ftt_dest; + + if (tp->ftt_base != FASTTRAP_NOREG) + addr += fasttrap_getreg(rp, 
tp->ftt_base); + if (tp->ftt_index != FASTTRAP_NOREG) + addr += fasttrap_getreg(rp, tp->ftt_index) << + tp->ftt_scale; + + if (tp->ftt_code == 1) { + /* + * If there's a segment prefix for this + * instruction, we'll need to check permissions + * and bounds on the given selector, and adjust + * the address accordingly. + */ + if (tp->ftt_segment != FASTTRAP_SEG_NONE && + fasttrap_do_seg(tp, rp, &addr) != 0) { + fasttrap_sigsegv(p, curthread, addr); + new_pc = pc; + break; + } + +#ifdef __amd64 + if (p->p_model == DATAMODEL_NATIVE) { + if ((value = fasttrap_fulword((void *)addr)) + == -1) { + fasttrap_sigsegv(p, curthread, + addr); + new_pc = pc; + break; + } + new_pc = value; + } else { +#endif +#ifdef __i386__ + uint32_t value32; + addr = (uintptr_t)(uint32_t)addr; + if ((value32 = fasttrap_fuword32((void *)addr)) + == -1) { + fasttrap_sigsegv(p, curthread, + addr); + new_pc = pc; + break; + } + new_pc = value32; +#endif + } +#ifdef __amd64 + } else { + new_pc = addr; + } +#endif + } + + /* + * If this is a call instruction, we need to push the return + * address onto the stack. If this fails, we send the process + * a SIGSEGV and reset the pc to emulate what would happen if + * this instruction weren't traced. 
+ */ + if (tp->ftt_type == FASTTRAP_T_CALL) { + int ret = 0; + uintptr_t addr = 0, pcps; +#ifdef __amd64 + if (p->p_model == DATAMODEL_NATIVE) { + addr = rp->r_rsp - sizeof (uintptr_t); + pcps = pc + tp->ftt_size; + ret = fasttrap_sulword((void *)addr, &pcps); + } else { +#endif +#ifdef __i386__ + addr = rp->r_rsp - sizeof (uint32_t); + pcps = (uint32_t)(pc + tp->ftt_size); + ret = fasttrap_suword32((void *)addr, &pcps); +#endif +#ifdef __amd64 + } +#endif + + if (ret == -1) { + fasttrap_sigsegv(p, curthread, addr); + new_pc = pc; + break; + } + + rp->r_rsp = addr; + } + + break; + + case FASTTRAP_T_COMMON: + { + uintptr_t addr; +#if defined(__amd64) + uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22]; +#else + uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7]; +#endif + uint_t i = 0; +#if defined(sun) + klwp_t *lwp = ttolwp(curthread); +#endif + + /* + * Compute the address of the ulwp_t and step over the + * ul_self pointer. The method used to store the user-land + * thread pointer is very different on 32- and 64-bit + * kernels. + */ +#if defined(sun) +#if defined(__amd64) + if (p->p_model == DATAMODEL_LP64) { + addr = lwp->lwp_pcb.pcb_fsbase; + addr += sizeof (void *); + } else { + addr = lwp->lwp_pcb.pcb_gsbase; + addr += sizeof (caddr32_t); + } +#else + addr = USD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc); + addr += sizeof (void *); +#endif +#endif /* sun */ +#ifdef __i386__ + addr = USD_GETBASE(&curthread->td_pcb->pcb_gsd); +#else + addr = curthread->td_pcb->pcb_gsbase; +#endif + addr += sizeof (void *); + + /* + * Generic Instruction Tracing + * --------------------------- + * + * This is the layout of the scratch space in the user-land + * thread structure for our generated instructions. 
+ * + * 32-bit mode bytes + * ------------------------ ----- + * a: <= 15 + * jmp ftt_size> 5 + * b: <= 15 + * int T_DTRACE_RET 2 + * ----- + * <= 37 + * + * 64-bit mode bytes + * ------------------------ ----- + * a: <= 15 + * jmp 0(%rip) 6 + * ftt_size> 8 + * b: <= 15 + * int T_DTRACE_RET 2 + * ----- + * <= 46 + * + * The %pc is set to a, and curthread->t_dtrace_astpc is set + * to b. If we encounter a signal on the way out of the + * kernel, trap() will set %pc to curthread->t_dtrace_astpc + * so that we execute the original instruction and re-enter + * the kernel rather than redirecting to the next instruction. + * + * If there are return probes (so we know that we're going to + * need to reenter the kernel after executing the original + * instruction), the scratch space will just contain the + * original instruction followed by an interrupt -- the same + * data as at b. + * + * %rip-relative Addressing + * ------------------------ + * + * There's a further complication in 64-bit mode due to %rip- + * relative addressing. While this is clearly a beneficial + * architectural decision for position independent code, it's + * hard not to see it as a personal attack against the pid + * provider since before there was a relatively small set of + * instructions to emulate; with %rip-relative addressing, + * almost every instruction can potentially depend on the + * address at which it's executed. Rather than emulating + * the broad spectrum of instructions that can now be + * position dependent, we emulate jumps and others as in + * 32-bit mode, and take a different tack for instructions + * using %rip-relative addressing. + * + * For every instruction that uses the ModRM byte, the + * in-kernel disassembler reports its location. We use the + * ModRM byte to identify that an instruction uses + * %rip-relative addressing and to see what other registers + * the instruction uses. 
To emulate those instructions, + * we modify the instruction to be %rax-relative rather than + * %rip-relative (or %rcx-relative if the instruction uses + * %rax; or %r8- or %r9-relative if the REX.B is present so + * we don't have to rewrite the REX prefix). We then load + * the value that %rip would have been into the scratch + * register and generate an instruction to reset the scratch + * register back to its original value. The instruction + * sequence looks like this: + * + * 64-mode %rip-relative bytes + * ------------------------ ----- + * a: <modified instruction> <= 15 + * movq $<value>, %<scratch-reg> 6 + * jmp 0(%rip) 6 + * <pc + tp->ftt_size> 8 + * b: <original instruction> <= 15 + * int T_DTRACE_RET 2 + * ----- + * 52 + * + * We set curthread->t_dtrace_regv so that upon receiving + * a signal we can reset the value of the scratch register. + */ + + ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE); + + curthread->t_dtrace_scrpc = addr; + bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); + i += tp->ftt_size; + +#ifdef __amd64 + if (tp->ftt_ripmode != 0) { + greg_t *reg = NULL; + + ASSERT(p->p_model == DATAMODEL_LP64); + ASSERT(tp->ftt_ripmode & + (FASTTRAP_RIP_1 | FASTTRAP_RIP_2)); + + /* + * If this was a %rip-relative instruction, we change + * it to be either a %rax- or %rcx-relative + * instruction (depending on whether those registers + * are used as another operand; or %r8- or %r9- + * relative depending on the value of REX.B). We then + * set that register and generate a movq instruction + * to reset the value. 
+ */ + if (tp->ftt_ripmode & FASTTRAP_RIP_X) + scratch[i++] = FASTTRAP_REX(1, 0, 0, 1); + else + scratch[i++] = FASTTRAP_REX(1, 0, 0, 0); + + if (tp->ftt_ripmode & FASTTRAP_RIP_1) + scratch[i++] = FASTTRAP_MOV_EAX; + else + scratch[i++] = FASTTRAP_MOV_ECX; + + switch (tp->ftt_ripmode) { + case FASTTRAP_RIP_1: + reg = &rp->r_rax; + curthread->t_dtrace_reg = REG_RAX; + break; + case FASTTRAP_RIP_2: + reg = &rp->r_rcx; + curthread->t_dtrace_reg = REG_RCX; + break; + case FASTTRAP_RIP_1 | FASTTRAP_RIP_X: + reg = &rp->r_r8; + curthread->t_dtrace_reg = REG_R8; + break; + case FASTTRAP_RIP_2 | FASTTRAP_RIP_X: + reg = &rp->r_r9; + curthread->t_dtrace_reg = REG_R9; + break; + } + + /* LINTED - alignment */ + *(uint64_t *)&scratch[i] = *reg; + curthread->t_dtrace_regv = *reg; + *reg = pc + tp->ftt_size; + i += sizeof (uint64_t); + } +#endif + + /* + * Generate the branch instruction to what would have + * normally been the subsequent instruction. In 32-bit mode, + * this is just a relative branch; in 64-bit mode this is a + * %rip-relative branch that loads the 64-bit pc value + * immediately after the jmp instruction. + */ +#ifdef __amd64 + if (p->p_model == DATAMODEL_LP64) { + scratch[i++] = FASTTRAP_GROUP5_OP; + scratch[i++] = FASTTRAP_MODRM(0, 4, 5); + /* LINTED - alignment */ + *(uint32_t *)&scratch[i] = 0; + i += sizeof (uint32_t); + /* LINTED - alignment */ + *(uint64_t *)&scratch[i] = pc + tp->ftt_size; + i += sizeof (uint64_t); + } else { +#endif +#ifdef __i386__ + /* + * Set up the jmp to the next instruction; note that + * the size of the traced instruction cancels out. 
+ */ + scratch[i++] = FASTTRAP_JMP32; + /* LINTED - alignment */ + *(uint32_t *)&scratch[i] = pc - addr - 5; + i += sizeof (uint32_t); +#endif +#ifdef __amd64 + } +#endif + + curthread->t_dtrace_astpc = addr + i; + bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); + i += tp->ftt_size; + scratch[i++] = FASTTRAP_INT; + scratch[i++] = T_DTRACE_RET; + + ASSERT(i <= sizeof (scratch)); + + if (fasttrap_copyout(scratch, (char *)addr, i)) { + fasttrap_sigtrap(p, curthread, pc); + new_pc = pc; + break; + } + + if (tp->ftt_retids != NULL) { + curthread->t_dtrace_step = 1; + curthread->t_dtrace_ret = 1; + new_pc = curthread->t_dtrace_astpc; + } else { + new_pc = curthread->t_dtrace_scrpc; + } + + curthread->t_dtrace_pc = pc; + curthread->t_dtrace_npc = pc + tp->ftt_size; + curthread->t_dtrace_on = 1; + break; + } + + default: + panic("fasttrap: mishandled an instruction"); + } + +done: + /* + * If there were no return probes when we first found the tracepoint, + * we should feel no obligation to honor any return probes that were + * subsequently enabled -- they'll just have to wait until the next + * time around. + */ + if (tp->ftt_retids != NULL) { + /* + * We need to wait until the results of the instruction are + * apparent before invoking any return probes. If this + * instruction was emulated we can just call + * fasttrap_return_common(); if it needs to be executed, we + * need to wait until the user thread returns to the kernel. + */ + if (tp->ftt_type != FASTTRAP_T_COMMON) { + /* + * Set the program counter to the address of the traced + * instruction so that it looks right in ustack() + * output. We had previously set it to the end of the + * instruction to simplify %rip-relative addressing. 
+ */ + rp->r_rip = pc; + + fasttrap_return_common(rp, pc, pid, new_pc); + } else { + ASSERT(curthread->t_dtrace_ret != 0); + ASSERT(curthread->t_dtrace_pc == pc); + ASSERT(curthread->t_dtrace_scrpc != 0); + ASSERT(new_pc == curthread->t_dtrace_astpc); + } + } + + rp->r_rip = new_pc; + set_regs(curthread, rp); + + return (0); +} + +int +fasttrap_return_probe(struct reg *rp) +{ + proc_t *p = curproc; + uintptr_t pc = curthread->t_dtrace_pc; + uintptr_t npc = curthread->t_dtrace_npc; + + curthread->t_dtrace_pc = 0; + curthread->t_dtrace_npc = 0; + curthread->t_dtrace_scrpc = 0; + curthread->t_dtrace_astpc = 0; + +#if defined(sun) + /* + * Treat a child created by a call to vfork(2) as if it were its + * parent. We know that there's only one thread of control in such a + * process: this one. + */ + while (p->p_flag & SVFORK) { + p = p->p_parent; + } +#endif + + /* + * We set rp->r_rip to the address of the traced instruction so + * that it appears to dtrace_probe() that we're on the original + * instruction, and so that the user can't easily detect our + * complex web of lies. dtrace_return_probe() (our caller) + * will correctly set %pc after we return. 
+ */ + rp->r_rip = pc; + + fasttrap_return_common(rp, pc, p->p_pid, npc); + + return (0); +} + +/*ARGSUSED*/ +uint64_t +fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, + int aframes) +{ + struct reg r; + + fill_regs(curthread, &r); + + return (fasttrap_anarg(&r, 1, argno)); +} + +/*ARGSUSED*/ +uint64_t +fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, + int aframes) +{ + struct reg r; + + fill_regs(curthread, &r); + + return (fasttrap_anarg(&r, 0, argno)); +} + +static ulong_t +fasttrap_getreg(struct reg *rp, uint_t reg) +{ +#ifdef __amd64 + switch (reg) { + case REG_R15: return (rp->r_r15); + case REG_R14: return (rp->r_r14); + case REG_R13: return (rp->r_r13); + case REG_R12: return (rp->r_r12); + case REG_R11: return (rp->r_r11); + case REG_R10: return (rp->r_r10); + case REG_R9: return (rp->r_r9); + case REG_R8: return (rp->r_r8); + case REG_RDI: return (rp->r_rdi); + case REG_RSI: return (rp->r_rsi); + case REG_RBP: return (rp->r_rbp); + case REG_RBX: return (rp->r_rbx); + case REG_RDX: return (rp->r_rdx); + case REG_RCX: return (rp->r_rcx); + case REG_RAX: return (rp->r_rax); + case REG_TRAPNO: return (rp->r_trapno); + case REG_ERR: return (rp->r_err); + case REG_RIP: return (rp->r_rip); + case REG_CS: return (rp->r_cs); +#if defined(sun) + case REG_RFL: return (rp->r_rfl); +#endif + case REG_RSP: return (rp->r_rsp); + case REG_SS: return (rp->r_ss); + case REG_FS: return (rp->r_fs); + case REG_GS: return (rp->r_gs); + case REG_DS: return (rp->r_ds); + case REG_ES: return (rp->r_es); + case REG_FSBASE: return (rdmsr(MSR_FSBASE)); + case REG_GSBASE: return (rdmsr(MSR_GSBASE)); + } + + panic("dtrace: illegal register constant"); + /*NOTREACHED*/ +#else +#define _NGREG 19 + if (reg >= _NGREG) + panic("dtrace: illegal register constant"); + + return (((greg_t *)&rp->r_gs)[reg]); +#endif +} diff --git a/sys/cddl/contrib/opensolaris/uts/sparc/dtrace/fasttrap_isa.c 
b/sys/cddl/contrib/opensolaris/uts/sparc/dtrace/fasttrap_isa.c new file mode 100644 index 000000000..45d87478d --- /dev/null +++ b/sys/cddl/contrib/opensolaris/uts/sparc/dtrace/fasttrap_isa.c @@ -0,0 +1,1597 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/fasttrap_isa.h> +#include <sys/fasttrap_impl.h> +#include <sys/dtrace.h> +#include <sys/dtrace_impl.h> +#include <sys/cmn_err.h> +#include <sys/frame.h> +#include <sys/stack.h> +#include <sys/sysmacros.h> +#include <sys/trap.h> + +#include <v9/sys/machpcb.h> +#include <v9/sys/privregs.h> + +/* + * Lossless User-Land Tracing on SPARC + * ----------------------------------- + * + * The Basic Idea + * + * The most important design constraint is, of course, correct execution of + * the user thread above all else. The next most important goal is rapid + * execution. We combine execution of instructions in user-land with + * emulation of certain instructions in the kernel to aim for complete + * correctness and maximal performance. 
+ * + * We take advantage of the split PC/NPC architecture to speed up logical + * single-stepping; when we copy an instruction out to the scratch space in + * the ulwp_t structure (held in the %g7 register on SPARC), we can + * effectively single step by setting the PC to our scratch space and leaving + * the NPC alone. This executes the replaced instruction and then continues + * on without having to reenter the kernel as with single-stepping. The + * obvious caveat is for instructions whose execution is PC dependent -- + * branches, call and link instructions (call and jmpl), and the rdpc + * instruction. These instructions cannot be executed in the manner described + * so they must be emulated in the kernel. + * + * Emulation for this small set of instructions is fairly simple; the most + * difficult part being emulating branch conditions. + * + * + * A Cache Heavy Portfolio + * + * It's important to note at this time that copying an instruction out to the + * ulwp_t scratch space in user-land is rather complicated. SPARC has + * separate data and instruction caches so any writes to the D$ (using a + * store instruction for example) aren't necessarily reflected in the I$. + * The flush instruction can be used to synchronize the two and must be used + * for any self-modifying code, but the flush instruction only applies to the + * primary address space (the absence of a flusha analogue to the flush + * instruction that accepts an ASI argument is an obvious omission from SPARC + * v9 where the notion of the alternate address space was introduced on + * SPARC). To correctly copy out the instruction we must use a block store + * that doesn't allocate in the D$ and ensures synchronization with the I$; + * see dtrace_blksuword32() for the implementation (this function uses + * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner + * described). Refer to the UltraSPARC I/II manual for details on the + * ASI_BLK_COMMIT_S ASI. 
+ * + * + * Return Subtleties + * + * When we're firing a return probe we need to expose the value returned by + * the function being traced. Since the function can set the return value + * in its last instruction, we need to fire the return probe only _after_ + * the effects of the instruction are apparent. For instructions that we + * emulate, we can call dtrace_probe() after we've performed the emulation; + * for instructions that we execute after we return to user-land, we set + * %pc to the instruction we copied out (as described above) and set %npc + * to a trap instruction stashed in the ulwp_t structure. After the traced + * instruction is executed, the trap instruction returns control to the + * kernel where we can fire the return probe. + * + * This need for a second trap in cases where we execute the traced + * instruction makes it all the more important to emulate the most common + * instructions to avoid the second trip in and out of the kernel. + * + * + * Making it Fast + * + * Since copying out an instruction is neither simple nor inexpensive for the + * CPU, we should attempt to avoid doing it in as many cases as possible. + * Since function entry and return are usually the most interesting probe + * sites, we attempt to tune the performance of the fasttrap provider around + * instructions typically in those places. + * + * Looking at a bunch of functions in libraries and executables reveals that + * most functions begin with either a save or a sethi (to setup a larger + * argument to the save) and end with a restore or an or (in the case of leaf + * functions). To try to improve performance, we emulate all of these + * instructions in the kernel. + * + * The save and restore instructions are a little tricky since they perform + * register window manipulation. 
Rather than trying to tinker with the + * register windows from the kernel, we emulate the implicit add that takes + * place as part of those instructions and set the %pc to point to a simple + * save or restore we've hidden in the ulwp_t structure. If we're in a return + * probe so want to make it seem as though the tracepoint has been completely + * executed we need to remember that we've pulled this trick with restore and + * pull registers from the previous window (the one that we'll switch to once + * the simple store instruction is executed) rather than the current one. This + * is why in the case of emulating a restore we set the DTrace CPU flag + * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes + * (see fasttrap_return_common()). + */ + +#define OP(x) ((x) >> 30) +#define OP2(x) (((x) >> 22) & 0x07) +#define OP3(x) (((x) >> 19) & 0x3f) +#define RCOND(x) (((x) >> 25) & 0x07) +#define COND(x) (((x) >> 25) & 0x0f) +#define A(x) (((x) >> 29) & 0x01) +#define I(x) (((x) >> 13) & 0x01) +#define RD(x) (((x) >> 25) & 0x1f) +#define RS1(x) (((x) >> 14) & 0x1f) +#define RS2(x) (((x) >> 0) & 0x1f) +#define CC(x) (((x) >> 20) & 0x03) +#define DISP16(x) ((((x) >> 6) & 0xc000) | ((x) & 0x3fff)) +#define DISP22(x) ((x) & 0x3fffff) +#define DISP19(x) ((x) & 0x7ffff) +#define DISP30(x) ((x) & 0x3fffffff) +#define SW_TRAP(x) ((x) & 0x7f) + +#define OP3_OR 0x02 +#define OP3_RD 0x28 +#define OP3_JMPL 0x38 +#define OP3_RETURN 0x39 +#define OP3_TCC 0x3a +#define OP3_SAVE 0x3c +#define OP3_RESTORE 0x3d + +#define OP3_PREFETCH 0x2d +#define OP3_CASA 0x3c +#define OP3_PREFETCHA 0x3d +#define OP3_CASXA 0x3e + +#define OP2_ILLTRAP 0x0 +#define OP2_BPcc 0x1 +#define OP2_Bicc 0x2 +#define OP2_BPr 0x3 +#define OP2_SETHI 0x4 +#define OP2_FBPfcc 0x5 +#define OP2_FBfcc 0x6 + +#define R_G0 0 +#define R_O0 8 +#define R_SP 14 +#define R_I0 24 +#define R_I1 25 +#define R_I2 26 +#define R_I3 27 +#define R_I4 28 + +/* + * Check the comment in fasttrap.h when changing 
these offsets or adding + * new instructions. + */ +#define FASTTRAP_OFF_SAVE 64 +#define FASTTRAP_OFF_RESTORE 68 +#define FASTTRAP_OFF_FTRET 72 +#define FASTTRAP_OFF_RETURN 76 + +#define BREAKPOINT_INSTR 0x91d02001 /* ta 1 */ + +/* + * Tunable to let users turn off the fancy save instruction optimization. + * If a program is non-ABI compliant, there's a possibility that the save + * instruction optimization could cause an error. + */ +int fasttrap_optimize_save = 1; + +static uint64_t +fasttrap_anarg(struct regs *rp, int argno) +{ + uint64_t value; + + if (argno < 6) + return ((&rp->r_o0)[argno]); + + if (curproc->p_model == DATAMODEL_NATIVE) { + struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS); + + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + value = dtrace_fulword(&fr->fr_argd[argno]); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR | + CPU_DTRACE_BADALIGN); + } else { + struct frame32 *fr = (struct frame32 *)rp->r_sp; + + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + value = dtrace_fuword32(&fr->fr_argd[argno]); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR | + CPU_DTRACE_BADALIGN); + } + + return (value); +} + +static ulong_t fasttrap_getreg(struct regs *, uint_t); +static void fasttrap_putreg(struct regs *, uint_t, ulong_t); + +static void +fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp, + uint_t fake_restore, int argc, uintptr_t *argv) +{ + int i, x, cap = MIN(argc, probe->ftp_nargs); + int inc = (fake_restore ? 16 : 0); + + /* + * The only way we'll hit the fake_restore case is if a USDT probe is + * invoked as a tail-call. While it wouldn't be incorrect, we can + * avoid a call to fasttrap_getreg(), and safely use rp->r_sp + * directly since a tail-call can't be made if the invoked function + * would use the argument dump space (i.e. if there were more than + * 6 arguments). We take this shortcut because unconditionally rooting + * around for R_FP (R_SP + 16) would be unnecessarily painful. 
+ */ + + if (curproc->p_model == DATAMODEL_NATIVE) { + struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS); + uintptr_t v; + + for (i = 0; i < cap; i++) { + x = probe->ftp_argmap[i]; + + if (x < 6) + argv[i] = fasttrap_getreg(rp, R_O0 + x + inc); + else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0) + argv[i] = 0; + } + + } else { + struct frame32 *fr = (struct frame32 *)rp->r_sp; + uint32_t v; + + for (i = 0; i < cap; i++) { + x = probe->ftp_argmap[i]; + + if (x < 6) + argv[i] = fasttrap_getreg(rp, R_O0 + x + inc); + else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0) + argv[i] = 0; + } + } + + for (; i < argc; i++) { + argv[i] = 0; + } +} + +static void +fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid, + uint_t fake_restore) +{ + fasttrap_tracepoint_t *tp; + fasttrap_bucket_t *bucket; + fasttrap_id_t *id; + kmutex_t *pid_mtx; + dtrace_icookie_t cookie; + + pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; + mutex_enter(pid_mtx); + bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; + + for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { + if (pid == tp->ftt_pid && pc == tp->ftt_pc && + tp->ftt_proc->ftpc_acount != 0) + break; + } + + /* + * Don't sweat it if we can't find the tracepoint again; unlike + * when we're in fasttrap_pid_probe(), finding the tracepoint here + * is not essential to the correct execution of the process. 
+ */ + if (tp == NULL || tp->ftt_retids == NULL) { + mutex_exit(pid_mtx); + return; + } + + for (id = tp->ftt_retids; id != NULL; id = id->fti_next) { + fasttrap_probe_t *probe = id->fti_probe; + + if (id->fti_ptype == DTFTP_POST_OFFSETS) { + if (probe->ftp_argmap != NULL && fake_restore) { + uintptr_t t[5]; + + fasttrap_usdt_args(probe, rp, fake_restore, + sizeof (t) / sizeof (t[0]), t); + + cookie = dtrace_interrupt_disable(); + DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE); + dtrace_probe(probe->ftp_id, t[0], t[1], + t[2], t[3], t[4]); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE); + dtrace_interrupt_enable(cookie); + + } else if (probe->ftp_argmap != NULL) { + uintptr_t t[5]; + + fasttrap_usdt_args(probe, rp, fake_restore, + sizeof (t) / sizeof (t[0]), t); + + dtrace_probe(probe->ftp_id, t[0], t[1], + t[2], t[3], t[4]); + + } else if (fake_restore) { + uintptr_t arg0 = fasttrap_getreg(rp, R_I0); + uintptr_t arg1 = fasttrap_getreg(rp, R_I1); + uintptr_t arg2 = fasttrap_getreg(rp, R_I2); + uintptr_t arg3 = fasttrap_getreg(rp, R_I3); + uintptr_t arg4 = fasttrap_getreg(rp, R_I4); + + cookie = dtrace_interrupt_disable(); + DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE); + dtrace_probe(probe->ftp_id, arg0, arg1, + arg2, arg3, arg4); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE); + dtrace_interrupt_enable(cookie); + + } else { + dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, + rp->r_o2, rp->r_o3, rp->r_o4); + } + + continue; + } + + /* + * If this is only a possible return point, we must + * be looking at a potential tail call in leaf context. + * If the %npc is still within this function, then we + * must have misidentified a jmpl as a tail-call when it + * is, in fact, part of a jump table. It would be nice to + * remove this tracepoint, but this is neither the time + * nor the place. 
+ */ + if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) && + rp->r_npc - probe->ftp_faddr < probe->ftp_fsize) + continue; + + /* + * It's possible for a function to branch to the delay slot + * of an instruction that we've identified as a return site. + * We can detect this spurious return probe activation by + * observing that in this case %npc will be %pc + 4 and %npc + * will be inside the current function (unless the user is + * doing _crazy_ instruction picking in which case there's + * very little we can do). The second check is important + * in case the last instructions of a function make a tail- + * call to the function located immediately subsequent. + */ + if (rp->r_npc == rp->r_pc + 4 && + rp->r_npc - probe->ftp_faddr < probe->ftp_fsize) + continue; + + /* + * The first argument is the offset of the return tracepoint + * in the function; the remaining arguments are the return + * values. + * + * If fake_restore is set, we need to pull the return values + * out of the %i's rather than the %o's -- a little trickier. 
+ */ + if (!fake_restore) { + dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr, + rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3); + } else { + uintptr_t arg0 = fasttrap_getreg(rp, R_I0); + uintptr_t arg1 = fasttrap_getreg(rp, R_I1); + uintptr_t arg2 = fasttrap_getreg(rp, R_I2); + uintptr_t arg3 = fasttrap_getreg(rp, R_I3); + + cookie = dtrace_interrupt_disable(); + DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE); + dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr, + arg0, arg1, arg2, arg3); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE); + dtrace_interrupt_enable(cookie); + } + } + + mutex_exit(pid_mtx); +} + +int +fasttrap_pid_probe(struct regs *rp) +{ + proc_t *p = curproc; + fasttrap_tracepoint_t *tp, tp_local; + fasttrap_id_t *id; + pid_t pid; + uintptr_t pc = rp->r_pc; + uintptr_t npc = rp->r_npc; + uintptr_t orig_pc = pc; + fasttrap_bucket_t *bucket; + kmutex_t *pid_mtx; + uint_t fake_restore = 0, is_enabled = 0; + dtrace_icookie_t cookie; + + /* + * It's possible that a user (in a veritable orgy of bad planning) + * could redirect this thread's flow of control before it reached the + * return probe fasttrap. In this case we need to kill the process + * since it's in a unrecoverable state. + */ + if (curthread->t_dtrace_step) { + ASSERT(curthread->t_dtrace_on); + fasttrap_sigtrap(p, curthread, pc); + return (0); + } + + /* + * Clear all user tracing flags. + */ + curthread->t_dtrace_ft = 0; + curthread->t_dtrace_pc = 0; + curthread->t_dtrace_npc = 0; + curthread->t_dtrace_scrpc = 0; + curthread->t_dtrace_astpc = 0; + + /* + * Treat a child created by a call to vfork(2) as if it were its + * parent. We know that there's only one thread of control in such a + * process: this one. + */ + while (p->p_flag & SVFORK) { + p = p->p_parent; + } + + pid = p->p_pid; + pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; + mutex_enter(pid_mtx); + bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; + + /* + * Lookup the tracepoint that the process just hit. 
+ */ + for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { + if (pid == tp->ftt_pid && pc == tp->ftt_pc && + tp->ftt_proc->ftpc_acount != 0) + break; + } + + /* + * If we couldn't find a matching tracepoint, either a tracepoint has + * been inserted without using the pid ioctl interface (see + * fasttrap_ioctl), or somehow we have mislaid this tracepoint. + */ + if (tp == NULL) { + mutex_exit(pid_mtx); + return (-1); + } + + for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { + fasttrap_probe_t *probe = id->fti_probe; + int isentry = (id->fti_ptype == DTFTP_ENTRY); + + if (id->fti_ptype == DTFTP_IS_ENABLED) { + is_enabled = 1; + continue; + } + + /* + * We note that this was an entry probe to help ustack() find + * the first caller. + */ + if (isentry) { + cookie = dtrace_interrupt_disable(); + DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); + } + dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2, + rp->r_o3, rp->r_o4); + if (isentry) { + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); + dtrace_interrupt_enable(cookie); + } + } + + /* + * We're about to do a bunch of work so we cache a local copy of + * the tracepoint to emulate the instruction, and then find the + * tracepoint again later if we need to light up any return probes. + */ + tp_local = *tp; + mutex_exit(pid_mtx); + tp = &tp_local; + + /* + * If there's an is-enabled probe connected to this tracepoint it + * means that there was a 'mov %g0, %o0' instruction that was placed + * there by DTrace when the binary was linked. As this probe is, in + * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can + * bypass all the instruction emulation logic since we know the + * inevitable result. It's possible that a user could construct a + * scenario where the 'is-enabled' probe was on some other + * instruction, but that would be a rather exotic way to shoot oneself + * in the foot. 
+ */ + if (is_enabled) { + rp->r_o0 = 1; + pc = rp->r_npc; + npc = pc + 4; + goto done; + } + + /* + * We emulate certain types of instructions to ensure correctness + * (in the case of position dependent instructions) or optimize + * common cases. The rest we have the thread execute back in user- + * land. + */ + switch (tp->ftt_type) { + case FASTTRAP_T_SAVE: + { + int32_t imm; + + /* + * This an optimization to let us handle function entry + * probes more efficiently. Many functions begin with a save + * instruction that follows the pattern: + * save %sp, , %sp + * + * Meanwhile, we've stashed the instruction: + * save %g1, %g0, %sp + * + * off of %g7, so all we have to do is stick the right value + * into %g1 and reset %pc to point to the instruction we've + * cleverly hidden (%npc should not be touched). + */ + + imm = tp->ftt_instr << 19; + imm >>= 19; + rp->r_g1 = rp->r_sp + imm; + pc = rp->r_g7 + FASTTRAP_OFF_SAVE; + break; + } + + case FASTTRAP_T_RESTORE: + { + ulong_t value; + uint_t rd; + + /* + * This is an optimization to let us handle function + * return probes more efficiently. Most non-leaf functions + * end with the sequence: + * ret + * restore , , %oX + * + * We've stashed the instruction: + * restore %g0, %g0, %g0 + * + * off of %g7 so we just need to place the correct value + * in the right %i register (since after our fake-o + * restore, the %i's will become the %o's) and set the %pc + * to point to our hidden restore. We also set fake_restore to + * let fasttrap_return_common() know that it will find the + * return values in the %i's rather than the %o's. + */ + + if (I(tp->ftt_instr)) { + int32_t imm; + + imm = tp->ftt_instr << 19; + imm >>= 19; + value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm; + } else { + value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + + fasttrap_getreg(rp, RS2(tp->ftt_instr)); + } + + /* + * Convert %o's to %i's; leave %g's as they are. + */ + rd = RD(tp->ftt_instr); + fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? 
rd + 16 : rd, value); + + pc = rp->r_g7 + FASTTRAP_OFF_RESTORE; + fake_restore = 1; + break; + } + + case FASTTRAP_T_RETURN: + { + uintptr_t target; + + /* + * A return instruction is like a jmpl (without the link + * part) that executes an implicit restore. We've stashed + * the instruction: + * return %o0 + * + * off of %g7 so we just need to place the target in %o0 + * and set the %pc to point to the stashed return instruction. + * We use %o0 since that register disappears after the return + * executes, erasing any evidence of this tampering. + */ + if (I(tp->ftt_instr)) { + int32_t imm; + + imm = tp->ftt_instr << 19; + imm >>= 19; + target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm; + } else { + target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + + fasttrap_getreg(rp, RS2(tp->ftt_instr)); + } + + fasttrap_putreg(rp, R_O0, target); + + pc = rp->r_g7 + FASTTRAP_OFF_RETURN; + fake_restore = 1; + break; + } + + case FASTTRAP_T_OR: + { + ulong_t value; + + if (I(tp->ftt_instr)) { + int32_t imm; + + imm = tp->ftt_instr << 19; + imm >>= 19; + value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm; + } else { + value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | + fasttrap_getreg(rp, RS2(tp->ftt_instr)); + } + + fasttrap_putreg(rp, RD(tp->ftt_instr), value); + pc = rp->r_npc; + npc = pc + 4; + break; + } + + case FASTTRAP_T_SETHI: + if (RD(tp->ftt_instr) != R_G0) { + uint32_t imm32 = tp->ftt_instr << 10; + fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32); + } + pc = rp->r_npc; + npc = pc + 4; + break; + + case FASTTRAP_T_CCR: + { + uint_t c, v, z, n, taken; + uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT; + + if (tp->ftt_cc != 0) + ccr >>= 4; + + c = (ccr >> 0) & 1; + v = (ccr >> 1) & 1; + z = (ccr >> 2) & 1; + n = (ccr >> 3) & 1; + + switch (tp->ftt_code) { + case 0x0: /* BN */ + taken = 0; break; + case 0x1: /* BE */ + taken = z; break; + case 0x2: /* BLE */ + taken = z | (n ^ v); break; + case 0x3: /* BL */ + taken = n ^ v; break; + case 0x4: /* BLEU */ + 
taken = c | z; break; + case 0x5: /* BCS (BLU) */ + taken = c; break; + case 0x6: /* BNEG */ + taken = n; break; + case 0x7: /* BVS */ + taken = v; break; + case 0x8: /* BA */ + /* + * We handle the BA case differently since the annul + * bit means something slightly different. + */ + panic("fasttrap: mishandled a branch"); + taken = 1; break; + case 0x9: /* BNE */ + taken = ~z; break; + case 0xa: /* BG */ + taken = ~(z | (n ^ v)); break; + case 0xb: /* BGE */ + taken = ~(n ^ v); break; + case 0xc: /* BGU */ + taken = ~(c | z); break; + case 0xd: /* BCC (BGEU) */ + taken = ~c; break; + case 0xe: /* BPOS */ + taken = ~n; break; + case 0xf: /* BVC */ + taken = ~v; break; + } + + if (taken & 1) { + pc = rp->r_npc; + npc = tp->ftt_dest; + } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) { + /* + * Untaken annulled branches don't execute the + * instruction in the delay slot. + */ + pc = rp->r_npc + 4; + npc = pc + 4; + } else { + pc = rp->r_npc; + npc = pc + 4; + } + break; + } + + case FASTTRAP_T_FCC: + { + uint_t fcc; + uint_t taken; + uint64_t fsr; + + dtrace_getfsr(&fsr); + + if (tp->ftt_cc == 0) { + fcc = (fsr >> 10) & 0x3; + } else { + uint_t shift; + ASSERT(tp->ftt_cc <= 3); + shift = 30 + tp->ftt_cc * 2; + fcc = (fsr >> shift) & 0x3; + } + + switch (tp->ftt_code) { + case 0x0: /* FBN */ + taken = (1 << fcc) & (0|0|0|0); break; + case 0x1: /* FBNE */ + taken = (1 << fcc) & (8|4|2|0); break; + case 0x2: /* FBLG */ + taken = (1 << fcc) & (0|4|2|0); break; + case 0x3: /* FBUL */ + taken = (1 << fcc) & (8|0|2|0); break; + case 0x4: /* FBL */ + taken = (1 << fcc) & (0|0|2|0); break; + case 0x5: /* FBUG */ + taken = (1 << fcc) & (8|4|0|0); break; + case 0x6: /* FBG */ + taken = (1 << fcc) & (0|4|0|0); break; + case 0x7: /* FBU */ + taken = (1 << fcc) & (8|0|0|0); break; + case 0x8: /* FBA */ + /* + * We handle the FBA case differently since the annul + * bit means something slightly different. 
+ */ + panic("fasttrap: mishandled a branch"); + taken = (1 << fcc) & (8|4|2|1); break; + case 0x9: /* FBE */ + taken = (1 << fcc) & (0|0|0|1); break; + case 0xa: /* FBUE */ + taken = (1 << fcc) & (8|0|0|1); break; + case 0xb: /* FBGE */ + taken = (1 << fcc) & (0|4|0|1); break; + case 0xc: /* FBUGE */ + taken = (1 << fcc) & (8|4|0|1); break; + case 0xd: /* FBLE */ + taken = (1 << fcc) & (0|0|2|1); break; + case 0xe: /* FBULE */ + taken = (1 << fcc) & (8|0|2|1); break; + case 0xf: /* FBO */ + taken = (1 << fcc) & (0|4|2|1); break; + } + + if (taken) { + pc = rp->r_npc; + npc = tp->ftt_dest; + } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) { + /* + * Untaken annulled branches don't execute the + * instruction in the delay slot. + */ + pc = rp->r_npc + 4; + npc = pc + 4; + } else { + pc = rp->r_npc; + npc = pc + 4; + } + break; + } + + case FASTTRAP_T_REG: + { + int64_t value; + uint_t taken; + uint_t reg = RS1(tp->ftt_instr); + + /* + * An ILP32 process shouldn't be using a branch predicated on + * an %i or an %l since it would violate the ABI. It's a + * violation of the ABI because we can't ensure deterministic + * behavior. We should have identified this case when we + * enabled the probe. + */ + ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16); + + value = (int64_t)fasttrap_getreg(rp, reg); + + switch (tp->ftt_code) { + case 0x1: /* BRZ */ + taken = (value == 0); break; + case 0x2: /* BRLEZ */ + taken = (value <= 0); break; + case 0x3: /* BRLZ */ + taken = (value < 0); break; + case 0x5: /* BRNZ */ + taken = (value != 0); break; + case 0x6: /* BRGZ */ + taken = (value > 0); break; + case 0x7: /* BRGEZ */ + taken = (value >= 0); break; + default: + case 0x0: + case 0x4: + panic("fasttrap: mishandled a branch"); + } + + if (taken) { + pc = rp->r_npc; + npc = tp->ftt_dest; + } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) { + /* + * Untaken annulled branches don't execute the + * instruction in the delay slot. 
+ */ + pc = rp->r_npc + 4; + npc = pc + 4; + } else { + pc = rp->r_npc; + npc = pc + 4; + } + break; + } + + case FASTTRAP_T_ALWAYS: + /* + * BAs, BA,As... + */ + + if (tp->ftt_flags & FASTTRAP_F_ANNUL) { + /* + * Annulled branch always instructions never execute + * the instruction in the delay slot. + */ + pc = tp->ftt_dest; + npc = tp->ftt_dest + 4; + } else { + pc = rp->r_npc; + npc = tp->ftt_dest; + } + break; + + case FASTTRAP_T_RDPC: + fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc); + pc = rp->r_npc; + npc = pc + 4; + break; + + case FASTTRAP_T_CALL: + /* + * It's a call _and_ link remember... + */ + rp->r_o7 = rp->r_pc; + pc = rp->r_npc; + npc = tp->ftt_dest; + break; + + case FASTTRAP_T_JMPL: + pc = rp->r_npc; + + if (I(tp->ftt_instr)) { + uint_t rs1 = RS1(tp->ftt_instr); + int32_t imm; + + imm = tp->ftt_instr << 19; + imm >>= 19; + npc = fasttrap_getreg(rp, rs1) + imm; + } else { + uint_t rs1 = RS1(tp->ftt_instr); + uint_t rs2 = RS2(tp->ftt_instr); + + npc = fasttrap_getreg(rp, rs1) + + fasttrap_getreg(rp, rs2); + } + + /* + * Do the link part of the jump-and-link instruction. + */ + fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc); + + break; + + case FASTTRAP_T_COMMON: + { + curthread->t_dtrace_scrpc = rp->r_g7; + curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET; + + /* + * Copy the instruction to a reserved location in the + * user-land thread structure, then set the PC to that + * location and leave the NPC alone. We take pains to ensure + * consistency in the instruction stream (See SPARC + * Architecture Manual Version 9, sections 8.4.7, A.20, and + * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1, + * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the + * instruction into the user's address space without + * bypassing the I$. There's no AS_USER version of this ASI + * (as exist for other ASIs) so we use the lofault + * mechanism to catch faults. 
+ */ + if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) { + /* + * If the copyout fails, then the process's state + * is not consistent (the effects of the traced + * instruction will never be seen). This process + * cannot be allowed to continue execution. + */ + fasttrap_sigtrap(curproc, curthread, pc); + return (0); + } + + curthread->t_dtrace_pc = pc; + curthread->t_dtrace_npc = npc; + curthread->t_dtrace_on = 1; + + pc = curthread->t_dtrace_scrpc; + + if (tp->ftt_retids != NULL) { + curthread->t_dtrace_step = 1; + curthread->t_dtrace_ret = 1; + npc = curthread->t_dtrace_astpc; + } + break; + } + + default: + panic("fasttrap: mishandled an instruction"); + } + + /* + * This bit me in the ass a couple of times, so lets toss this + * in as a cursory sanity check. + */ + ASSERT(pc != rp->r_g7 + 4); + ASSERT(pc != rp->r_g7 + 8); + +done: + /* + * If there were no return probes when we first found the tracepoint, + * we should feel no obligation to honor any return probes that were + * subsequently enabled -- they'll just have to wait until the next + * time around. + */ + if (tp->ftt_retids != NULL) { + /* + * We need to wait until the results of the instruction are + * apparent before invoking any return probes. If this + * instruction was emulated we can just call + * fasttrap_return_common(); if it needs to be executed, we + * need to wait until we return to the kernel. 
+ */ + if (tp->ftt_type != FASTTRAP_T_COMMON) { + fasttrap_return_common(rp, orig_pc, pid, fake_restore); + } else { + ASSERT(curthread->t_dtrace_ret != 0); + ASSERT(curthread->t_dtrace_pc == orig_pc); + ASSERT(curthread->t_dtrace_scrpc == rp->r_g7); + ASSERT(npc == curthread->t_dtrace_astpc); + } + } + + ASSERT(pc != 0); + rp->r_pc = pc; + rp->r_npc = npc; + + return (0); +} + +int +fasttrap_return_probe(struct regs *rp) +{ + proc_t *p = ttoproc(curthread); + pid_t pid; + uintptr_t pc = curthread->t_dtrace_pc; + uintptr_t npc = curthread->t_dtrace_npc; + + curthread->t_dtrace_pc = 0; + curthread->t_dtrace_npc = 0; + curthread->t_dtrace_scrpc = 0; + curthread->t_dtrace_astpc = 0; + + /* + * Treat a child created by a call to vfork(2) as if it were its + * parent. We know there's only one thread of control in such a + * process: this one. + */ + while (p->p_flag & SVFORK) { + p = p->p_parent; + } + + /* + * We set the %pc and %npc to their values when the traced + * instruction was initially executed so that it appears to + * dtrace_probe() that we're on the original instruction, and so that + * the user can't easily detect our complex web of lies. + * dtrace_return_probe() (our caller) will correctly set %pc and %npc + * after we return. + */ + rp->r_pc = pc; + rp->r_npc = npc; + + pid = p->p_pid; + fasttrap_return_common(rp, pc, pid, 0); + + return (0); +} + +int +fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp) +{ + fasttrap_instr_t instr = FASTTRAP_INSTR; + + if (uwrite(p, &instr, 4, tp->ftt_pc) != 0) + return (-1); + + return (0); +} + +int +fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp) +{ + fasttrap_instr_t instr; + + /* + * Distinguish between read or write failures and a changed + * instruction. 
+ */ + if (uread(p, &instr, 4, tp->ftt_pc) != 0) + return (0); + if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR) + return (0); + if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0) + return (-1); + + return (0); +} + +int +fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, + fasttrap_probe_type_t type) +{ + uint32_t instr; + int32_t disp; + + /* + * Read the instruction at the given address out of the process's + * address space. We don't have to worry about a debugger + * changing this instruction before we overwrite it with our trap + * instruction since P_PR_LOCK is set. + */ + if (uread(p, &instr, 4, pc) != 0) + return (-1); + + /* + * Decode the instruction to fill in the probe flags. We can have + * the process execute most instructions on its own using a pc/npc + * trick, but pc-relative control transfer present a problem since + * we're relocating the instruction. We emulate these instructions + * in the kernel. We assume a default type and over-write that as + * needed. + * + * pc-relative instructions must be emulated for correctness; + * other instructions (which represent a large set of commonly traced + * instructions) are emulated or otherwise optimized for performance. + */ + tp->ftt_type = FASTTRAP_T_COMMON; + if (OP(instr) == 1) { + /* + * Call instructions. + */ + tp->ftt_type = FASTTRAP_T_CALL; + disp = DISP30(instr) << 2; + tp->ftt_dest = pc + (intptr_t)disp; + + } else if (OP(instr) == 0) { + /* + * Branch instructions. + * + * Unconditional branches need careful attention when they're + * annulled: annulled unconditional branches never execute + * the instruction in the delay slot. + */ + switch (OP2(instr)) { + case OP2_ILLTRAP: + case 0x7: + /* + * The compiler may place an illtrap after a call to + * a function that returns a structure. 
In the case of + * a returned structure, the compiler places an illtrap + * whose const22 field is the size of the returned + * structure immediately following the delay slot of + * the call. To stay out of the way, we refuse to + * place tracepoints on top of illtrap instructions. + * + * This is one of the dumbest architectural decisions + * I've ever had to work around. + * + * We also identify the only illegal op2 value (See + * SPARC Architecture Manual Version 9, E.2 table 31). + */ + return (-1); + + case OP2_BPcc: + if (COND(instr) == 8) { + tp->ftt_type = FASTTRAP_T_ALWAYS; + } else { + /* + * Check for an illegal instruction. + */ + if (CC(instr) & 1) + return (-1); + tp->ftt_type = FASTTRAP_T_CCR; + tp->ftt_cc = CC(instr); + tp->ftt_code = COND(instr); + } + + if (A(instr) != 0) + tp->ftt_flags |= FASTTRAP_F_ANNUL; + + disp = DISP19(instr); + disp <<= 13; + disp >>= 11; + tp->ftt_dest = pc + (intptr_t)disp; + break; + + case OP2_Bicc: + if (COND(instr) == 8) { + tp->ftt_type = FASTTRAP_T_ALWAYS; + } else { + tp->ftt_type = FASTTRAP_T_CCR; + tp->ftt_cc = 0; + tp->ftt_code = COND(instr); + } + + if (A(instr) != 0) + tp->ftt_flags |= FASTTRAP_F_ANNUL; + + disp = DISP22(instr); + disp <<= 10; + disp >>= 8; + tp->ftt_dest = pc + (intptr_t)disp; + break; + + case OP2_BPr: + /* + * Check for an illegal instruction. + */ + if ((RCOND(instr) & 3) == 0) + return (-1); + + /* + * It's a violation of the v8plus ABI to use a + * register-predicated branch in a 32-bit app if + * the register used is an %l or an %i (%gs and %os + * are legit because they're not saved to the stack + * in 32-bit words when we take a trap). 
+ */ + if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16) + return (-1); + + tp->ftt_type = FASTTRAP_T_REG; + if (A(instr) != 0) + tp->ftt_flags |= FASTTRAP_F_ANNUL; + disp = DISP16(instr); + disp <<= 16; + disp >>= 14; + tp->ftt_dest = pc + (intptr_t)disp; + tp->ftt_code = RCOND(instr); + break; + + case OP2_SETHI: + tp->ftt_type = FASTTRAP_T_SETHI; + break; + + case OP2_FBPfcc: + if (COND(instr) == 8) { + tp->ftt_type = FASTTRAP_T_ALWAYS; + } else { + tp->ftt_type = FASTTRAP_T_FCC; + tp->ftt_cc = CC(instr); + tp->ftt_code = COND(instr); + } + + if (A(instr) != 0) + tp->ftt_flags |= FASTTRAP_F_ANNUL; + + disp = DISP19(instr); + disp <<= 13; + disp >>= 11; + tp->ftt_dest = pc + (intptr_t)disp; + break; + + case OP2_FBfcc: + if (COND(instr) == 8) { + tp->ftt_type = FASTTRAP_T_ALWAYS; + } else { + tp->ftt_type = FASTTRAP_T_FCC; + tp->ftt_cc = 0; + tp->ftt_code = COND(instr); + } + + if (A(instr) != 0) + tp->ftt_flags |= FASTTRAP_F_ANNUL; + + disp = DISP22(instr); + disp <<= 10; + disp >>= 8; + tp->ftt_dest = pc + (intptr_t)disp; + break; + } + + } else if (OP(instr) == 2) { + switch (OP3(instr)) { + case OP3_RETURN: + tp->ftt_type = FASTTRAP_T_RETURN; + break; + + case OP3_JMPL: + tp->ftt_type = FASTTRAP_T_JMPL; + break; + + case OP3_RD: + if (RS1(instr) == 5) + tp->ftt_type = FASTTRAP_T_RDPC; + break; + + case OP3_SAVE: + /* + * We optimize for save instructions at function + * entry; see the comment in fasttrap_pid_probe() + * (near FASTTRAP_T_SAVE) for details. + */ + if (fasttrap_optimize_save != 0 && + type == DTFTP_ENTRY && + I(instr) == 1 && RD(instr) == R_SP) + tp->ftt_type = FASTTRAP_T_SAVE; + break; + + case OP3_RESTORE: + /* + * We optimize restore instructions at function + * return; see the comment in fasttrap_pid_probe() + * (near FASTTRAP_T_RESTORE) for details. + * + * rd must be an %o or %g register. 
+ */ + if ((RD(instr) & 0x10) == 0) + tp->ftt_type = FASTTRAP_T_RESTORE; + break; + + case OP3_OR: + /* + * A large proportion of instructions in the delay + * slot of retl instructions are or's so we emulate + * these downstairs as an optimization. + */ + tp->ftt_type = FASTTRAP_T_OR; + break; + + case OP3_TCC: + /* + * Breakpoint instructions are effectively position- + * dependent since the debugger uses the %pc value + * to lookup which breakpoint was executed. As a + * result, we can't actually instrument breakpoints. + */ + if (SW_TRAP(instr) == ST_BREAKPOINT) + return (-1); + break; + + case 0x19: + case 0x1d: + case 0x29: + case 0x33: + case 0x3f: + /* + * Identify illegal instructions (See SPARC + * Architecture Manual Version 9, E.2 table 32). + */ + return (-1); + } + } else if (OP(instr) == 3) { + uint32_t op3 = OP3(instr); + + /* + * Identify illegal instructions (See SPARC Architecture + * Manual Version 9, E.2 table 33). + */ + if ((op3 & 0x28) == 0x28) { + if (op3 != OP3_PREFETCH && op3 != OP3_CASA && + op3 != OP3_PREFETCHA && op3 != OP3_CASXA) + return (-1); + } else { + if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31) + return (-1); + } + } + + tp->ftt_instr = instr; + + /* + * We don't know how this tracepoint is going to be used, but in case + * it's used as part of a function return probe, we need to indicate + * whether it's always a return site or only potentially a return + * site. If it's part of a return probe, it's always going to be a + * return from that function if it's a restore instruction or if + * the previous instruction was a return. If we could reliably + * distinguish jump tables from return sites, this wouldn't be + * necessary. 
+ */ + if (tp->ftt_type != FASTTRAP_T_RESTORE && + (uread(p, &instr, 4, pc - sizeof (instr)) != 0 || + !(OP(instr) == 2 && OP3(instr) == OP3_RETURN))) + tp->ftt_flags |= FASTTRAP_F_RETMAYBE; + + return (0); +} + +/*ARGSUSED*/ +uint64_t +fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, + int aframes) +{ + return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno)); +} + +/*ARGSUSED*/ +uint64_t +fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, + int aframes) +{ + return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno)); +} + +static uint64_t fasttrap_getreg_fast_cnt; +static uint64_t fasttrap_getreg_mpcb_cnt; +static uint64_t fasttrap_getreg_slow_cnt; + +static ulong_t +fasttrap_getreg(struct regs *rp, uint_t reg) +{ + ulong_t value; + dtrace_icookie_t cookie; + struct machpcb *mpcb; + extern ulong_t dtrace_getreg_win(uint_t, uint_t); + + /* + * We have the %os and %gs in our struct regs, but if we need to + * snag a %l or %i we need to go scrounging around in the process's + * address space. + */ + if (reg == 0) + return (0); + + if (reg < 16) + return ((&rp->r_g1)[reg - 1]); + + /* + * Before we look at the user's stack, we'll check the register + * windows to see if the information we want is in there. + */ + cookie = dtrace_interrupt_disable(); + if (dtrace_getotherwin() > 0) { + value = dtrace_getreg_win(reg, 1); + dtrace_interrupt_enable(cookie); + + atomic_add_64(&fasttrap_getreg_fast_cnt, 1); + + return (value); + } + dtrace_interrupt_enable(cookie); + + /* + * First check the machpcb structure to see if we've already read + * in the register window we're looking for; if we haven't, (and + * we probably haven't) try to copy in the value of the register. 
+ */ + /* LINTED - alignment */ + mpcb = (struct machpcb *)((caddr_t)rp - REGOFF); + + if (get_udatamodel() == DATAMODEL_NATIVE) { + struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS); + + if (mpcb->mpcb_wbcnt > 0) { + struct rwindow *rwin = (void *)mpcb->mpcb_wbuf; + int i = mpcb->mpcb_wbcnt; + do { + i--; + if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp) + continue; + + atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1); + return (rwin[i].rw_local[reg - 16]); + } while (i > 0); + } + + if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0) + goto err; + } else { + struct frame32 *fr = + (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp; + uint32_t *v32 = (uint32_t *)&value; + + if (mpcb->mpcb_wbcnt > 0) { + struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf; + int i = mpcb->mpcb_wbcnt; + do { + i--; + if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp) + continue; + + atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1); + return (rwin[i].rw_local[reg - 16]); + } while (i > 0); + } + + if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0) + goto err; + + v32[0] = 0; + } + + atomic_add_64(&fasttrap_getreg_slow_cnt, 1); + return (value); + +err: + /* + * If the copy in failed, the process will be in a irrecoverable + * state, and we have no choice but to kill it. + */ + psignal(ttoproc(curthread), SIGILL); + return (0); +} + +static uint64_t fasttrap_putreg_fast_cnt; +static uint64_t fasttrap_putreg_mpcb_cnt; +static uint64_t fasttrap_putreg_slow_cnt; + +static void +fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value) +{ + dtrace_icookie_t cookie; + struct machpcb *mpcb; + extern void dtrace_putreg_win(uint_t, ulong_t); + + if (reg == 0) + return; + + if (reg < 16) { + (&rp->r_g1)[reg - 1] = value; + return; + } + + /* + * If the user process is still using some register windows, we + * can just place the value in the correct window. 
+ */ + cookie = dtrace_interrupt_disable(); + if (dtrace_getotherwin() > 0) { + dtrace_putreg_win(reg, value); + dtrace_interrupt_enable(cookie); + atomic_add_64(&fasttrap_putreg_fast_cnt, 1); + return; + } + dtrace_interrupt_enable(cookie); + + /* + * First see if there's a copy of the register window in the + * machpcb structure that we can modify; if there isn't try to + * copy out the value. If that fails, we try to create a new + * register window in the machpcb structure. While this isn't + * _precisely_ the intended use of the machpcb structure, it + * can't cause any problems since we know at this point in the + * code that all of the user's data have been flushed out of the + * register file (since %otherwin is 0). + */ + /* LINTED - alignment */ + mpcb = (struct machpcb *)((caddr_t)rp - REGOFF); + + if (get_udatamodel() == DATAMODEL_NATIVE) { + struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS); + /* LINTED - alignment */ + struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf; + + if (mpcb->mpcb_wbcnt > 0) { + int i = mpcb->mpcb_wbcnt; + do { + i--; + if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp) + continue; + + rwin[i].rw_local[reg - 16] = value; + atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1); + return; + } while (i > 0); + } + + if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) { + if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr, + &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0) + goto err; + + rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value; + mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp; + mpcb->mpcb_wbcnt++; + atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1); + return; + } + } else { + struct frame32 *fr = + (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp; + /* LINTED - alignment */ + struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf; + uint32_t v32 = (uint32_t)value; + + if (mpcb->mpcb_wbcnt > 0) { + int i = mpcb->mpcb_wbcnt; + do { + i--; + if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp) + continue; + + 
rwin[i].rw_local[reg - 16] = v32; + atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1); + return; + } while (i > 0); + } + + if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) { + if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr, + &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0) + goto err; + + rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32; + mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp; + mpcb->mpcb_wbcnt++; + atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1); + return; + } + } + + atomic_add_64(&fasttrap_putreg_slow_cnt, 1); + return; + +err: + /* + * If we couldn't record this register's value, the process is in an + * irrecoverable state and we have no choice but to euthanize it. + */ + psignal(ttoproc(curthread), SIGILL); +} diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_isa.c b/sys/cddl/dev/dtrace/amd64/dtrace_isa.c index c58e88ec7..d9ed08006 100644 --- a/sys/cddl/dev/dtrace/amd64/dtrace_isa.c +++ b/sys/cddl/dev/dtrace/amd64/dtrace_isa.c @@ -42,6 +42,7 @@ #include #include +#include "regset.h" uint8_t dtrace_fuword8_nocheck(void *); uint16_t dtrace_fuword16_nocheck(void *); @@ -103,12 +104,11 @@ dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, { volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; - struct amd64_frame *frame; int ret = 0; ASSERT(pcstack == NULL || pcstack_limit > 0); - while (pc != 0 && sp != 0) { + while (pc != 0) { ret++; if (pcstack != NULL) { *pcstack++ = (uint64_t)pc; @@ -117,10 +117,12 @@ dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, break; } - frame = (struct amd64_frame *) sp; + if (sp == 0) + break; - pc = dtrace_fulword(&frame->f_retaddr); - sp = dtrace_fulword(&frame->f_frame); + pc = dtrace_fuword64((void *)(sp + + offsetof(struct amd64_frame, f_retaddr))); + sp = dtrace_fuword64((void *)sp); /* * This is totally bogus: if we faulted, we're going to clear @@ -141,7 +143,7 @@ dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) { proc_t *p = 
curproc; struct trapframe *tf; - uintptr_t pc, sp; + uintptr_t pc, sp, fp; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; int n; @@ -165,18 +167,28 @@ dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) return; pc = tf->tf_rip; + fp = tf->tf_rbp; sp = tf->tf_rsp; if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { + /* + * In an entry probe. The frame pointer has not yet been + * pushed (that happens in the function prologue). The + * best approach is to add the current pc as a missing top + * of stack and back the pc up to the caller, which is stored + * at the current stack pointer address since the call + * instruction puts it there right before the branch. + */ + *pcstack++ = (uint64_t)pc; pcstack_limit--; if (pcstack_limit <= 0) return; - pc = dtrace_fulword((void *) sp); + pc = dtrace_fuword64((void *) sp); } - n = dtrace_getustack_common(pcstack, pcstack_limit, pc, sp); + n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp); ASSERT(n >= 0); ASSERT(n <= pcstack_limit); @@ -193,7 +205,7 @@ dtrace_getustackdepth(void) { proc_t *p = curproc; struct trapframe *tf; - uintptr_t pc, sp; + uintptr_t pc, fp, sp; int n = 0; if (p == NULL || (tf = curthread->td_frame) == NULL) @@ -203,30 +215,40 @@ dtrace_getustackdepth(void) return (-1); pc = tf->tf_rip; + fp = tf->tf_rbp; sp = tf->tf_rsp; if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { - n++; + /* + * In an entry probe. The frame pointer has not yet been + * pushed (that happens in the function prologue). The + * best approach is to add the current pc as a missing top + * of stack and back the pc up to the caller, which is stored + * at the current stack pointer address since the call + * instruction puts it there right before the branch. 
+ */ - pc = dtrace_fulword((void *) sp); + pc = dtrace_fuword64((void *) sp); + n++; } - n += dtrace_getustack_common(NULL, 0, pc, sp); + n += dtrace_getustack_common(NULL, 0, pc, fp); return (n); } -#ifdef notyet void dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) { - klwp_t *lwp = ttolwp(curthread); proc_t *p = curproc; - struct regs *rp; - uintptr_t pc, sp, oldcontext; + struct trapframe *tf; + uintptr_t pc, sp, fp; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; +#ifdef notyet /* XXX signal stack */ + uintptr_t oldcontext; size_t s1, s2; +#endif if (*flags & CPU_DTRACE_FAULT) return; @@ -237,7 +259,7 @@ dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) /* * If there's no user context we still need to zero the stack. */ - if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL) + if (p == NULL || (tf = curthread->td_frame) == NULL) goto zero; *pcstack++ = (uint64_t)p->p_pid; @@ -246,12 +268,15 @@ dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) if (pcstack_limit <= 0) return; - pc = rp->r_pc; - sp = rp->r_fp; - oldcontext = lwp->lwp_oldcontext; + pc = tf->tf_rip; + sp = tf->tf_rsp; + fp = tf->tf_rbp; +#ifdef notyet /* XXX signal stack */ + oldcontext = lwp->lwp_oldcontext; s1 = sizeof (struct xframe) + 2 * sizeof (long); s2 = s1 + sizeof (siginfo_t); +#endif if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { *pcstack++ = (uint64_t)pc; @@ -260,19 +285,20 @@ dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) if (pcstack_limit <= 0) return; - if (p->p_model == DATAMODEL_NATIVE) - pc = dtrace_fulword((void *)rp->r_sp); - else - pc = dtrace_fuword32((void *)rp->r_sp); + pc = dtrace_fuword64((void *)sp); } - while (pc != 0 && sp != 0) { + while (pc != 0) { *pcstack++ = (uint64_t)pc; - *fpstack++ = sp; + *fpstack++ = fp; pcstack_limit--; if (pcstack_limit <= 0) break; + if (fp == 0) + break; + +#ifdef notyet /* XXX signal stack 
*/ if (oldcontext == sp + s1 || oldcontext == sp + s2) { ucontext_t *ucp = (ucontext_t *)oldcontext; greg_t *gregs = ucp->uc_mcontext.gregs; @@ -281,11 +307,12 @@ dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) pc = dtrace_fulword(&gregs[REG_PC]); oldcontext = dtrace_fulword(&ucp->uc_link); - } else { - struct xframe *fr = (struct xframe *)sp; - - pc = dtrace_fulword(&fr->fr_savpc); - sp = dtrace_fulword(&fr->fr_savfp); + } else +#endif /* XXX */ + { + pc = dtrace_fuword64((void *)(fp + + offsetof(struct amd64_frame, f_retaddr))); + fp = dtrace_fuword64((void *)fp); } /* @@ -301,9 +328,8 @@ dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) zero: while (pcstack_limit-- > 0) - *pcstack++ = NULL; + *pcstack++ = 0; } -#endif /*ARGSUSED*/ uint64_t @@ -412,31 +438,30 @@ dtrace_getstackdepth(int aframes) return depth - aframes; } -#ifdef notyet ulong_t -dtrace_getreg(struct regs *rp, uint_t reg) +dtrace_getreg(struct trapframe *rp, uint_t reg) { -#if defined(__amd64) + /* This table is dependent on reg.d. 
*/ int regmap[] = { - REG_GS, /* GS */ - REG_FS, /* FS */ - REG_ES, /* ES */ - REG_DS, /* DS */ - REG_RDI, /* EDI */ - REG_RSI, /* ESI */ - REG_RBP, /* EBP */ - REG_RSP, /* ESP */ - REG_RBX, /* EBX */ - REG_RDX, /* EDX */ - REG_RCX, /* ECX */ - REG_RAX, /* EAX */ - REG_TRAPNO, /* TRAPNO */ - REG_ERR, /* ERR */ - REG_RIP, /* EIP */ - REG_CS, /* CS */ - REG_RFL, /* EFL */ - REG_RSP, /* UESP */ - REG_SS /* SS */ + REG_GS, /* 0 GS */ + REG_FS, /* 1 FS */ + REG_ES, /* 2 ES */ + REG_DS, /* 3 DS */ + REG_RDI, /* 4 EDI */ + REG_RSI, /* 5 ESI */ + REG_RBP, /* 6 EBP, REG_FP */ + REG_RSP, /* 7 ESP */ + REG_RBX, /* 8 EBX, REG_R1 */ + REG_RDX, /* 9 EDX */ + REG_RCX, /* 10 ECX */ + REG_RAX, /* 11 EAX, REG_R0 */ + REG_TRAPNO, /* 12 TRAPNO */ + REG_ERR, /* 13 ERR */ + REG_RIP, /* 14 EIP, REG_PC */ + REG_CS, /* 15 CS */ + REG_RFL, /* 16 EFL, REG_PS */ + REG_RSP, /* 17 UESP, REG_SP */ + REG_SS /* 18 SS */ }; if (reg <= SS) { @@ -447,77 +472,68 @@ dtrace_getreg(struct regs *rp, uint_t reg) reg = regmap[reg]; } else { + /* This is dependent on reg.d. 
*/ reg -= SS + 1; } switch (reg) { case REG_RDI: - return (rp->r_rdi); + return (rp->tf_rdi); case REG_RSI: - return (rp->r_rsi); + return (rp->tf_rsi); case REG_RDX: - return (rp->r_rdx); + return (rp->tf_rdx); case REG_RCX: - return (rp->r_rcx); + return (rp->tf_rcx); case REG_R8: - return (rp->r_r8); + return (rp->tf_r8); case REG_R9: - return (rp->r_r9); + return (rp->tf_r9); case REG_RAX: - return (rp->r_rax); + return (rp->tf_rax); case REG_RBX: - return (rp->r_rbx); + return (rp->tf_rbx); case REG_RBP: - return (rp->r_rbp); + return (rp->tf_rbp); case REG_R10: - return (rp->r_r10); + return (rp->tf_r10); case REG_R11: - return (rp->r_r11); + return (rp->tf_r11); case REG_R12: - return (rp->r_r12); + return (rp->tf_r12); case REG_R13: - return (rp->r_r13); + return (rp->tf_r13); case REG_R14: - return (rp->r_r14); + return (rp->tf_r14); case REG_R15: - return (rp->r_r15); + return (rp->tf_r15); case REG_DS: - return (rp->r_ds); + return (rp->tf_ds); case REG_ES: - return (rp->r_es); + return (rp->tf_es); case REG_FS: - return (rp->r_fs); + return (rp->tf_fs); case REG_GS: - return (rp->r_gs); + return (rp->tf_gs); case REG_TRAPNO: - return (rp->r_trapno); + return (rp->tf_trapno); case REG_ERR: - return (rp->r_err); + return (rp->tf_err); case REG_RIP: - return (rp->r_rip); + return (rp->tf_rip); case REG_CS: - return (rp->r_cs); + return (rp->tf_cs); case REG_SS: - return (rp->r_ss); + return (rp->tf_ss); case REG_RFL: - return (rp->r_rfl); + return (rp->tf_rflags); case REG_RSP: - return (rp->r_rsp); + return (rp->tf_rsp); default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } - -#else - if (reg > SS) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); - return (0); - } - - return ((&rp->r_gs)[reg]); -#endif } -#endif static int dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) diff --git a/sys/cddl/dev/dtrace/amd64/instr_size.c b/sys/cddl/dev/dtrace/amd64/instr_size.c index 418d9f1fb..1acf2c581 100644 --- a/sys/cddl/dev/dtrace/amd64/instr_size.c 
+++ b/sys/cddl/dev/dtrace/amd64/instr_size.c @@ -47,6 +47,7 @@ typedef u_int model_t; #define DATAMODEL_NATIVE 0 int dtrace_instr_size(uchar_t *); +int dtrace_instr_size_isa(uchar_t *, model_t, int *); #endif #include @@ -124,6 +125,12 @@ dtrace_dis_isize(uchar_t *instr, dis_isize_t which, model_t model, int *rmindex) return (sz); } +int +dtrace_instr_size_isa(uchar_t *instr, model_t model, int *rmindex) +{ + return (dtrace_dis_isize(instr, DIS_ISIZE_INSTR, model, rmindex)); +} + int dtrace_instr_size(uchar_t *instr) { diff --git a/sys/cddl/dev/dtrace/amd64/regset.h b/sys/cddl/dev/dtrace/amd64/regset.h new file mode 100644 index 000000000..d6732ff5e --- /dev/null +++ b/sys/cddl/dev/dtrace/amd64/regset.h @@ -0,0 +1,127 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. 
*/ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +#ifndef _REGSET_H +#define _REGSET_H + +/* + * #pragma ident "@(#)regset.h 1.11 05/06/08 SMI" + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The names and offsets defined here should be specified by the + * AMD64 ABI suppl. + * + * We make fsbase and gsbase part of the lwp context (since they're + * the only way to access the full 64-bit address range via the segment + * registers) and thus belong here too. However we treat them as + * read-only; if %fs or %gs are updated, the results of the descriptor + * table lookup that those updates implicitly cause will be reflected + * in the corresponding fsbase and/or gsbase values the next time the + * context can be inspected. However it is NOT possible to override + * the fsbase/gsbase settings via this interface. + * + * Direct modification of the base registers (thus overriding the + * descriptor table base address) can be achieved with _lwp_setprivate. + */ + +#define REG_GSBASE 27 +#define REG_FSBASE 26 +#define REG_DS 25 +#define REG_ES 24 + +#define REG_GS 23 +#define REG_FS 22 +#define REG_SS 21 +#define REG_RSP 20 +#define REG_RFL 19 +#define REG_CS 18 +#define REG_RIP 17 +#define REG_ERR 16 +#define REG_TRAPNO 15 +#define REG_RAX 14 +#define REG_RCX 13 +#define REG_RDX 12 +#define REG_RBX 11 +#define REG_RBP 10 +#define REG_RSI 9 +#define REG_RDI 8 +#define REG_R8 7 +#define REG_R9 6 +#define REG_R10 5 +#define REG_R11 4 +#define REG_R12 3 +#define REG_R13 2 +#define REG_R14 1 +#define REG_R15 0 + +/* + * The names and offsets defined here are specified by i386 ABI suppl. 
+ */ + +#define SS 18 /* only stored on a privilege transition */ +#define UESP 17 /* only stored on a privilege transition */ +#define EFL 16 +#define CS 15 +#define EIP 14 +#define ERR 13 +#define TRAPNO 12 +#define EAX 11 +#define ECX 10 +#define EDX 9 +#define EBX 8 +#define ESP 7 +#define EBP 6 +#define ESI 5 +#define EDI 4 +#define DS 3 +#define ES 2 +#define FS 1 +#define GS 0 + +#define REG_PC EIP +#define REG_FP EBP +#define REG_SP UESP +#define REG_PS EFL +#define REG_R0 EAX +#define REG_R1 EDX + +#ifdef __cplusplus +} +#endif + +#endif /* _REGSET_H */ diff --git a/sys/cddl/dev/dtrace/dtrace_cddl.h b/sys/cddl/dev/dtrace/dtrace_cddl.h index 75fe8648c..d2adfbc07 100644 --- a/sys/cddl/dev/dtrace/dtrace_cddl.h +++ b/sys/cddl/dev/dtrace/dtrace_cddl.h @@ -36,6 +36,7 @@ typedef struct kdtrace_proc { int p_dtrace_probes; /* Are there probes for this proc? */ u_int64_t p_dtrace_count; /* Number of DTrace tracepoints */ void *p_dtrace_helpers; /* DTrace helpers, if any */ + int p_dtrace_model; } kdtrace_proc_t; @@ -59,6 +60,9 @@ typedef struct kdtrace_thread { /* Handling a return probe. */ u_int8_t _td_dtrace_ast; /* Saved ast flag. */ +#ifdef __amd64__ + u_int8_t _td_dtrace_reg; +#endif } _tds; u_long _td_dtrace_ft; /* Bitwise or of these flags. */ } _tdu; @@ -67,6 +71,7 @@ typedef struct kdtrace_thread { #define td_dtrace_step _tdu._tds._td_dtrace_step #define td_dtrace_ret _tdu._tds._td_dtrace_ret #define td_dtrace_ast _tdu._tds._td_dtrace_ast +#define td_dtrace_reg _tdu._tds._td_dtrace_reg uintptr_t td_dtrace_pc; /* DTrace saved pc from fasttrap. */ uintptr_t td_dtrace_npc; /* DTrace next pc from fasttrap. */ @@ -74,6 +79,9 @@ typedef struct kdtrace_thread { /* DTrace per-thread scratch location. */ uintptr_t td_dtrace_astpc; /* DTrace return sequence location. */ +#ifdef __amd64__ + uintptr_t td_dtrace_regv; +#endif u_int64_t td_hrtime; /* Last time on cpu. */ int td_errno; /* Syscall return value. 
*/ } kdtrace_thread_t; @@ -89,16 +97,38 @@ typedef struct kdtrace_thread { #define t_dtrace_stop td_dtrace->td_dtrace_stop #define t_dtrace_sig td_dtrace->td_dtrace_sig #define t_predcache td_dtrace->td_predcache -#define p_dtrace_helpers p_dtrace->p_dtrace_helpers +#define t_dtrace_ft td_dtrace->td_dtrace_ft +#define t_dtrace_on td_dtrace->td_dtrace_on +#define t_dtrace_step td_dtrace->td_dtrace_step +#define t_dtrace_ret td_dtrace->td_dtrace_ret +#define t_dtrace_ast td_dtrace->td_dtrace_ast +#define t_dtrace_reg td_dtrace->td_dtrace_reg +#define t_dtrace_pc td_dtrace->td_dtrace_pc +#define t_dtrace_npc td_dtrace->td_dtrace_npc +#define t_dtrace_scrpc td_dtrace->td_dtrace_scrpc +#define t_dtrace_astpc td_dtrace->td_dtrace_astpc +#define t_dtrace_regv td_dtrace->td_dtrace_regv +#define p_dtrace_helpers p_dtrace->p_dtrace_helpers +#define p_dtrace_count p_dtrace->p_dtrace_count +#define p_dtrace_probes p_dtrace->p_dtrace_probes +#define p_model p_dtrace->p_dtrace_model +#define DATAMODEL_NATIVE 0 +#ifdef __amd64__ +#define DATAMODEL_LP64 0 +#define DATAMODEL_ILP32 1 +#else +#define DATAMODEL_LP64 1 +#define DATAMODEL_ILP32 0 +#endif /* - * Definitions for fields in struct proc which are named differntly in FreeBSD. + * Definitions for fields in struct proc which are named differently in FreeBSD. */ #define p_cred p_ucred #define p_parent p_pptr /* - * Definitions for fields in struct thread which are named differntly in FreeBSD. + * Definitions for fields in struct thread which are named differently in FreeBSD. */ #define t_procp td_proc #define t_tid td_tid diff --git a/sys/cddl/dev/dtrace/dtrace_ioctl.c b/sys/cddl/dev/dtrace/dtrace_ioctl.c index bc408c43e..79fea7bb7 100644 --- a/sys/cddl/dev/dtrace/dtrace_ioctl.c +++ b/sys/cddl/dev/dtrace/dtrace_ioctl.c @@ -27,6 +27,55 @@ SYSCTL_INT(_debug_dtrace, OID_AUTO, verbose_ioctl, CTLFLAG_RW, &dtrace_verbose_i #define DTRACE_IOCTL_PRINTF(fmt, ...) 
if (dtrace_verbose_ioctl) printf(fmt, ## __VA_ARGS__ ) +static int +dtrace_ioctl_helper(struct cdev *dev, u_long cmd, caddr_t addr, int flags, + struct thread *td) +{ + int rval; + dof_helper_t *dhp = NULL; + dof_hdr_t *dof = NULL; + + switch (cmd) { + case DTRACEHIOC_ADDDOF: + dhp = (dof_helper_t *)addr; + /* XXX all because dofhp_dof is 64 bit */ +#ifdef __i386 + addr = (caddr_t)(uint32_t)dhp->dofhp_dof; +#else + addr = (caddr_t)dhp->dofhp_dof; +#endif + /* FALLTHROUGH */ + case DTRACEHIOC_ADD: + dof = dtrace_dof_copyin((intptr_t)addr, &rval); + + if (dof == NULL) + return (rval); + + mutex_enter(&dtrace_lock); + if ((rval = dtrace_helper_slurp((dof_hdr_t *)dof, dhp)) != -1) { + if (dhp) { + dhp->gen = rval; + copyout(dhp, addr, sizeof(*dhp)); + } + rval = 0; + } else { + rval = EINVAL; + } + mutex_exit(&dtrace_lock); + return (rval); + case DTRACEHIOC_REMOVE: + mutex_enter(&dtrace_lock); + rval = dtrace_helper_destroygen((int)*addr); + mutex_exit(&dtrace_lock); + + return (rval); + default: + break; + } + + return (ENOTTY); +} + /* ARGSUSED */ static int dtrace_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, diff --git a/sys/cddl/dev/dtrace/dtrace_load.c b/sys/cddl/dev/dtrace/dtrace_load.c index accee4706..59267259c 100644 --- a/sys/cddl/dev/dtrace/dtrace_load.c +++ b/sys/cddl/dev/dtrace/dtrace_load.c @@ -161,7 +161,10 @@ dtrace_load(void *dummy) /* Setup device cloning events. 
*/ eh_tag = EVENTHANDLER_REGISTER(dev_clone, dtrace_clone, 0, 1000); #else - dtrace_dev = make_dev(&dtrace_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "dtrace/dtrace"); + dtrace_dev = make_dev(&dtrace_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, + "dtrace/dtrace"); + helper_dev = make_dev(&helper_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660, + "dtrace/helper"); #endif return; diff --git a/sys/cddl/dev/dtrace/dtrace_unload.c b/sys/cddl/dev/dtrace/dtrace_unload.c index 40b09dc46..247cbb183 100644 --- a/sys/cddl/dev/dtrace/dtrace_unload.c +++ b/sys/cddl/dev/dtrace/dtrace_unload.c @@ -43,6 +43,7 @@ dtrace_unload() } #else destroy_dev(dtrace_dev); + destroy_dev(helper_dev); #endif mutex_enter(&dtrace_provider_lock); diff --git a/sys/cddl/dev/dtrace/i386/dtrace_isa.c b/sys/cddl/dev/dtrace/i386/dtrace_isa.c index bf891aa39..3f73a50ef 100644 --- a/sys/cddl/dev/dtrace/i386/dtrace_isa.c +++ b/sys/cddl/dev/dtrace/i386/dtrace_isa.c @@ -33,13 +33,17 @@ #include #include +#include #include +#include #include #include #include #include +#include "regset.h" + extern uintptr_t kernbase; uintptr_t kernelbase = (uintptr_t) &kernbase; @@ -100,21 +104,22 @@ dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, } } -#ifdef notyet static int dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, uintptr_t sp) { - klwp_t *lwp = ttolwp(curthread); +#ifdef notyet proc_t *p = curproc; - uintptr_t oldcontext = lwp->lwp_oldcontext; + uintptr_t oldcontext = lwp->lwp_oldcontext; /* XXX signal stack. */ + size_t s1, s2; +#endif volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; - size_t s1, s2; int ret = 0; ASSERT(pcstack == NULL || pcstack_limit > 0); +#ifdef notyet /* XXX signal stack. 
*/ if (p->p_model == DATAMODEL_NATIVE) { s1 = sizeof (struct frame) + 2 * sizeof (long); s2 = s1 + sizeof (siginfo_t); @@ -122,8 +127,9 @@ dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, s1 = sizeof (struct frame32) + 3 * sizeof (int); s2 = s1 + sizeof (siginfo32_t); } +#endif - while (pc != 0 && sp != 0) { + while (pc != 0) { ret++; if (pcstack != NULL) { *pcstack++ = (uint64_t)pc; @@ -132,6 +138,10 @@ dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, break; } + if (sp == 0) + break; + +#ifdef notyet /* XXX signal stack. */ if (oldcontext == sp + s1 || oldcontext == sp + s2) { if (p->p_model == DATAMODEL_NATIVE) { ucontext_t *ucp = (ucontext_t *)oldcontext; @@ -163,6 +173,11 @@ dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, sp = dtrace_fuword32(&fr->fr_savfp); } } +#else + pc = dtrace_fuword32((void *)(sp + + offsetof(struct i386_frame, f_retaddr))); + sp = dtrace_fuword32((void *)sp); +#endif /* ! notyet */ /* * This is totally bogus: if we faulted, we're going to clear @@ -181,10 +196,9 @@ dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, void dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) { - klwp_t *lwp = ttolwp(curthread); proc_t *p = curproc; - struct regs *rp; - uintptr_t pc, sp; + struct trapframe *tf; + uintptr_t pc, sp, fp; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; int n; @@ -198,7 +212,7 @@ dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) /* * If there's no user context we still need to zero the stack. 
*/ - if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL) + if (p == NULL || (tf = curthread->td_frame) == NULL) goto zero; *pcstack++ = (uint64_t)p->p_pid; @@ -207,19 +221,26 @@ dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) if (pcstack_limit <= 0) return; - pc = rp->r_pc; - sp = rp->r_fp; + pc = tf->tf_eip; + fp = tf->tf_ebp; + sp = tf->tf_esp; if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { + /* + * In an entry probe. The frame pointer has not yet been + * pushed (that happens in the function prologue). The + * best approach is to add the current pc as a missing top + * of stack and back the pc up to the caller, which is stored + * at the current stack pointer address since the call + * instruction puts it there right before the branch. + */ + *pcstack++ = (uint64_t)pc; pcstack_limit--; if (pcstack_limit <= 0) return; - if (p->p_model == DATAMODEL_NATIVE) - pc = dtrace_fulword((void *)rp->r_sp); - else - pc = dtrace_fuword32((void *)rp->r_sp); + pc = dtrace_fuword32((void *) sp); } n = dtrace_getustack_common(pcstack, pcstack_limit, pc, sp); @@ -231,24 +252,58 @@ dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) zero: while (pcstack_limit-- > 0) - *pcstack++ = NULL; + *pcstack++ = 0; } int dtrace_getustackdepth(void) { + proc_t *p = curproc; + struct trapframe *tf; + uintptr_t pc, fp, sp; + int n = 0; + + if (p == NULL || (tf = curthread->td_frame) == NULL) + return (0); + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) + return (-1); + + pc = tf->tf_eip; + fp = tf->tf_ebp; + sp = tf->tf_esp; + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { + /* + * In an entry probe. The frame pointer has not yet been + * pushed (that happens in the function prologue). The + * best approach is to add the current pc as a missing top + * of stack and back the pc up to the caller, which is stored + * at the current stack pointer address since the call + * instruction puts it there right before the branch. 
+ */ + + pc = dtrace_fuword32((void *) sp); + n++; + } + + n += dtrace_getustack_common(NULL, 0, pc, fp); + + return (n); } void dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) { - klwp_t *lwp = ttolwp(curthread); proc_t *p = curproc; - struct regs *rp; - uintptr_t pc, sp, oldcontext; + struct trapframe *tf; + uintptr_t pc, sp, fp; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; +#ifdef notyet /* XXX signal stack */ + uintptr_t oldcontext; size_t s1, s2; +#endif if (*flags & CPU_DTRACE_FAULT) return; @@ -259,7 +314,7 @@ dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) /* * If there's no user context we still need to zero the stack. */ - if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL) + if (p == NULL || (tf = curthread->td_frame) == NULL) goto zero; *pcstack++ = (uint64_t)p->p_pid; @@ -268,8 +323,11 @@ dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) if (pcstack_limit <= 0) return; - pc = rp->r_pc; - sp = rp->r_fp; + pc = tf->tf_eip; + fp = tf->tf_ebp; + sp = tf->tf_esp; + +#ifdef notyet /* XXX signal stack */ oldcontext = lwp->lwp_oldcontext; if (p->p_model == DATAMODEL_NATIVE) { @@ -279,6 +337,7 @@ dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) s1 = sizeof (struct frame32) + 3 * sizeof (int); s2 = s1 + sizeof (siginfo32_t); } +#endif if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { *pcstack++ = (uint64_t)pc; @@ -287,19 +346,20 @@ dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) if (pcstack_limit <= 0) return; - if (p->p_model == DATAMODEL_NATIVE) - pc = dtrace_fulword((void *)rp->r_sp); - else - pc = dtrace_fuword32((void *)rp->r_sp); + pc = dtrace_fuword32((void *)sp); } - while (pc != 0 && sp != 0) { + while (pc != 0) { *pcstack++ = (uint64_t)pc; - *fpstack++ = sp; + *fpstack++ = fp; pcstack_limit--; if (pcstack_limit <= 0) break; + if (fp == 0) + break; + +#ifdef notyet /* XXX 
signal stack */ if (oldcontext == sp + s1 || oldcontext == sp + s2) { if (p->p_model == DATAMODEL_NATIVE) { ucontext_t *ucp = (ucontext_t *)oldcontext; @@ -318,18 +378,12 @@ dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) oldcontext = dtrace_fuword32(&ucp->uc_link); } - } else { - if (p->p_model == DATAMODEL_NATIVE) { - struct frame *fr = (struct frame *)sp; - - pc = dtrace_fulword(&fr->fr_savpc); - sp = dtrace_fulword(&fr->fr_savfp); - } else { - struct frame32 *fr = (struct frame32 *)sp; - - pc = dtrace_fuword32(&fr->fr_savpc); - sp = dtrace_fuword32(&fr->fr_savfp); - } + } else +#endif /* XXX */ + { + pc = dtrace_fuword32((void *)(fp + + offsetof(struct i386_frame, f_retaddr))); + fp = dtrace_fuword32((void *)fp); } /* @@ -345,9 +399,8 @@ dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) zero: while (pcstack_limit-- > 0) - *pcstack++ = NULL; + *pcstack++ = 0; } -#endif uint64_t dtrace_getarg(int arg, int aframes) @@ -424,112 +477,92 @@ dtrace_getstackdepth(int aframes) return depth - aframes; } -#ifdef notyet ulong_t -dtrace_getreg(struct regs *rp, uint_t reg) +dtrace_getreg(struct trapframe *rp, uint_t reg) { -#if defined(__amd64) - int regmap[] = { - REG_GS, /* GS */ - REG_FS, /* FS */ - REG_ES, /* ES */ - REG_DS, /* DS */ - REG_RDI, /* EDI */ - REG_RSI, /* ESI */ - REG_RBP, /* EBP */ - REG_RSP, /* ESP */ - REG_RBX, /* EBX */ - REG_RDX, /* EDX */ - REG_RCX, /* ECX */ - REG_RAX, /* EAX */ - REG_TRAPNO, /* TRAPNO */ - REG_ERR, /* ERR */ - REG_RIP, /* EIP */ - REG_CS, /* CS */ - REG_RFL, /* EFL */ - REG_RSP, /* UESP */ - REG_SS /* SS */ + struct pcb *pcb; + int regmap[] = { /* Order is dependent on reg.d */ + REG_GS, /* 0 GS */ + REG_FS, /* 1 FS */ + REG_ES, /* 2 ES */ + REG_DS, /* 3 DS */ + REG_RDI, /* 4 EDI */ + REG_RSI, /* 5 ESI */ + REG_RBP, /* 6 EBP, REG_FP */ + REG_RSP, /* 7 ESP */ + REG_RBX, /* 8 EBX */ + REG_RDX, /* 9 EDX, REG_R1 */ + REG_RCX, /* 10 ECX */ + REG_RAX, /* 11 EAX, REG_R0 */ + REG_TRAPNO, 
/* 12 TRAPNO */ + REG_ERR, /* 13 ERR */ + REG_RIP, /* 14 EIP, REG_PC */ + REG_CS, /* 15 CS */ + REG_RFL, /* 16 EFL, REG_PS */ + REG_RSP, /* 17 UESP, REG_SP */ + REG_SS /* 18 SS */ }; - if (reg <= SS) { - if (reg >= sizeof (regmap) / sizeof (int)) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); - return (0); - } + if (reg > SS) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } - reg = regmap[reg]; - } else { - reg -= SS + 1; + if (reg >= sizeof (regmap) / sizeof (int)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); } - switch (reg) { + reg = regmap[reg]; + + switch(reg) { + case REG_GS: + if ((pcb = curthread->td_pcb) == NULL) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + return (pcb->pcb_gs); + case REG_FS: + return (rp->tf_fs); + case REG_ES: + return (rp->tf_es); + case REG_DS: + return (rp->tf_ds); case REG_RDI: - return (rp->r_rdi); + return (rp->tf_edi); case REG_RSI: - return (rp->r_rsi); - case REG_RDX: - return (rp->r_rdx); + return (rp->tf_esi); + case REG_RBP: + return (rp->tf_ebp); + case REG_RSP: + return (rp->tf_isp); + case REG_RBX: + return (rp->tf_ebx); case REG_RCX: - return (rp->r_rcx); - case REG_R8: - return (rp->r_r8); - case REG_R9: - return (rp->r_r9); + return (rp->tf_ecx); case REG_RAX: - return (rp->r_rax); - case REG_RBX: - return (rp->r_rbx); - case REG_RBP: - return (rp->r_rbp); - case REG_R10: - return (rp->r_r10); - case REG_R11: - return (rp->r_r11); - case REG_R12: - return (rp->r_r12); - case REG_R13: - return (rp->r_r13); - case REG_R14: - return (rp->r_r14); - case REG_R15: - return (rp->r_r15); - case REG_DS: - return (rp->r_ds); - case REG_ES: - return (rp->r_es); - case REG_FS: - return (rp->r_fs); - case REG_GS: - return (rp->r_gs); + return (rp->tf_eax); case REG_TRAPNO: - return (rp->r_trapno); + return (rp->tf_trapno); case REG_ERR: - return (rp->r_err); + return (rp->tf_err); case REG_RIP: - return (rp->r_rip); + return (rp->tf_eip); case REG_CS: - return (rp->r_cs); - case REG_SS: - return 
(rp->r_ss); + return (rp->tf_cs); case REG_RFL: - return (rp->r_rfl); + return (rp->tf_eflags); +#if 0 case REG_RSP: - return (rp->r_rsp); + return (rp->tf_esp); +#endif + case REG_SS: + return (rp->tf_ss); default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } - -#else - if (reg > SS) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); - return (0); - } - - return ((&rp->r_gs)[reg]); -#endif } -#endif static int dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) diff --git a/sys/cddl/dev/dtrace/i386/instr_size.c b/sys/cddl/dev/dtrace/i386/instr_size.c index fb6af2d0a..7f667f7e5 100644 --- a/sys/cddl/dev/dtrace/i386/instr_size.c +++ b/sys/cddl/dev/dtrace/i386/instr_size.c @@ -47,6 +47,7 @@ typedef u_int model_t; #define DATAMODEL_NATIVE 0 int dtrace_instr_size(uchar_t *); +int dtrace_instr_size_isa(uchar_t *, model_t, int *); #endif #include @@ -124,6 +125,12 @@ dtrace_dis_isize(uchar_t *instr, dis_isize_t which, model_t model, int *rmindex) return (sz); } +int +dtrace_instr_size_isa(uchar_t *instr, model_t model, int *rmindex) +{ + return (dtrace_dis_isize(instr, DIS_ISIZE_INSTR, model, rmindex)); +} + int dtrace_instr_size(uchar_t *instr) { diff --git a/sys/cddl/dev/dtrace/i386/regset.h b/sys/cddl/dev/dtrace/i386/regset.h new file mode 100644 index 000000000..d6732ff5e --- /dev/null +++ b/sys/cddl/dev/dtrace/i386/regset.h @@ -0,0 +1,127 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +#ifndef _REGSET_H +#define _REGSET_H + +/* + * #pragma ident "@(#)regset.h 1.11 05/06/08 SMI" + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The names and offsets defined here should be specified by the + * AMD64 ABI suppl. + * + * We make fsbase and gsbase part of the lwp context (since they're + * the only way to access the full 64-bit address range via the segment + * registers) and thus belong here too. However we treat them as + * read-only; if %fs or %gs are updated, the results of the descriptor + * table lookup that those updates implicitly cause will be reflected + * in the corresponding fsbase and/or gsbase values the next time the + * context can be inspected. However it is NOT possible to override + * the fsbase/gsbase settings via this interface. + * + * Direct modification of the base registers (thus overriding the + * descriptor table base address) can be achieved with _lwp_setprivate. 
+ */ + +#define REG_GSBASE 27 +#define REG_FSBASE 26 +#define REG_DS 25 +#define REG_ES 24 + +#define REG_GS 23 +#define REG_FS 22 +#define REG_SS 21 +#define REG_RSP 20 +#define REG_RFL 19 +#define REG_CS 18 +#define REG_RIP 17 +#define REG_ERR 16 +#define REG_TRAPNO 15 +#define REG_RAX 14 +#define REG_RCX 13 +#define REG_RDX 12 +#define REG_RBX 11 +#define REG_RBP 10 +#define REG_RSI 9 +#define REG_RDI 8 +#define REG_R8 7 +#define REG_R9 6 +#define REG_R10 5 +#define REG_R11 4 +#define REG_R12 3 +#define REG_R13 2 +#define REG_R14 1 +#define REG_R15 0 + +/* + * The names and offsets defined here are specified by i386 ABI suppl. + */ + +#define SS 18 /* only stored on a privilege transition */ +#define UESP 17 /* only stored on a privilege transition */ +#define EFL 16 +#define CS 15 +#define EIP 14 +#define ERR 13 +#define TRAPNO 12 +#define EAX 11 +#define ECX 10 +#define EDX 9 +#define EBX 8 +#define ESP 7 +#define EBP 6 +#define ESI 5 +#define EDI 4 +#define DS 3 +#define ES 2 +#define FS 1 +#define GS 0 + +#define REG_PC EIP +#define REG_FP EBP +#define REG_SP UESP +#define REG_PS EFL +#define REG_R0 EAX +#define REG_R1 EDX + +#ifdef __cplusplus +} +#endif + +#endif /* _REGSET_H */ diff --git a/sys/cddl/dev/systrace/systrace.c b/sys/cddl/dev/systrace/systrace.c index 72c25e4e2..3e992e3a5 100644 --- a/sys/cddl/dev/systrace/systrace.c +++ b/sys/cddl/dev/systrace/systrace.c @@ -153,22 +153,24 @@ static dtrace_provider_id_t systrace_id; * compat syscall from something like Linux. */ static void -systrace_probe(u_int32_t id, int sysnum, struct sysent *sysent, void *params) +systrace_probe(u_int32_t id, int sysnum, struct sysent *sysent, void *params, + int ret) { int n_args = 0; u_int64_t uargs[8]; + memset(uargs, 0, sizeof(uargs)); /* * Check if this syscall has an argument conversion function * registered. 
*/ - if (sysent->sy_systrace_args_func != NULL) + if (params && sysent->sy_systrace_args_func != NULL) { /* * Convert the syscall parameters using the registered * function. */ (*sysent->sy_systrace_args_func)(sysnum, params, uargs, &n_args); - else + } else if (params) { /* * Use the built-in system call argument conversion * function to translate the syscall structure fields @@ -176,6 +178,13 @@ systrace_probe(u_int32_t id, int sysnum, struct sysent *sysent, void *params) * expects. */ systrace_args(sysnum, params, uargs, &n_args); + } else { + /* + * Since params is NULL, this is a 'return' probe. + * Set arg0 and arg1 as the return value of this syscall. + */ + uargs[0] = uargs[1] = ret; + } /* Process the probe using the converted argments. */ dtrace_probe(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]); diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s index 575b98dcc..13562fa6e 100644 --- a/sys/i386/i386/exception.s +++ b/sys/i386/i386/exception.s @@ -108,6 +108,8 @@ IDTVEC(nmi) pushl $0; TRAP(T_NMI) IDTVEC(bpt) pushl $0; TRAP(T_BPTFLT) +IDTVEC(dtrace_ret) + pushl $0; TRAP(T_DTRACE_RET) IDTVEC(ofl) pushl $0; TRAP(T_OFLOW) IDTVEC(bnd) diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index a7c84368a..36ac1de47 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_xbox.h" +#include "opt_kdtrace.h" #include #include @@ -1888,7 +1889,11 @@ extern inthand_t IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), - IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); + IDTVEC(xmm), +#ifdef KDTRACE_HOOKS + IDTVEC(dtrace_ret), +#endif + IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB /* @@ -2843,6 +2848,10 @@ init386(first) GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, 
&IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); +#ifdef KDTRACE_HOOKS + setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL, + GSEL(GCODE_SEL, SEL_KPL)); +#endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 45e92a0f9..25ac3bfd1 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -122,6 +122,13 @@ dtrace_doubletrap_func_t dtrace_doubletrap_func; * implementation opaque. */ systrace_probe_func_t systrace_probe_func; + +/* + * These hooks are necessary for the pid, usdt and fasttrap providers. + */ +dtrace_fasttrap_probe_ptr_t dtrace_fasttrap_probe_ptr; +dtrace_pid_probe_ptr_t dtrace_pid_probe_ptr; +dtrace_return_probe_ptr_t dtrace_return_probe_ptr; #endif extern void trap(struct trapframe *frame); @@ -264,6 +271,38 @@ trap(struct trapframe *frame) dtrace_trap_func != NULL) if ((*dtrace_trap_func)(frame, type)) goto out; + if (type == T_DTRACE_PROBE || type == T_DTRACE_RET || + type == T_BPTFLT) { + struct reg regs; + + regs.r_fs = frame->tf_fs; + regs.r_es = frame->tf_es; + regs.r_ds = frame->tf_ds; + regs.r_edi = frame->tf_edi; + regs.r_esi = frame->tf_esi; + regs.r_ebp = frame->tf_ebp; + regs.r_ebx = frame->tf_ebx; + regs.r_edx = frame->tf_edx; + regs.r_ecx = frame->tf_ecx; + regs.r_eax = frame->tf_eax; + regs.r_eip = frame->tf_eip; + regs.r_cs = frame->tf_cs; + regs.r_eflags = frame->tf_eflags; + regs.r_esp = frame->tf_esp; + regs.r_ss = frame->tf_ss; + if (type == T_DTRACE_PROBE && + dtrace_fasttrap_probe_ptr != NULL && + dtrace_fasttrap_probe_ptr(®s) == 0) + goto out; + if (type == T_BPTFLT && + dtrace_pid_probe_ptr != NULL && + dtrace_pid_probe_ptr(®s) == 0) + goto out; + if (type == T_DTRACE_RET && + dtrace_return_probe_ptr != NULL && + dtrace_return_probe_ptr(®s) == 0) + goto out; + } #endif if ((frame->tf_eflags & PSL_I) == 0) { diff --git a/sys/i386/include/segments.h b/sys/i386/include/segments.h index bfc3bb9c8..1c1ddee86 
100644 --- a/sys/i386/include/segments.h +++ b/sys/i386/include/segments.h @@ -75,6 +75,13 @@ struct segment_descriptor { unsigned sd_hibase:8 ; /* segment base address (msb) */ } ; +#define USD_GETBASE(sd) (((sd)->sd_lobase) | (sd)->sd_hibase << 24) +#define USD_SETBASE(sd, b) (sd)->sd_lobase = (b); \ + (sd)->sd_hibase = ((b) >> 24); +#define USD_GETLIMIT(sd) (((sd)->sd_lolimit) | (sd)->sd_hilimit << 16) +#define USD_SETLIMIT(sd, l) (sd)->sd_lolimit = (l); \ + (sd)->sd_hilimit = ((l) >> 16); + /* * Gate descriptors (e.g. indirect descriptors) */ @@ -200,6 +207,7 @@ struct region_descriptor { #define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */ #define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. */ #define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ +#define IDT_DTRACE_RET 0x92 /* DTrace pid provider Interrupt Vector */ /* * Entries in the Global Descriptor Table (GDT) diff --git a/sys/i386/include/trap.h b/sys/i386/include/trap.h index f0176b2c9..d8e36b5aa 100644 --- a/sys/i386/include/trap.h +++ b/sys/i386/include/trap.h @@ -62,6 +62,8 @@ #define T_MCHK 28 /* machine check trap */ #define T_XMMFLT 29 /* SIMD floating-point exception */ #define T_RESERVED 30 /* reserved (unknown) */ +#define T_DTRACE_RET 31 /* DTrace pid return */ +#define T_DTRACE_PROBE 32 /* DTrace fasttrap probe */ /* XXX most of the following codes aren't used, but could be. 
*/ diff --git a/sys/kern/kern_dtrace.c b/sys/kern/kern_dtrace.c index 145535201..603b4cf8c 100644 --- a/sys/kern/kern_dtrace.c +++ b/sys/kern/kern_dtrace.c @@ -39,9 +39,7 @@ __FBSDID("$FreeBSD$"); #include #define KDTRACE_PROC_SIZE 64 -#define KDTRACE_PROC_ZERO 8 #define KDTRACE_THREAD_SIZE 256 -#define KDTRACE_THREAD_ZERO 64 MALLOC_DEFINE(M_KDTRACE, "kdtrace", "DTrace hooks"); @@ -49,20 +47,21 @@ MALLOC_DEFINE(M_KDTRACE, "kdtrace", "DTrace hooks"); size_t kdtrace_proc_size() { - return(KDTRACE_PROC_SIZE); + + return (KDTRACE_PROC_SIZE); } static void kdtrace_proc_ctor(void *arg __unused, struct proc *p) { - p->p_dtrace = malloc(KDTRACE_PROC_SIZE, M_KDTRACE, M_WAITOK); - bzero(p->p_dtrace, KDTRACE_PROC_ZERO); + p->p_dtrace = malloc(KDTRACE_PROC_SIZE, M_KDTRACE, M_WAITOK|M_ZERO); } static void kdtrace_proc_dtor(void *arg __unused, struct proc *p) { + if (p->p_dtrace != NULL) { free(p->p_dtrace, M_KDTRACE); p->p_dtrace = NULL; @@ -73,20 +72,21 @@ kdtrace_proc_dtor(void *arg __unused, struct proc *p) size_t kdtrace_thread_size() { - return(KDTRACE_THREAD_SIZE); + + return (KDTRACE_THREAD_SIZE); } static void kdtrace_thread_ctor(void *arg __unused, struct thread *td) { - td->td_dtrace = malloc(KDTRACE_THREAD_SIZE, M_KDTRACE, M_WAITOK); - bzero(td->td_dtrace, KDTRACE_THREAD_ZERO); + td->td_dtrace = malloc(KDTRACE_THREAD_SIZE, M_KDTRACE, M_WAITOK|M_ZERO); } static void kdtrace_thread_dtor(void *arg __unused, struct thread *td) { + if (td->td_dtrace != NULL) { free(td->td_dtrace, M_KDTRACE); td->td_dtrace = NULL; @@ -99,10 +99,15 @@ kdtrace_thread_dtor(void *arg __unused, struct thread *td) static void init_dtrace(void *dummy __unused) { - EVENTHANDLER_REGISTER(process_ctor, kdtrace_proc_ctor, NULL, EVENTHANDLER_PRI_ANY); - EVENTHANDLER_REGISTER(process_dtor, kdtrace_proc_dtor, NULL, EVENTHANDLER_PRI_ANY); - EVENTHANDLER_REGISTER(thread_ctor, kdtrace_thread_ctor, NULL, EVENTHANDLER_PRI_ANY); - EVENTHANDLER_REGISTER(thread_dtor, kdtrace_thread_dtor, NULL, 
EVENTHANDLER_PRI_ANY); + + EVENTHANDLER_REGISTER(process_ctor, kdtrace_proc_ctor, NULL, + EVENTHANDLER_PRI_ANY); + EVENTHANDLER_REGISTER(process_dtor, kdtrace_proc_dtor, NULL, + EVENTHANDLER_PRI_ANY); + EVENTHANDLER_REGISTER(thread_ctor, kdtrace_thread_ctor, NULL, + EVENTHANDLER_PRI_ANY); + EVENTHANDLER_REGISTER(thread_dtor, kdtrace_thread_dtor, NULL, + EVENTHANDLER_PRI_ANY); } SYSINIT(kdtrace, SI_SUB_KDTRACE, SI_ORDER_FIRST, init_dtrace, NULL); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index f9daff15a..45a832759 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -91,11 +91,11 @@ dtrace_execexit_func_t dtrace_fasttrap_exec; #endif SDT_PROVIDER_DECLARE(proc); -SDT_PROBE_DEFINE(proc, kernel, , exec); +SDT_PROBE_DEFINE(proc, kernel, , exec, exec); SDT_PROBE_ARGTYPE(proc, kernel, , exec, 0, "char *"); -SDT_PROBE_DEFINE(proc, kernel, , exec_failure); +SDT_PROBE_DEFINE(proc, kernel, , exec_failure, exec-failure); SDT_PROBE_ARGTYPE(proc, kernel, , exec_failure, 0, "int"); -SDT_PROBE_DEFINE(proc, kernel, , exec_success); +SDT_PROBE_DEFINE(proc, kernel, , exec_success, exec-success); SDT_PROBE_ARGTYPE(proc, kernel, , exec_success, 0, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index abd980dd8..53d33f5b6 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -90,7 +90,7 @@ dtrace_execexit_func_t dtrace_fasttrap_exit; #endif SDT_PROVIDER_DECLARE(proc); -SDT_PROBE_DEFINE(proc, kernel, , exit); +SDT_PROBE_DEFINE(proc, kernel, , exit, exit); SDT_PROBE_ARGTYPE(proc, kernel, , exit, 0, "int"); /* Required to be non-static for SysVR4 emulator */ diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 4084505ab..6ab99c60c 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -84,7 +84,7 @@ dtrace_fork_func_t dtrace_fasttrap_fork; #endif SDT_PROVIDER_DECLARE(proc); -SDT_PROBE_DEFINE(proc, kernel, , create); +SDT_PROBE_DEFINE(proc, kernel, , create, 
create); SDT_PROBE_ARGTYPE(proc, kernel, , create, 0, "struct proc *"); SDT_PROBE_ARGTYPE(proc, kernel, , create, 1, "struct proc *"); SDT_PROBE_ARGTYPE(proc, kernel, , create, 2, "int"); @@ -662,15 +662,6 @@ again: p2->p_pfsflags = p1->p_pfsflags; } -#ifdef KDTRACE_HOOKS - /* - * Tell the DTrace fasttrap provider about the new process - * if it has registered an interest. - */ - if (dtrace_fasttrap_fork) - dtrace_fasttrap_fork(p1, p2); -#endif - /* * This begins the section where we must prevent the parent * from being swapped. @@ -735,6 +726,21 @@ again: PROC_SLOCK(p2); p2->p_state = PRS_NORMAL; PROC_SUNLOCK(p2); +#ifdef KDTRACE_HOOKS + /* + * Tell the DTrace fasttrap provider about the new process + * if it has registered an interest. We have to do this only after + * p_state is PRS_NORMAL since the fasttrap module will use pfind() + * later on. + */ + if (dtrace_fasttrap_fork) { + PROC_LOCK(p1); + PROC_LOCK(p2); + dtrace_fasttrap_fork(p1, p2); + PROC_UNLOCK(p2); + PROC_UNLOCK(p1); + } +#endif PROC_LOCK(p1); if ((p1->p_flag & (P_TRACED | P_FOLLOWFORK)) == (P_TRACED | diff --git a/sys/kern/kern_priv.c b/sys/kern/kern_priv.c index 352e50233..fd3a95c57 100644 --- a/sys/kern/kern_priv.c +++ b/sys/kern/kern_priv.c @@ -60,8 +60,8 @@ SYSCTL_INT(_security_bsd, OID_AUTO, suser_enabled, CTLFLAG_RW, TUNABLE_INT("security.bsd.suser_enabled", &suser_enabled); SDT_PROVIDER_DEFINE(priv); -SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv_ok, "int"); -SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv_err, "int"); +SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv_ok, priv-ok, "int"); +SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv_err, priv-err, "int"); /* * Check a credential for privilege. 
Lots of good reasons to deny privilege; diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 045b3849f..314a08f7c 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -82,30 +82,30 @@ __FBSDID("$FreeBSD$"); #endif SDT_PROVIDER_DEFINE(proc); -SDT_PROBE_DEFINE(proc, kernel, ctor, entry); +SDT_PROBE_DEFINE(proc, kernel, ctor, entry, entry); SDT_PROBE_ARGTYPE(proc, kernel, ctor, entry, 0, "struct proc *"); SDT_PROBE_ARGTYPE(proc, kernel, ctor, entry, 1, "int"); SDT_PROBE_ARGTYPE(proc, kernel, ctor, entry, 2, "void *"); SDT_PROBE_ARGTYPE(proc, kernel, ctor, entry, 3, "int"); -SDT_PROBE_DEFINE(proc, kernel, ctor, return); +SDT_PROBE_DEFINE(proc, kernel, ctor, return, return); SDT_PROBE_ARGTYPE(proc, kernel, ctor, return, 0, "struct proc *"); SDT_PROBE_ARGTYPE(proc, kernel, ctor, return, 1, "int"); SDT_PROBE_ARGTYPE(proc, kernel, ctor, return, 2, "void *"); SDT_PROBE_ARGTYPE(proc, kernel, ctor, return, 3, "int"); -SDT_PROBE_DEFINE(proc, kernel, dtor, entry); +SDT_PROBE_DEFINE(proc, kernel, dtor, entry, entry); SDT_PROBE_ARGTYPE(proc, kernel, dtor, entry, 0, "struct proc *"); SDT_PROBE_ARGTYPE(proc, kernel, dtor, entry, 1, "int"); SDT_PROBE_ARGTYPE(proc, kernel, dtor, entry, 2, "void *"); SDT_PROBE_ARGTYPE(proc, kernel, dtor, entry, 3, "struct thread *"); -SDT_PROBE_DEFINE(proc, kernel, dtor, return); +SDT_PROBE_DEFINE(proc, kernel, dtor, return, return); SDT_PROBE_ARGTYPE(proc, kernel, dtor, return, 0, "struct proc *"); SDT_PROBE_ARGTYPE(proc, kernel, dtor, return, 1, "int"); SDT_PROBE_ARGTYPE(proc, kernel, dtor, return, 2, "void *"); -SDT_PROBE_DEFINE(proc, kernel, init, entry); +SDT_PROBE_DEFINE(proc, kernel, init, entry, entry); SDT_PROBE_ARGTYPE(proc, kernel, init, entry, 0, "struct proc *"); SDT_PROBE_ARGTYPE(proc, kernel, init, entry, 1, "int"); SDT_PROBE_ARGTYPE(proc, kernel, init, entry, 2, "int"); -SDT_PROBE_DEFINE(proc, kernel, init, return); +SDT_PROBE_DEFINE(proc, kernel, init, return, return); SDT_PROBE_ARGTYPE(proc, kernel, init, return, 
0, "struct proc *"); SDT_PROBE_ARGTYPE(proc, kernel, init, return, 1, "int"); SDT_PROBE_ARGTYPE(proc, kernel, init, return, 2, "int"); diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 0b4111d56..14e517cd8 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -85,14 +85,14 @@ __FBSDID("$FreeBSD$"); #define ONSIG 32 /* NSIG for osig* syscalls. XXX. */ SDT_PROVIDER_DECLARE(proc); -SDT_PROBE_DEFINE(proc, kernel, , signal_send); +SDT_PROBE_DEFINE(proc, kernel, , signal_send, signal-send); SDT_PROBE_ARGTYPE(proc, kernel, , signal_send, 0, "struct thread *"); SDT_PROBE_ARGTYPE(proc, kernel, , signal_send, 1, "struct proc *"); SDT_PROBE_ARGTYPE(proc, kernel, , signal_send, 2, "int"); -SDT_PROBE_DEFINE(proc, kernel, , signal_clear); +SDT_PROBE_DEFINE(proc, kernel, , signal_clear, signal-clear); SDT_PROBE_ARGTYPE(proc, kernel, , signal_clear, 0, "int"); SDT_PROBE_ARGTYPE(proc, kernel, , signal_clear, 1, "ksiginfo_t *"); -SDT_PROBE_DEFINE(proc, kernel, , signal_discard); +SDT_PROBE_DEFINE(proc, kernel, , signal_discard, signal-discard); SDT_PROBE_ARGTYPE(proc, kernel, , signal_discard, 0, "struct thread *"); SDT_PROBE_ARGTYPE(proc, kernel, , signal_discard, 1, "struct proc *"); SDT_PROBE_ARGTYPE(proc, kernel, , signal_discard, 2, "int"); diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index 78b094928..32d569181 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -57,10 +57,10 @@ __FBSDID("$FreeBSD$"); #include SDT_PROVIDER_DEFINE(callout_execute); -SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start); +SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start, callout-start); SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_start, 0, "struct callout *"); -SDT_PROBE_DEFINE(callout_execute, kernel, , callout_end); +SDT_PROBE_DEFINE(callout_execute, kernel, , callout_end, callout-end); SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0, "struct callout *"); diff --git a/sys/kern/subr_trap.c 
b/sys/kern/subr_trap.c index fc3eafdc4..76c473811 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -308,7 +308,7 @@ syscallenter(struct thread *td, struct syscall_args *sa) */ if (systrace_probe_func != NULL && sa->callp->sy_entry != 0) (*systrace_probe_func)(sa->callp->sy_entry, sa->code, - sa->callp, sa->args); + sa->callp, sa->args, 0); #endif AUDIT_SYSCALL_ENTER(sa->code, td); @@ -326,7 +326,7 @@ syscallenter(struct thread *td, struct syscall_args *sa) */ if (systrace_probe_func != NULL && sa->callp->sy_return != 0) (*systrace_probe_func)(sa->callp->sy_return, sa->code, - sa->callp, sa->args); + sa->callp, NULL, (error) ? -1 : td->td_retval[0]); #endif CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx", p, error, td->td_retval[0], td->td_retval[1]); diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index a82702f68..a39aad002 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -135,7 +135,7 @@ __FBSDID("$FreeBSD$"); #define WITNESS_COUNT 1024 #define WITNESS_CHILDCOUNT (WITNESS_COUNT * 4) #define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */ -#define WITNESS_PENDLIST 512 +#define WITNESS_PENDLIST 768 /* Allocate 256 KB of stack data space */ #define WITNESS_LO_DATA_COUNT 2048 diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 0d0a70577..4ca5eeb22 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -61,28 +61,28 @@ __FBSDID("$FreeBSD$"); #include SDT_PROVIDER_DECLARE(vfs); -SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", "char *", +SDT_PROBE_DEFINE3(vfs, namecache, enter, done, done, "struct vnode *", "char *", "struct vnode *"); -SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *", +SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, done, "struct vnode *", "char *"); -SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *"); -SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *", +SDT_PROBE_DEFINE1(vfs, 
namecache, fullpath, entry, entry, "struct vnode *"); +SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, hit, "struct vnode *", "struct char *", "struct vnode *"); -SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, "struct vnode *"); -SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, "int", "struct vnode *", - "struct char *"); -SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *", "char *", +SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, miss, "struct vnode *"); +SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, return, "int", + "struct vnode *", "struct char *"); +SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, hit, "struct vnode *", "char *", "struct vnode *"); -SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit_negative, "struct vnode *", +SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit_negative, hit-negative, + "struct vnode *", "char *"); +SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, miss, "struct vnode *", "char *"); -SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, "struct vnode *", - "char *"); -SDT_PROBE_DEFINE1(vfs, namecache, purge, done, "struct vnode *"); -SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, "struct vnode *"); -SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *"); -SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *", +SDT_PROBE_DEFINE1(vfs, namecache, purge, done, done, "struct vnode *"); +SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, done, "struct vnode *"); +SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, done, "struct mount *"); +SDT_PROBE_DEFINE3(vfs, namecache, zap, done, done, "struct vnode *", "char *", "struct vnode *"); -SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *", +SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, done, "struct vnode *", "char *"); /* diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 5b6ccf6ff..a68e4d23f 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -68,9 +68,9 @@ 
__FBSDID("$FreeBSD$"); #undef NAMEI_DIAGNOSTIC SDT_PROVIDER_DECLARE(vfs); -SDT_PROBE_DEFINE3(vfs, namei, lookup, entry, "struct vnode *", "char *", +SDT_PROBE_DEFINE3(vfs, namei, lookup, entry, entry, "struct vnode *", "char *", "unsigned long"); -SDT_PROBE_DEFINE2(vfs, namei, lookup, return, "int", "struct vnode *"); +SDT_PROBE_DEFINE2(vfs, namei, lookup, return, return, "int", "struct vnode *"); /* * Allocation zone for namei diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 24de15e25..8cfd27853 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -85,10 +85,10 @@ __FBSDID("$FreeBSD$"); #include SDT_PROVIDER_DEFINE(vfs); -SDT_PROBE_DEFINE(vfs, , stat, mode); +SDT_PROBE_DEFINE(vfs, , stat, mode, mode); SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *"); SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int"); -SDT_PROBE_DEFINE(vfs, , stat, reg); +SDT_PROBE_DEFINE(vfs, , stat, reg, reg); SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *"); SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int"); diff --git a/sys/modules/dtrace/Makefile b/sys/modules/dtrace/Makefile index ba468788b..a85bf3e9c 100644 --- a/sys/modules/dtrace/Makefile +++ b/sys/modules/dtrace/Makefile @@ -14,10 +14,8 @@ SUBDIR= dtmalloc \ sdt \ systrace -.if ${MACHINE_ARCH} == "amd64" -SUBDIR+= fbt -.elif ${MACHINE_ARCH} == "i386" -SUBDIR+= fbt +.if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "i386" +SUBDIR+= fasttrap fbt .endif .include diff --git a/sys/modules/dtrace/dtrace/Makefile b/sys/modules/dtrace/dtrace/Makefile index 08e61a94c..8d32edb61 100644 --- a/sys/modules/dtrace/dtrace/Makefile +++ b/sys/modules/dtrace/dtrace/Makefile @@ -15,6 +15,7 @@ SRCS= dtrace.c \ .if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "i386" SRCS+= dis_tables.c \ instr_size.c +CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/uts/intel .endif SRCS+= bus_if.h device_if.h vnode_if.h diff --git a/sys/modules/dtrace/dtraceall/dtraceall.c b/sys/modules/dtrace/dtraceall/dtraceall.c index 
d8d330fda..ecb5e344b 100644 --- a/sys/modules/dtrace/dtraceall/dtraceall.c +++ b/sys/modules/dtrace/dtraceall/dtraceall.c @@ -68,6 +68,7 @@ MODULE_DEPEND(dtraceall, dtmalloc, 1, 1, 1); MODULE_DEPEND(dtraceall, dtnfsclient, 1, 1, 1); #if defined(__amd64__) || defined(__i386__) MODULE_DEPEND(dtraceall, fbt, 1, 1, 1); +MODULE_DEPEND(dtraceall, fasttrap, 1, 1, 1); #endif MODULE_DEPEND(dtraceall, lockstat, 1, 1, 1); MODULE_DEPEND(dtraceall, sdt, 1, 1, 1); diff --git a/sys/modules/dtrace/fasttrap/Makefile b/sys/modules/dtrace/fasttrap/Makefile index 104fd9af4..40aabd7bd 100644 --- a/sys/modules/dtrace/fasttrap/Makefile +++ b/sys/modules/dtrace/fasttrap/Makefile @@ -1,9 +1,9 @@ # $FreeBSD$ -.PATH: ${.CURDIR}/../../../cddl/dev/fasttrap +.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/uts/common/dtrace KMOD= fasttrap -SRCS= fasttrap.c +SRCS= fasttrap.c fasttrap_isa.c opt_compat.h SRCS+= vnode_if.h CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris \ @@ -12,6 +12,7 @@ CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris \ .if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "i386" CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/uts/intel +.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/uts/intel/dtrace .endif CFLAGS+= -DSMP -DDEBUG diff --git a/sys/net/vnet.c b/sys/net/vnet.c index e6d7cccdc..faa296b83 100644 --- a/sys/net/vnet.c +++ b/sys/net/vnet.c @@ -210,11 +210,15 @@ static TAILQ_HEAD(, vnet_data_free) vnet_data_free_head = static struct sx vnet_data_free_lock; SDT_PROVIDER_DEFINE(vnet); -SDT_PROBE_DEFINE1(vnet, functions, vnet_alloc, entry, "int"); -SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, alloc, "int", "struct vnet *"); -SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, return, "int", "struct vnet *"); -SDT_PROBE_DEFINE2(vnet, functions, vnet_destroy, entry, "int", "struct vnet *"); -SDT_PROBE_DEFINE1(vnet, functions, vnet_destroy, return, "int"); +SDT_PROBE_DEFINE1(vnet, functions, vnet_alloc, entry, entry, "int"); +SDT_PROBE_DEFINE2(vnet, 
functions, vnet_alloc, alloc, alloc, "int", + "struct vnet *"); +SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, return, return, + "int", "struct vnet *"); +SDT_PROBE_DEFINE2(vnet, functions, vnet_destroy, entry, entry, + "int", "struct vnet *"); +SDT_PROBE_DEFINE1(vnet, functions, vnet_destroy, return, return, + "int"); #ifdef DDB static void db_show_vnet_print_vs(struct vnet_sysinit *, int); diff --git a/sys/opencrypto/deflate.c b/sys/opencrypto/deflate.c index 2113611ae..deaf05273 100644 --- a/sys/opencrypto/deflate.c +++ b/sys/opencrypto/deflate.c @@ -50,13 +50,13 @@ __FBSDID("$FreeBSD$"); #include SDT_PROVIDER_DECLARE(opencrypto); -SDT_PROBE_DEFINE2(opencrypto, deflate, deflate_global, entry, +SDT_PROBE_DEFINE2(opencrypto, deflate, deflate_global, entry, entry, "int", "u_int32_t"); -SDT_PROBE_DEFINE5(opencrypto, deflate, deflate_global, bad, +SDT_PROBE_DEFINE5(opencrypto, deflate, deflate_global, bad, bad, "int", "int", "int", "int", "int"); -SDT_PROBE_DEFINE5(opencrypto, deflate, deflate_global, iter, +SDT_PROBE_DEFINE5(opencrypto, deflate, deflate_global, iter, iter, "int", "int", "int", "int", "int"); -SDT_PROBE_DEFINE2(opencrypto, deflate, deflate_global, return, +SDT_PROBE_DEFINE2(opencrypto, deflate, deflate_global, return, return, "int", "u_int32_t"); int window_inflate = -1 * MAX_WBITS; diff --git a/sys/security/mac/mac_framework.c b/sys/security/mac/mac_framework.c index 1f43c0278..fa069280d 100644 --- a/sys/security/mac/mac_framework.c +++ b/sys/security/mac/mac_framework.c @@ -94,10 +94,12 @@ __FBSDID("$FreeBSD$"); SDT_PROVIDER_DEFINE(mac); SDT_PROVIDER_DEFINE(mac_framework); -SDT_PROBE_DEFINE2(mac, kernel, policy, modevent, "int", +SDT_PROBE_DEFINE2(mac, kernel, policy, modevent, modevent, "int", "struct mac_policy_conf *mpc"); -SDT_PROBE_DEFINE1(mac, kernel, policy, register, "struct mac_policy_conf *"); -SDT_PROBE_DEFINE1(mac, kernel, policy, unregister, "struct mac_policy_conf *"); +SDT_PROBE_DEFINE1(mac, kernel, policy, register, register, + 
"struct mac_policy_conf *"); +SDT_PROBE_DEFINE1(mac, kernel, policy, unregister, unregister, + "struct mac_policy_conf *"); /* * Root sysctl node for all MAC and MAC policy controls. diff --git a/sys/security/mac/mac_internal.h b/sys/security/mac/mac_internal.h index 39fc4042b..72444772a 100644 --- a/sys/security/mac/mac_internal.h +++ b/sys/security/mac/mac_internal.h @@ -75,27 +75,27 @@ SDT_PROVIDER_DECLARE(mac_framework); /* Entry points to MAC. */ #define MAC_CHECK_PROBE_DEFINE4(name, arg0, arg1, arg2, arg3) \ SDT_PROBE_DEFINE5(mac_framework, kernel, name, mac_check_err, \ - "int", arg0, arg1, arg2, arg3); \ + mac-check-err, "int", arg0, arg1, arg2, arg3); \ SDT_PROBE_DEFINE5(mac_framework, kernel, name, mac_check_ok, \ - "int", arg0, arg1, arg2, arg3); + mac-check-ok, "int", arg0, arg1, arg2, arg3); #define MAC_CHECK_PROBE_DEFINE3(name, arg0, arg1, arg2) \ SDT_PROBE_DEFINE4(mac_framework, kernel, name, mac_check_err, \ - "int", arg0, arg1, arg2); \ + mac-check-err, "int", arg0, arg1, arg2); \ SDT_PROBE_DEFINE4(mac_framework, kernel, name, mac_check_ok, \ - "int", arg0, arg1, arg2); + mac-check-ok, "int", arg0, arg1, arg2); #define MAC_CHECK_PROBE_DEFINE2(name, arg0, arg1) \ SDT_PROBE_DEFINE3(mac_framework, kernel, name, mac_check_err, \ - "int", arg0, arg1); \ + mac-check-err, "int", arg0, arg1); \ SDT_PROBE_DEFINE3(mac_framework, kernel, name, mac_check_ok, \ - "int", arg0, arg1); + mac-check-ok, "int", arg0, arg1); #define MAC_CHECK_PROBE_DEFINE1(name, arg0) \ SDT_PROBE_DEFINE2(mac_framework, kernel, name, mac_check_err, \ - "int", arg0); \ + mac-check-err, "int", arg0); \ SDT_PROBE_DEFINE2(mac_framework, kernel, name, mac_check_ok, \ - "int", arg0); + mac-check-ok, "int", arg0); #define MAC_CHECK_PROBE4(name, error, arg0, arg1, arg2, arg3) do { \ if (error) { \ @@ -117,9 +117,9 @@ SDT_PROVIDER_DECLARE(mac_framework); /* Entry points to MAC. 
*/ #define MAC_GRANT_PROBE_DEFINE2(name, arg0, arg1) \ SDT_PROBE_DEFINE3(mac_framework, kernel, name, mac_grant_err, \ - "int", arg0, arg1); \ + mac-grant-err, "int", arg0, arg1); \ SDT_PROBE_DEFINE3(mac_framework, kernel, name, mac_grant_ok, \ - "INT", arg0, arg1); + mac-grant-ok, "int", arg0, arg1); #define MAC_GRANT_PROBE2(name, error, arg0, arg1) do { \ if (error) { \ diff --git a/sys/sys/dtrace_bsd.h b/sys/sys/dtrace_bsd.h index a14a1a160..2eded7b68 100644 --- a/sys/sys/dtrace_bsd.h +++ b/sys/sys/dtrace_bsd.h @@ -37,6 +37,7 @@ struct trapframe; struct thread; struct vattr; struct vnode; +struct reg; /* * Cyclic clock function type definition used to hook the cyclic @@ -71,6 +72,14 @@ typedef void (*dtrace_doubletrap_func_t)(void); extern dtrace_invop_func_t dtrace_invop_func; extern dtrace_doubletrap_func_t dtrace_doubletrap_func; +/* Pid provider hooks */ +typedef int (*dtrace_fasttrap_probe_ptr_t)(struct reg *); +extern dtrace_fasttrap_probe_ptr_t dtrace_fasttrap_probe_ptr; +typedef int (*dtrace_pid_probe_ptr_t)(struct reg *); +extern dtrace_pid_probe_ptr_t dtrace_pid_probe_ptr; +typedef int (*dtrace_return_probe_ptr_t)(struct reg *); +extern dtrace_return_probe_ptr_t dtrace_return_probe_ptr; + /* Virtual time hook function type. */ typedef void (*dtrace_vtime_switch_func_t)(struct thread *); diff --git a/sys/sys/param.h b/sys/sys/param.h index 0a97039ef..dac0afb0e 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. 
*/ #undef __FreeBSD_version -#define __FreeBSD_version 802500 /* Master, propagated to newvers */ +#define __FreeBSD_version 802501 /* Master, propagated to newvers */ #ifdef _KERNEL #define P_OSREL_SIGSEGV 700004 diff --git a/sys/sys/priv.h b/sys/sys/priv.h index 5738fca5d..44d1d9422 100644 --- a/sys/sys/priv.h +++ b/sys/sys/priv.h @@ -497,7 +497,7 @@ #ifdef _KERNEL /* - * Privilege check interfaces, modeled after historic suser() interfacs, but + * Privilege check interfaces, modeled after historic suser() interfaces, but * with the addition of a specific privilege name. No flags are currently * defined for the API. Historically, flags specified using the real uid * instead of the effective uid, and whether or not the check should be diff --git a/sys/sys/sdt.h b/sys/sys/sdt.h index eeae665c1..d9031023c 100644 --- a/sys/sys/sdt.h +++ b/sys/sys/sdt.h @@ -31,33 +31,66 @@ #ifndef _SYS_SDT_H #define _SYS_SDT_H -/* Stub these for the time being. */ -#define DTRACE_PROBE(name) -#define DTRACE_PROBE1(name, type1, arg1) -#define DTRACE_PROBE2(name, type1, arg1, type2, arg2) -#define DTRACE_PROBE3(name, type1, arg1, type2, arg2, type3, arg3) -#define DTRACE_PROBE4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) - #ifndef _KERNEL -/* The promise of things to come. Worlds to explore. People to meet. Things to do. 
*/ - -#else +#define _DTRACE_VERSION 1 + +#define DTRACE_PROBE(prov, name) { \ + extern void __dtrace_##prov##___##name(void); \ + __dtrace_##prov##___##name(); \ +} + +#define DTRACE_PROBE1(prov, name, arg1) { \ + extern void __dtrace_##prov##___##name(unsigned long); \ + __dtrace_##prov##___##name((unsigned long)arg1); \ +} + +#define DTRACE_PROBE2(prov, name, arg1, arg2) { \ + extern void __dtrace_##prov##___##name(unsigned long, \ + unsigned long); \ + __dtrace_##prov##___##name((unsigned long)arg1, \ + (unsigned long)arg2); \ +} + +#define DTRACE_PROBE3(prov, name, arg1, arg2, arg3) { \ + extern void __dtrace_##prov##___##name(unsigned long, \ + unsigned long, unsigned long); \ + __dtrace_##prov##___##name((unsigned long)arg1, \ + (unsigned long)arg2, (unsigned long)arg3); \ +} + +#define DTRACE_PROBE4(prov, name, arg1, arg2, arg3, arg4) { \ + extern void __dtrace_##prov##___##name(unsigned long, \ + unsigned long, unsigned long, unsigned long); \ + __dtrace_##prov##___##name((unsigned long)arg1, \ + (unsigned long)arg2, (unsigned long)arg3, \ + (unsigned long)arg4); \ +} + +#define DTRACE_PROBE5(prov, name, arg1, arg2, arg3, arg4, arg5) { \ + extern void __dtrace_##prov##___##name(unsigned long, \ + unsigned long, unsigned long, unsigned long, unsigned long);\ + __dtrace_##prov##___##name((unsigned long)arg1, \ + (unsigned long)arg2, (unsigned long)arg3, \ + (unsigned long)arg4, (unsigned long)arg5); \ +} + +#else /* _KERNEL */ #ifndef KDTRACE_HOOKS #define SDT_PROVIDER_DEFINE(prov) #define SDT_PROVIDER_DECLARE(prov) -#define SDT_PROBE_DEFINE(prov, mod, func, name) +#define SDT_PROBE_DEFINE(prov, mod, func, name, sname) #define SDT_PROBE_DECLARE(prov, mod, func, name) #define SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) #define SDT_PROBE_ARGTYPE(prov, mod, func, name, num, type) -#define SDT_PROBE_DEFINE1(prov, mod, func, name, arg0) -#define SDT_PROBE_DEFINE2(prov, mod, func, name, arg0, arg1) -#define SDT_PROBE_DEFINE3(prov, mod, func, 
name, arg0, arg1, arg2) -#define SDT_PROBE_DEFINE4(prov, mod, func, name, arg0, arg1, arg2, arg3) -#define SDT_PROBE_DEFINE5(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) +#define SDT_PROBE_DEFINE1(prov, mod, func, name, sname, arg0) +#define SDT_PROBE_DEFINE2(prov, mod, func, name, sname, arg0, arg1) +#define SDT_PROBE_DEFINE3(prov, mod, func, name, sname, arg0, arg1, arg2) +#define SDT_PROBE_DEFINE4(prov, mod, func, name, sname, arg0, arg1, arg2, arg3) +#define SDT_PROBE_DEFINE5(prov, mod, func, name, sname, arg0, arg1, arg2, arg3, arg4) #define SDT_PROBE1(prov, mod, func, name, arg0) #define SDT_PROBE2(prov, mod, func, name, arg0, arg1) @@ -135,10 +168,10 @@ struct sdt_provider { #define SDT_PROVIDER_DECLARE(prov) \ extern struct sdt_provider sdt_provider_##prov[1] -#define SDT_PROBE_DEFINE(prov, mod, func, name) \ +#define SDT_PROBE_DEFINE(prov, mod, func, name, sname) \ struct sdt_probe sdt_##prov##_##mod##_##func##_##name[1] = { \ { sizeof(struct sdt_probe), 0, sdt_provider_##prov, \ - { NULL, NULL }, { NULL, NULL }, #mod, #func, #name, 0, 0 } \ + { NULL, NULL }, { NULL, NULL }, #mod, #func, #sname, 0, 0 } \ }; \ SYSINIT(sdt_##prov##_##mod##_##func##_##name##_init, SI_SUB_KDTRACE, \ SI_ORDER_SECOND + 1, sdt_probe_register, \ @@ -168,30 +201,30 @@ struct sdt_provider { SI_SUB_KDTRACE, SI_ORDER_SECOND + 2, sdt_argtype_deregister, \ sdt_##prov##_##mod##_##func##_##name##num ) -#define SDT_PROBE_DEFINE1(prov, mod, func, name, arg0) \ - SDT_PROBE_DEFINE(prov, mod, func, name); \ +#define SDT_PROBE_DEFINE1(prov, mod, func, name, sname, arg0) \ + SDT_PROBE_DEFINE(prov, mod, func, name, sname); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 0, arg0) -#define SDT_PROBE_DEFINE2(prov, mod, func, name, arg0, arg1) \ - SDT_PROBE_DEFINE(prov, mod, func, name); \ +#define SDT_PROBE_DEFINE2(prov, mod, func, name, sname, arg0, arg1) \ + SDT_PROBE_DEFINE(prov, mod, func, name, sname); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 0, arg0); \ SDT_PROBE_ARGTYPE(prov, mod, func, 
name, 1, arg1) -#define SDT_PROBE_DEFINE3(prov, mod, func, name, arg0, arg1, arg2) \ - SDT_PROBE_DEFINE(prov, mod, func, name); \ +#define SDT_PROBE_DEFINE3(prov, mod, func, name, sname, arg0, arg1, arg2)\ + SDT_PROBE_DEFINE(prov, mod, func, name, sname); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 0, arg0); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 1, arg1); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 2, arg2) -#define SDT_PROBE_DEFINE4(prov, mod, func, name, arg0, arg1, arg2, arg3) \ - SDT_PROBE_DEFINE(prov, mod, func, name); \ +#define SDT_PROBE_DEFINE4(prov, mod, func, name, sname, arg0, arg1, arg2, arg3) \ + SDT_PROBE_DEFINE(prov, mod, func, name, sname); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 0, arg0); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 1, arg1); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 2, arg2); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 3, arg3) -#define SDT_PROBE_DEFINE5(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) \ - SDT_PROBE_DEFINE(prov, mod, func, name); \ +#define SDT_PROBE_DEFINE5(prov, mod, func, name, sname, arg0, arg1, arg2, arg3, arg4) \ + SDT_PROBE_DEFINE(prov, mod, func, name, sname); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 0, arg0); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 1, arg1); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 2, arg2); \ diff --git a/sys/sys/signal.h b/sys/sys/signal.h index cab3cfd34..154585d86 100644 --- a/sys/sys/signal.h +++ b/sys/sys/signal.h @@ -266,6 +266,7 @@ typedef struct __siginfo { /* codes for SIGTRAP */ #define TRAP_BRKPT 1 /* Process breakpoint. */ #define TRAP_TRACE 2 /* Process trace trap. */ +#define TRAP_DTRACE 3 /* DTrace induced trap. */ /* codes for SIGCHLD */ #define CLD_EXITED 1 /* Child has exited */ diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index 85c990c7d..4bf1355f9 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -42,7 +42,8 @@ struct ksiginfo; typedef int sy_call_t(struct thread *, void *); /* Used by the machine dependent syscall() code. 
*/ -typedef void (*systrace_probe_func_t)(u_int32_t, int, struct sysent *, void *); +typedef void (*systrace_probe_func_t)(u_int32_t, int, struct sysent *, void *, + int); /* * Used by loaded syscalls to convert arguments to a DTrace array diff --git a/sys/tools/vnode_if.awk b/sys/tools/vnode_if.awk index 86c6bdc52..3fdea0a24 100644 --- a/sys/tools/vnode_if.awk +++ b/sys/tools/vnode_if.awk @@ -355,8 +355,8 @@ while ((getline < srcfile) > 0) { printc("};"); printc("\n"); - printc("SDT_PROBE_DEFINE2(vfs, vop, " name ", entry, \"struct vnode *\", \"struct " name "_args *\");\n"); - printc("SDT_PROBE_DEFINE3(vfs, vop, " name ", return, \"struct vnode *\", \"struct " name "_args *\", \"int\");\n"); + printc("SDT_PROBE_DEFINE2(vfs, vop, " name ", entry, entry, \"struct vnode *\", \"struct " name "_args *\");\n"); + printc("SDT_PROBE_DEFINE3(vfs, vop, " name ", return, return, \"struct vnode *\", \"struct " name "_args *\", \"int\");\n"); # Print out function. printc("\nint\n" uname "_AP(struct " name "_args *a)"); diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index e8f9bc856..b36ff5fcc 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -93,6 +93,7 @@ CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); /* Magic IRQ values for the timer and syscalls. */ #define IRQ_TIMER (NUM_IO_INTS + 1) #define IRQ_SYSCALL (NUM_IO_INTS + 2) +#define IRQ_DTRACE_RET (NUM_IO_INTS + 3) /* * Support for local APICs. 
Local APICs manage interrupts on each @@ -278,6 +279,10 @@ lapic_create(u_int apic_id, int boot_cpu) lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER; +#ifdef KDTRACE_HOOKS + lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = IRQ_DTRACE_RET; +#endif + #ifdef SMP cpu_add(apic_id, boot_cpu); @@ -1033,6 +1038,10 @@ apic_enable_vector(u_int apic_id, u_int vector) KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); +#ifdef KDTRACE_HOOKS + KASSERT(vector != IDT_DTRACE_RET, + ("Attempt to overwrite DTrace entry")); +#endif setidt(vector, ioint_handlers[vector / 32], SDT_APIC, SEL_KPL, GSEL_APIC); } @@ -1042,6 +1051,10 @@ apic_disable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); +#ifdef KDTRACE_HOOKS + KASSERT(vector != IDT_DTRACE_RET, + ("Attempt to overwrite DTrace entry")); +#endif KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef notyet @@ -1065,6 +1078,10 @@ apic_free_vector(u_int apic_id, u_int vector, u_int irq) KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch")); +#ifdef KDTRACE_HOOKS + KASSERT(vector != IDT_DTRACE_RET, + ("Attempt to overwrite DTrace entry")); +#endif /* * Bind us to the cpu that owned the vector before freeing it so @@ -1097,6 +1114,10 @@ apic_idt_to_irq(u_int apic_id, u_int vector) KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); +#ifdef KDTRACE_HOOKS + KASSERT(vector != IDT_DTRACE_RET, + ("Attempt to overwrite DTrace entry")); +#endif irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]; if (irq < 0) irq = 0; @@ -1128,6 +1149,10 
@@ DB_SHOW_COMMAND(apic, db_show_apic) irq = lapics[apic_id].la_ioint_irqs[i]; if (irq == -1 || irq == IRQ_SYSCALL) continue; +#ifdef KDTRACE_HOOKS + if (irq == IRQ_DTRACE_RET) + continue; +#endif db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); -- 2.45.0