4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include <sys/fasttrap_isa.h>
28 #include <sys/fasttrap_impl.h>
29 #include <sys/dtrace.h>
30 #include <sys/dtrace_impl.h>
31 #include <sys/cmn_err.h>
32 #include <sys/frame.h>
33 #include <sys/stack.h>
34 #include <sys/sysmacros.h>
37 #include <v9/sys/machpcb.h>
38 #include <v9/sys/privregs.h>
41 * Lossless User-Land Tracing on SPARC
42 * -----------------------------------
46 * The most important design constraint is, of course, correct execution of
47 * the user thread above all else. The next most important goal is rapid
48 * execution. We combine execution of instructions in user-land with
49 * emulation of certain instructions in the kernel to aim for complete
50 * correctness and maximal performance.
52 * We take advantage of the split PC/NPC architecture to speed up logical
53 * single-stepping; when we copy an instruction out to the scratch space in
54 * the ulwp_t structure (held in the %g7 register on SPARC), we can
55 * effectively single step by setting the PC to our scratch space and leaving
56 * the NPC alone. This executes the replaced instruction and then continues
57 * on without having to reenter the kernel as with single-stepping. The
58 * obvious caveat is for instructions whose execution is PC dependent --
59 * branches, call and link instructions (call and jmpl), and the rdpc
60 * instruction. These instructions cannot be executed in the manner described
61 * so they must be emulated in the kernel.
63 * Emulation for this small set of instructions is fairly simple; the most
64 * difficult part being emulating branch conditions.
67 * A Cache Heavy Portfolio
69 * It's important to note at this time that copying an instruction out to the
70 * ulwp_t scratch space in user-land is rather complicated. SPARC has
71 * separate data and instruction caches so any writes to the D$ (using a
72 * store instruction for example) aren't necessarily reflected in the I$.
73 * The flush instruction can be used to synchronize the two and must be used
74 * for any self-modifying code, but the flush instruction only applies to the
75 * primary address space (the absence of a flusha analogue to the flush
76 * instruction that accepts an ASI argument is an obvious omission from SPARC
77 * v9 where the notion of the alternate address space was introduced on
78 * SPARC). To correctly copy out the instruction we must use a block store
79 * that doesn't allocate in the D$ and ensures synchronization with the I$;
80 * see dtrace_blksuword32() for the implementation (this function uses
81 * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
82 * described). Refer to the UltraSPARC I/II manual for details on the
83 * ASI_BLK_COMMIT_S ASI.
88 * When we're firing a return probe we need to expose the value returned by
89 * the function being traced. Since the function can set the return value
90 * in its last instruction, we need to fire the return probe only _after_
91 * the effects of the instruction are apparent. For instructions that we
92 * emulate, we can call dtrace_probe() after we've performed the emulation;
93 * for instructions that we execute after we return to user-land, we set
94 * %pc to the instruction we copied out (as described above) and set %npc
95 * to a trap instruction stashed in the ulwp_t structure. After the traced
96 * instruction is executed, the trap instruction returns control to the
97 * kernel where we can fire the return probe.
99 * This need for a second trap in cases where we execute the traced
100 * instruction makes it all the more important to emulate the most common
101 * instructions to avoid the second trip in and out of the kernel.
106 * Since copying out an instruction is neither simple nor inexpensive for the
107 * CPU, we should attempt to avoid doing it in as many cases as possible.
108 * Since function entry and return are usually the most interesting probe
109 * sites, we attempt to tune the performance of the fasttrap provider around
110 * instructions typically in those places.
112 * Looking at a bunch of functions in libraries and executables reveals that
113 * most functions begin with either a save or a sethi (to setup a larger
114 * argument to the save) and end with a restore or an or (in the case of leaf
115 * functions). To try to improve performance, we emulate all of these
116 * instructions in the kernel.
118 * The save and restore instructions are a little tricky since they perform
119 * register window manipulation. Rather than trying to tinker with the
120 * register windows from the kernel, we emulate the implicit add that takes
121 * place as part of those instructions and set the %pc to point to a simple
122 * save or restore we've hidden in the ulwp_t structure. If we're in a return
123 * probe so want to make it seem as though the tracepoint has been completely
124 * executed we need to remember that we've pulled this trick with restore and
125 * pull registers from the previous window (the one that we'll switch to once
126 * the simple restore instruction is executed) rather than the current one. This
127 * is why in the case of emulating a restore we set the DTrace CPU flag
128 * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
129 * (see fasttrap_return_common()).
/*
 * Field extractors for a SPARC instruction word. Each macro shifts the
 * field of interest down to bit 0 and masks it to the field's width:
 *
 *	OP	bits 31:30	OP2	bits 24:22	OP3	bits 24:19
 *	RCOND	bits 27:25	COND	bits 28:25	A	bit 29
 *	I	bit 13		RD	bits 29:25	RS1	bits 18:14
 *	RS2	bits 4:0	CC	bits 21:20
 *
 * OP() applies no mask, so it relies on its argument being an unsigned
 * 32-bit value (NOTE(review): confirm the type of fasttrap_instr_t).
 *
 * The DISPnn macros return the raw n-bit displacement field; they mask
 * only and do not sign-extend. DISP16 is split in the instruction word:
 * bits 21:20 supply displacement bits 15:14 and bits 13:0 supply the rest.
 * SW_TRAP yields the low 7 bits of the word.
 */
132 #define OP(x) ((x) >> 30)
133 #define OP2(x) (((x) >> 22) & 0x07)
134 #define OP3(x) (((x) >> 19) & 0x3f)
135 #define RCOND(x) (((x) >> 25) & 0x07)
136 #define COND(x) (((x) >> 25) & 0x0f)
137 #define A(x) (((x) >> 29) & 0x01)
138 #define I(x) (((x) >> 13) & 0x01)
139 #define RD(x) (((x) >> 25) & 0x1f)
140 #define RS1(x) (((x) >> 14) & 0x1f)
141 #define RS2(x) (((x) >> 0) & 0x1f)
142 #define CC(x) (((x) >> 20) & 0x03)
143 #define DISP16(x) ((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
144 #define DISP22(x) ((x) & 0x3fffff)
145 #define DISP19(x) ((x) & 0x7ffff)
146 #define DISP30(x) ((x) & 0x3fffffff)
147 #define SW_TRAP(x) ((x) & 0x7f)
/*
 * Opcode values compared against OP3() and OP2() while decoding an
 * instruction in fasttrap_tracepoint_init().
 *
 * OP3_CASA and OP3_PREFETCHA carry the same numeric values as OP3_SAVE and
 * OP3_RESTORE. The PREFETCH/CAS group is tested on the OP(instr) == 3 path,
 * whereas OP3_RETURN is tested on an OP(instr) == 2 word, so the two groups
 * are not compared against the same instructions.
 *
 * The FASTTRAP_OFF_* values are byte offsets that fasttrap_pid_probe() adds
 * to rp->r_g7 to form the %pc (or, for FASTTRAP_OFF_FTRET, the
 * t_dtrace_astpc value) it hands back to the thread.
 *
 * BREAKPOINT_INSTR is accepted alongside FASTTRAP_INSTR by
 * fasttrap_tracepoint_remove() when it checks what is currently at the
 * tracepoint address.
 */
151 #define OP3_JMPL 0x38
152 #define OP3_RETURN 0x39
154 #define OP3_SAVE 0x3c
155 #define OP3_RESTORE 0x3d
157 #define OP3_PREFETCH 0x2d
158 #define OP3_CASA 0x3c
159 #define OP3_PREFETCHA 0x3d
160 #define OP3_CASXA 0x3e
162 #define OP2_ILLTRAP 0x0
166 #define OP2_SETHI 0x4
167 #define OP2_FBPfcc 0x5
168 #define OP2_FBfcc 0x6
180 * Check the comment in fasttrap.h when changing these offsets or adding
183 #define FASTTRAP_OFF_SAVE 64
184 #define FASTTRAP_OFF_RESTORE 68
185 #define FASTTRAP_OFF_FTRET 72
186 #define FASTTRAP_OFF_RETURN 76
188 #define BREAKPOINT_INSTR 0x91d02001 /* ta 1 */
191 * Tunable to let users turn off the fancy save instruction optimization.
192 * If a program is non-ABI compliant, there's a possibility that the save
193 * instruction optimization could cause an error.
/*
 * Non-zero (the default) lets fasttrap_tracepoint_init() classify an entry
 * probe's "save ..., <imm>, %sp" instruction as FASTTRAP_T_SAVE.
 */
195 int fasttrap_optimize_save = 1;
/*
 * Fetch argument number argno of the traced thread from its saved register
 * state rp.
 *
 * The early return indexes the saved %o registers starting at rp->r_o0.
 * NOTE(review): the condition guarding that return is not visible in this
 * view -- presumably it selects the register-passed arguments; confirm.
 *
 * Otherwise the value is read from the fr_argd[] array of the user stack
 * frame located via rp->r_sp, using the frame layout that matches the
 * process data model.
 */
198 fasttrap_anarg(struct regs *rp, int argno)
203 return ((&rp->r_o0)[argno]);
205 if (curproc->p_model == DATAMODEL_NATIVE) {
206 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
/*
 * Read the user word with CPU_DTRACE_NOFAULT set, then clear NOFAULT
 * together with the BADADDR and BADALIGN flags.
 */
208 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
209 value = dtrace_fulword(&fr->fr_argd[argno]);
210 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
211 CPU_DTRACE_BADALIGN);
/* Non-native model: struct frame32 layout, and no STACK_BIAS is added. */
213 struct frame32 *fr = (struct frame32 *)rp->r_sp;
215 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
216 value = dtrace_fuword32(&fr->fr_argd[argno]);
217 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
218 CPU_DTRACE_BADALIGN);
/*
 * Forward declarations: the register accessors are defined later in the
 * file but are called by the functions that follow.
 */
224 static ulong_t fasttrap_getreg(struct regs *, uint_t);
225 static void fasttrap_putreg(struct regs *, uint_t, ulong_t);
/*
 * Populate argv[0 .. argc) with the arguments of a USDT probe.
 *
 * For each of the first MIN(argc, probe->ftp_nargs) slots, the probe's
 * argument map (probe->ftp_argmap[i]) names the native argument index x to
 * fetch. The value comes either from a register, via
 * fasttrap_getreg(rp, R_O0 + x + inc), or from the fr_argd[x] slot of the
 * user stack frame at rp->r_sp. When fake_restore is set, inc is 16, which
 * offsets the register number from R_O0.
 *
 * NOTE(review): the test that chooses register versus stack, the stores
 * into argv[] on the stack path, and the body of the trailing loop over
 * the remaining slots are not visible in this view.
 */
228 fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
229 uint_t fake_restore, int argc, uintptr_t *argv)
231 int i, x, cap = MIN(argc, probe->ftp_nargs);
232 int inc = (fake_restore ? 16 : 0);
235 * The only way we'll hit the fake_restore case is if a USDT probe is
236 * invoked as a tail-call. While it wouldn't be incorrect, we can
237 * avoid a call to fasttrap_getreg(), and safely use rp->r_sp
238 * directly since a tail-call can't be made if the invoked function
239 * would use the argument dump space (i.e. if there were more than
240 * 6 arguments). We take this shortcut because unconditionally rooting
241 * around for R_FP (R_SP + 16) would be unnecessarily painful.
/* Native data model: struct frame layout, with STACK_BIAS added to %sp. */
244 if (curproc->p_model == DATAMODEL_NATIVE) {
245 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
248 for (i = 0; i < cap; i++) {
249 x = probe->ftp_argmap[i];
252 argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
253 else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
/* Non-native data model: struct frame32 layout, no STACK_BIAS. */
258 struct frame32 *fr = (struct frame32 *)rp->r_sp;
261 for (i = 0; i < cap; i++) {
262 x = probe->ftp_argmap[i];
265 argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
266 else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
/* i continues from cap: handle the slots beyond the mapped arguments. */
271 for (; i < argc; i++) {
277 fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
280 fasttrap_tracepoint_t *tp;
281 fasttrap_bucket_t *bucket;
284 dtrace_icookie_t cookie;
286 pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
287 mutex_enter(pid_mtx);
288 bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
290 for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
291 if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
292 tp->ftt_proc->ftpc_acount != 0)
297 * Don't sweat it if we can't find the tracepoint again; unlike
298 * when we're in fasttrap_pid_probe(), finding the tracepoint here
299 * is not essential to the correct execution of the process.
301 if (tp == NULL || tp->ftt_retids == NULL) {
306 for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
307 fasttrap_probe_t *probe = id->fti_probe;
309 if (id->fti_ptype == DTFTP_POST_OFFSETS) {
310 if (probe->ftp_argmap != NULL && fake_restore) {
313 fasttrap_usdt_args(probe, rp, fake_restore,
314 sizeof (t) / sizeof (t[0]), t);
316 cookie = dtrace_interrupt_disable();
317 DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
318 dtrace_probe(probe->ftp_id, t[0], t[1],
320 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
321 dtrace_interrupt_enable(cookie);
323 } else if (probe->ftp_argmap != NULL) {
326 fasttrap_usdt_args(probe, rp, fake_restore,
327 sizeof (t) / sizeof (t[0]), t);
329 dtrace_probe(probe->ftp_id, t[0], t[1],
332 } else if (fake_restore) {
333 uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
334 uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
335 uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
336 uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
337 uintptr_t arg4 = fasttrap_getreg(rp, R_I4);
339 cookie = dtrace_interrupt_disable();
340 DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
341 dtrace_probe(probe->ftp_id, arg0, arg1,
343 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
344 dtrace_interrupt_enable(cookie);
347 dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
348 rp->r_o2, rp->r_o3, rp->r_o4);
355 * If this is only a possible return point, we must
356 * be looking at a potential tail call in leaf context.
357 * If the %npc is still within this function, then we
358 * must have misidentified a jmpl as a tail-call when it
359 * is, in fact, part of a jump table. It would be nice to
360 * remove this tracepoint, but this is neither the time
363 if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
364 rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
368 * It's possible for a function to branch to the delay slot
369 * of an instruction that we've identified as a return site.
370 * We can detect this spurious return probe activation by
371 * observing that in this case %npc will be %pc + 4 and %npc
372 * will be inside the current function (unless the user is
373 * doing _crazy_ instruction picking in which case there's
374 * very little we can do). The second check is important
375 * in case the last instructions of a function make a tail-
376 * call to the function located immediately subsequent.
378 if (rp->r_npc == rp->r_pc + 4 &&
379 rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
383 * The first argument is the offset of return tracepoint
384 * in the function; the remaining arguments are the return
387 * If fake_restore is set, we need to pull the return values
388 * out of the %i's rather than the %o's -- a little trickier.
391 dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
392 rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
394 uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
395 uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
396 uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
397 uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
399 cookie = dtrace_interrupt_disable();
400 DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
401 dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
402 arg0, arg1, arg2, arg3);
403 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
404 dtrace_interrupt_enable(cookie);
412 fasttrap_pid_probe(struct regs *rp)
415 fasttrap_tracepoint_t *tp, tp_local;
418 uintptr_t pc = rp->r_pc;
419 uintptr_t npc = rp->r_npc;
420 uintptr_t orig_pc = pc;
421 fasttrap_bucket_t *bucket;
423 uint_t fake_restore = 0, is_enabled = 0;
424 dtrace_icookie_t cookie;
427 * It's possible that a user (in a veritable orgy of bad planning)
428 * could redirect this thread's flow of control before it reached the
429 * return probe fasttrap. In this case we need to kill the process
430 * since it's in an unrecoverable state.
432 if (curthread->t_dtrace_step) {
433 ASSERT(curthread->t_dtrace_on);
434 fasttrap_sigtrap(p, curthread, pc);
439 * Clear all user tracing flags.
441 curthread->t_dtrace_ft = 0;
442 curthread->t_dtrace_pc = 0;
443 curthread->t_dtrace_npc = 0;
444 curthread->t_dtrace_scrpc = 0;
445 curthread->t_dtrace_astpc = 0;
448 * Treat a child created by a call to vfork(2) as if it were its
449 * parent. We know that there's only one thread of control in such a
452 while (p->p_flag & SVFORK) {
457 pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
458 mutex_enter(pid_mtx);
459 bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
462 * Lookup the tracepoint that the process just hit.
464 for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
465 if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
466 tp->ftt_proc->ftpc_acount != 0)
471 * If we couldn't find a matching tracepoint, either a tracepoint has
472 * been inserted without using the pid<pid> ioctl interface (see
473 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
480 for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
481 fasttrap_probe_t *probe = id->fti_probe;
482 int isentry = (id->fti_ptype == DTFTP_ENTRY);
484 if (id->fti_ptype == DTFTP_IS_ENABLED) {
490 * We note that this was an entry probe to help ustack() find
494 cookie = dtrace_interrupt_disable();
495 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
497 dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
500 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
501 dtrace_interrupt_enable(cookie);
506 * We're about to do a bunch of work so we cache a local copy of
507 * the tracepoint to emulate the instruction, and then find the
508 * tracepoint again later if we need to light up any return probes.
515 * If there's an is-enabled probe connected to this tracepoint it
516 * means that there was a 'mov %g0, %o0' instruction that was placed
517 * there by DTrace when the binary was linked. As this probe is, in
518 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
519 * bypass all the instruction emulation logic since we know the
520 * inevitable result. It's possible that a user could construct a
521 * scenario where the 'is-enabled' probe was on some other
522 * instruction, but that would be a rather exotic way to shoot oneself
533 * We emulate certain types of instructions to ensure correctness
534 * (in the case of position dependent instructions) or optimize
535 * common cases. The rest we have the thread execute back in user-
538 switch (tp->ftt_type) {
539 case FASTTRAP_T_SAVE:
544 * This is an optimization to let us handle function entry
545 * probes more efficiently. Many functions begin with a save
546 * instruction that follows the pattern:
547 * save %sp, <imm>, %sp
549 * Meanwhile, we've stashed the instruction:
552 * off of %g7, so all we have to do is stick the right value
553 * into %g1 and reset %pc to point to the instruction we've
554 * cleverly hidden (%npc should not be touched).
557 imm = tp->ftt_instr << 19;
559 rp->r_g1 = rp->r_sp + imm;
560 pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
564 case FASTTRAP_T_RESTORE:
570 * This is an optimization to let us handle function
571 * return probes more efficiently. Most non-leaf functions
572 * end with the sequence:
574 * restore <reg>, <reg_or_imm>, %oX
576 * We've stashed the instruction:
577 * restore %g0, %g0, %g0
579 * off of %g7 so we just need to place the correct value
580 * in the right %i register (since after our fake-o
581 * restore, the %i's will become the %o's) and set the %pc
582 * to point to our hidden restore. We also set fake_restore to
583 * let fasttrap_return_common() know that it will find the
584 * return values in the %i's rather than the %o's.
587 if (I(tp->ftt_instr)) {
590 imm = tp->ftt_instr << 19;
592 value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
594 value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
595 fasttrap_getreg(rp, RS2(tp->ftt_instr));
599 * Convert %o's to %i's; leave %g's as they are.
601 rd = RD(tp->ftt_instr);
602 fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);
604 pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
609 case FASTTRAP_T_RETURN:
614 * A return instruction is like a jmpl (without the link
615 * part) that executes an implicit restore. We've stashed
619 * off of %g7 so we just need to place the target in %o0
620 * and set the %pc to point to the stashed return instruction.
621 * We use %o0 since that register disappears after the return
622 * executes, erasing any evidence of this tampering.
624 if (I(tp->ftt_instr)) {
627 imm = tp->ftt_instr << 19;
629 target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
631 target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
632 fasttrap_getreg(rp, RS2(tp->ftt_instr));
635 fasttrap_putreg(rp, R_O0, target);
637 pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
646 if (I(tp->ftt_instr)) {
649 imm = tp->ftt_instr << 19;
651 value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
653 value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
654 fasttrap_getreg(rp, RS2(tp->ftt_instr));
657 fasttrap_putreg(rp, RD(tp->ftt_instr), value);
663 case FASTTRAP_T_SETHI:
664 if (RD(tp->ftt_instr) != R_G0) {
665 uint32_t imm32 = tp->ftt_instr << 10;
666 fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
674 uint_t c, v, z, n, taken;
675 uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;
685 switch (tp->ftt_code) {
691 taken = z | (n ^ v); break;
693 taken = n ^ v; break;
695 taken = c | z; break;
696 case 0x5: /* BCS (BLU) */
704 * We handle the BA case differently since the annul
705 * bit means something slightly different.
707 panic("fasttrap: mishandled a branch");
712 taken = ~(z | (n ^ v)); break;
714 taken = ~(n ^ v); break;
716 taken = ~(c | z); break;
717 case 0xd: /* BCC (BGEU) */
728 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
730 * Untaken annulled branches don't execute the
731 * instruction in the delay slot.
750 if (tp->ftt_cc == 0) {
751 fcc = (fsr >> 10) & 0x3;
754 ASSERT(tp->ftt_cc <= 3);
755 shift = 30 + tp->ftt_cc * 2;
756 fcc = (fsr >> shift) & 0x3;
759 switch (tp->ftt_code) {
761 taken = (1 << fcc) & (0|0|0|0); break;
763 taken = (1 << fcc) & (8|4|2|0); break;
765 taken = (1 << fcc) & (0|4|2|0); break;
767 taken = (1 << fcc) & (8|0|2|0); break;
769 taken = (1 << fcc) & (0|0|2|0); break;
771 taken = (1 << fcc) & (8|4|0|0); break;
773 taken = (1 << fcc) & (0|4|0|0); break;
775 taken = (1 << fcc) & (8|0|0|0); break;
778 * We handle the FBA case differently since the annul
779 * bit means something slightly different.
781 panic("fasttrap: mishandled a branch");
782 taken = (1 << fcc) & (8|4|2|1); break;
784 taken = (1 << fcc) & (0|0|0|1); break;
786 taken = (1 << fcc) & (8|0|0|1); break;
788 taken = (1 << fcc) & (0|4|0|1); break;
789 case 0xc: /* FBUGE */
790 taken = (1 << fcc) & (8|4|0|1); break;
792 taken = (1 << fcc) & (0|0|2|1); break;
793 case 0xe: /* FBULE */
794 taken = (1 << fcc) & (8|0|2|1); break;
796 taken = (1 << fcc) & (0|4|2|1); break;
802 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
804 * Untaken annulled branches don't execute the
805 * instruction in the delay slot.
820 uint_t reg = RS1(tp->ftt_instr);
823 * An ILP32 process shouldn't be using a branch predicated on
824 * an %i or an %l since it would violate the ABI. It's a
825 * violation of the ABI because we can't ensure deterministic
826 * behavior. We should have identified this case when we
829 ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);
831 value = (int64_t)fasttrap_getreg(rp, reg);
833 switch (tp->ftt_code) {
835 taken = (value == 0); break;
836 case 0x2: /* BRLEZ */
837 taken = (value <= 0); break;
839 taken = (value < 0); break;
841 taken = (value != 0); break;
843 taken = (value > 0); break;
844 case 0x7: /* BRGEZ */
845 taken = (value >= 0); break;
849 panic("fasttrap: mishandled a branch");
855 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
857 * Untaken annulled branches don't execute the
858 * instruction in the delay slot.
869 case FASTTRAP_T_ALWAYS:
874 if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
876 * Annulled branch always instructions never execute
877 * the instruction in the delay slot.
880 npc = tp->ftt_dest + 4;
887 case FASTTRAP_T_RDPC:
888 fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
893 case FASTTRAP_T_CALL:
895 * It's a call _and_ link remember...
902 case FASTTRAP_T_JMPL:
905 if (I(tp->ftt_instr)) {
906 uint_t rs1 = RS1(tp->ftt_instr);
909 imm = tp->ftt_instr << 19;
911 npc = fasttrap_getreg(rp, rs1) + imm;
913 uint_t rs1 = RS1(tp->ftt_instr);
914 uint_t rs2 = RS2(tp->ftt_instr);
916 npc = fasttrap_getreg(rp, rs1) +
917 fasttrap_getreg(rp, rs2);
921 * Do the link part of the jump-and-link instruction.
923 fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
927 case FASTTRAP_T_COMMON:
929 curthread->t_dtrace_scrpc = rp->r_g7;
930 curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;
933 * Copy the instruction to a reserved location in the
934 * user-land thread structure, then set the PC to that
935 * location and leave the NPC alone. We take pains to ensure
936 * consistency in the instruction stream (See SPARC
937 * Architecture Manual Version 9, sections 8.4.7, A.20, and
938 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
939 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
940 * instruction into the user's address space without
941 * bypassing the I$. There's no AS_USER version of this ASI
942 * (as exist for other ASIs) so we use the lofault
943 * mechanism to catch faults.
945 if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
947 * If the copyout fails, then the process's state
948 * is not consistent (the effects of the traced
949 * instruction will never be seen). This process
950 * cannot be allowed to continue execution.
952 fasttrap_sigtrap(curproc, curthread, pc);
956 curthread->t_dtrace_pc = pc;
957 curthread->t_dtrace_npc = npc;
958 curthread->t_dtrace_on = 1;
960 pc = curthread->t_dtrace_scrpc;
962 if (tp->ftt_retids != NULL) {
963 curthread->t_dtrace_step = 1;
964 curthread->t_dtrace_ret = 1;
965 npc = curthread->t_dtrace_astpc;
971 panic("fasttrap: mishandled an instruction");
975 * This bit me in the ass a couple of times, so let's toss this
976 * in as a cursory sanity check.
978 ASSERT(pc != rp->r_g7 + 4);
979 ASSERT(pc != rp->r_g7 + 8);
983 * If there were no return probes when we first found the tracepoint,
984 * we should feel no obligation to honor any return probes that were
985 * subsequently enabled -- they'll just have to wait until the next
988 if (tp->ftt_retids != NULL) {
990 * We need to wait until the results of the instruction are
991 * apparent before invoking any return probes. If this
992 * instruction was emulated we can just call
993 * fasttrap_return_common(); if it needs to be executed, we
994 * need to wait until we return to the kernel.
996 if (tp->ftt_type != FASTTRAP_T_COMMON) {
997 fasttrap_return_common(rp, orig_pc, pid, fake_restore);
999 ASSERT(curthread->t_dtrace_ret != 0);
1000 ASSERT(curthread->t_dtrace_pc == orig_pc);
1001 ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
1002 ASSERT(npc == curthread->t_dtrace_astpc);
/*
 * Fire the return probes for a traced instruction that the thread executed
 * itself rather than one that was emulated.
 *
 * The original %pc and %npc of the traced instruction are taken from
 * curthread->t_dtrace_pc and t_dtrace_npc, which fasttrap_pid_probe()
 * recorded on its FASTTRAP_T_COMMON path. The per-thread pc/npc/scrpc/astpc
 * fields are then zeroed, and fasttrap_return_common() is called with
 * fake_restore == 0.
 *
 * NOTE(review): the body of the SVFORK loop, the computation of pid, the
 * stores of pc/npc into rp, and the return statement are not visible in
 * this view.
 */
1014 fasttrap_return_probe(struct regs *rp)
1016 proc_t *p = ttoproc(curthread);
/* Capture the saved addresses before the thread fields are cleared below. */
1018 uintptr_t pc = curthread->t_dtrace_pc;
1019 uintptr_t npc = curthread->t_dtrace_npc;
1021 curthread->t_dtrace_pc = 0;
1022 curthread->t_dtrace_npc = 0;
1023 curthread->t_dtrace_scrpc = 0;
1024 curthread->t_dtrace_astpc = 0;
1027 * Treat a child created by a call to vfork(2) as if it were its
1028 * parent. We know there's only one thread of control in such a
1029 * process: this one.
1031 while (p->p_flag & SVFORK) {
1036 * We set the %pc and %npc to their values when the traced
1037 * instruction was initially executed so that it appears to
1038 * dtrace_probe() that we're on the original instruction, and so that
1039 * the user can't easily detect our complex web of lies.
1040 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
/* The final argument is fake_restore: 0 on this path. */
1047 fasttrap_return_common(rp, pc, pid, 0);
/*
 * Install a tracepoint by writing the 4-byte FASTTRAP_INSTR over the
 * instruction at tp->ftt_pc in process p's address space, using uwrite().
 *
 * NOTE(review): the values returned on success and on uwrite() failure are
 * not visible in this view.
 */
1053 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
1055 fasttrap_instr_t instr = FASTTRAP_INSTR;
1057 if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
/*
 * Remove a tracepoint by writing the saved original instruction
 * (tp->ftt_instr) back to tp->ftt_pc in process p's address space.
 *
 * The current word at tp->ftt_pc is read first. The restore path is only
 * relevant when that word is FASTTRAP_INSTR or BREAKPOINT_INSTR; any other
 * value takes the branch at the second test.
 *
 * NOTE(review): the statements executed under each of the three tests, and
 * the function's return values, are not visible in this view.
 */
1064 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
1066 fasttrap_instr_t instr;
1069 * Distinguish between read or write failures and a changed
1072 if (uread(p, &instr, 4, tp->ftt_pc) != 0)
1074 if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
1076 if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
1083 fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
1084 fasttrap_probe_type_t type)
1090 * Read the instruction at the given address out of the process's
1091 * address space. We don't have to worry about a debugger
1092 * changing this instruction before we overwrite it with our trap
1093 * instruction since P_PR_LOCK is set.
1095 if (uread(p, &instr, 4, pc) != 0)
1099 * Decode the instruction to fill in the probe flags. We can have
1100 * the process execute most instructions on its own using a pc/npc
1101 * trick, but pc-relative control transfers present a problem since
1102 * we're relocating the instruction. We emulate these instructions
1103 * in the kernel. We assume a default type and over-write that as
1106 * pc-relative instructions must be emulated for correctness;
1107 * other instructions (which represent a large set of commonly traced
1108 * instructions) are emulated or otherwise optimized for performance.
1110 tp->ftt_type = FASTTRAP_T_COMMON;
1111 if (OP(instr) == 1) {
1113 * Call instructions.
1115 tp->ftt_type = FASTTRAP_T_CALL;
1116 disp = DISP30(instr) << 2;
1117 tp->ftt_dest = pc + (intptr_t)disp;
1119 } else if (OP(instr) == 0) {
1121 * Branch instructions.
1123 * Unconditional branches need careful attention when they're
1124 * annulled: annulled unconditional branches never execute
1125 * the instruction in the delay slot.
1127 switch (OP2(instr)) {
1131 * The compiler may place an illtrap after a call to
1132 * a function that returns a structure. In the case of
1133 * a returned structure, the compiler places an illtrap
1134 * whose const22 field is the size of the returned
1135 * structure immediately following the delay slot of
1136 * the call. To stay out of the way, we refuse to
1137 * place tracepoints on top of illtrap instructions.
1139 * This is one of the dumbest architectural decisions
1140 * I've ever had to work around.
1142 * We also identify the only illegal op2 value (See
1143 * SPARC Architecture Manual Version 9, E.2 table 31).
1148 if (COND(instr) == 8) {
1149 tp->ftt_type = FASTTRAP_T_ALWAYS;
1152 * Check for an illegal instruction.
1156 tp->ftt_type = FASTTRAP_T_CCR;
1157 tp->ftt_cc = CC(instr);
1158 tp->ftt_code = COND(instr);
1162 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1164 disp = DISP19(instr);
1167 tp->ftt_dest = pc + (intptr_t)disp;
1171 if (COND(instr) == 8) {
1172 tp->ftt_type = FASTTRAP_T_ALWAYS;
1174 tp->ftt_type = FASTTRAP_T_CCR;
1176 tp->ftt_code = COND(instr);
1180 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1182 disp = DISP22(instr);
1185 tp->ftt_dest = pc + (intptr_t)disp;
1190 * Check for an illegal instruction.
1192 if ((RCOND(instr) & 3) == 0)
1196 * It's a violation of the v8plus ABI to use a
1197 * register-predicated branch in a 32-bit app if
1198 * the register used is an %l or an %i (%gs and %os
1199 * are legit because they're not saved to the stack
1200 * in 32-bit words when we take a trap).
1202 if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
1205 tp->ftt_type = FASTTRAP_T_REG;
1207 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1208 disp = DISP16(instr);
1211 tp->ftt_dest = pc + (intptr_t)disp;
1212 tp->ftt_code = RCOND(instr);
1216 tp->ftt_type = FASTTRAP_T_SETHI;
1220 if (COND(instr) == 8) {
1221 tp->ftt_type = FASTTRAP_T_ALWAYS;
1223 tp->ftt_type = FASTTRAP_T_FCC;
1224 tp->ftt_cc = CC(instr);
1225 tp->ftt_code = COND(instr);
1229 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1231 disp = DISP19(instr);
1234 tp->ftt_dest = pc + (intptr_t)disp;
1238 if (COND(instr) == 8) {
1239 tp->ftt_type = FASTTRAP_T_ALWAYS;
1241 tp->ftt_type = FASTTRAP_T_FCC;
1243 tp->ftt_code = COND(instr);
1247 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1249 disp = DISP22(instr);
1252 tp->ftt_dest = pc + (intptr_t)disp;
1256 } else if (OP(instr) == 2) {
1257 switch (OP3(instr)) {
1259 tp->ftt_type = FASTTRAP_T_RETURN;
1263 tp->ftt_type = FASTTRAP_T_JMPL;
1267 if (RS1(instr) == 5)
1268 tp->ftt_type = FASTTRAP_T_RDPC;
1273 * We optimize for save instructions at function
1274 * entry; see the comment in fasttrap_pid_probe()
1275 * (near FASTTRAP_T_SAVE) for details.
1277 if (fasttrap_optimize_save != 0 &&
1278 type == DTFTP_ENTRY &&
1279 I(instr) == 1 && RD(instr) == R_SP)
1280 tp->ftt_type = FASTTRAP_T_SAVE;
1285 * We optimize restore instructions at function
1286 * return; see the comment in fasttrap_pid_probe()
1287 * (near FASTTRAP_T_RESTORE) for details.
1289 * rd must be an %o or %g register.
1291 if ((RD(instr) & 0x10) == 0)
1292 tp->ftt_type = FASTTRAP_T_RESTORE;
1297 * A large proportion of instructions in the delay
1298 * slot of retl instructions are or's so we emulate
1299 * these downstairs as an optimization.
1301 tp->ftt_type = FASTTRAP_T_OR;
1306 * Breakpoint instructions are effectively position-
1307 * dependent since the debugger uses the %pc value
1308 * to lookup which breakpoint was executed. As a
1309 * result, we can't actually instrument breakpoints.
1311 if (SW_TRAP(instr) == ST_BREAKPOINT)
1321 * Identify illegal instructions (See SPARC
1322 * Architecture Manual Version 9, E.2 table 32).
1326 } else if (OP(instr) == 3) {
1327 uint32_t op3 = OP3(instr);
1330 * Identify illegal instructions (See SPARC Architecture
1331 * Manual Version 9, E.2 table 33).
1333 if ((op3 & 0x28) == 0x28) {
1334 if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
1335 op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
1338 if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
1343 tp->ftt_instr = instr;
1346 * We don't know how this tracepoint is going to be used, but in case
1347 * it's used as part of a function return probe, we need to indicate
1348 * whether it's always a return site or only potentially a return
1349 * site. If it's part of a return probe, it's always going to be a
1350 * return from that function if it's a restore instruction or if
1351 * the previous instruction was a return. If we could reliably
1352 * distinguish jump tables from return sites, this wouldn't be
1355 if (tp->ftt_type != FASTTRAP_T_RESTORE &&
1356 (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
1357 !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
1358 tp->ftt_flags |= FASTTRAP_F_RETMAYBE;
/*
 * Argument fetch for a probe: the visible body simply delegates to
 * fasttrap_anarg(), passing the lwp_regs of the current thread's lwp
 * (ttolwp(curthread)) and the requested argument index argno.  The arg,
 * id and parg parameters are not referenced by the visible body.
 * NOTE(review): the tail of the parameter list is outside the visible
 * lines; presumably this is the pid provider's getarg entry point --
 * confirm against the provider ops table.
 */
1365 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1368 return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
/*
 * Argument fetch for a probe: like fasttrap_pid_getarg(), the visible body
 * delegates to fasttrap_anarg() with the lwp_regs of the current thread's
 * lwp and the requested argument index argno.  The arg, id and parg
 * parameters are not referenced by the visible body.
 * NOTE(review): the tail of the parameter list is outside the visible
 * lines; presumably this is the USDT provider's getarg entry point --
 * confirm against the provider ops table.
 */
1373 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1376 return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
/*
 * fasttrap_getreg() and its per-path counters.
 *
 * fasttrap_getreg(rp, reg) fetches the value of register number reg, using
 * rp as the struct regs to read from.  The visible code consults, in order:
 *   - the struct regs itself, indexed from r_g1 by (reg - 1);
 *   - the register windows, via dtrace_getreg_win(), when
 *     dtrace_getotherwin() reports a count greater than zero;
 *   - a window buffered in the machpcb whose mpcb_spbuf entry matches
 *     rp->r_sp;
 *   - the frame at rp->r_sp on the user stack, read with fasttrap_fulword()
 *     (native data model) or fasttrap_fuword32() (otherwise).
 * If the copy in fails, SIGILL is posted to the current process.
 *
 * The three counters below are bumped with atomic_inc_64() on the
 * register-window (fast), machpcb (mpcb) and user-stack (slow) paths
 * respectively.
 */
1379 static uint64_t fasttrap_getreg_fast_cnt;
1380 static uint64_t fasttrap_getreg_mpcb_cnt;
1381 static uint64_t fasttrap_getreg_slow_cnt;
1384 fasttrap_getreg(struct regs *rp, uint_t reg)
1387 dtrace_icookie_t cookie;
1388 struct machpcb *mpcb;
1389 extern ulong_t dtrace_getreg_win(uint_t, uint_t);
1392 * We have the %os and %gs in our struct regs, but if we need to
1393 * snag a %l or %i we need to go scrounging around in the process's
/*
 * Registers held directly in struct regs: slots are laid out starting at
 * r_g1, so register number reg maps to index (reg - 1).
 * NOTE(review): the condition guarding this return is outside the visible
 * lines -- confirm the register range it covers.
 */
1400 return ((&rp->r_g1)[reg - 1]);
1403 * Before we look at the user's stack, we'll check the register
1404 * windows to see if the information we want is in there.
/*
 * Interrupts stay disabled from the dtrace_getotherwin() check until the
 * window has been read; both exits below re-enable them with the cookie.
 */
1406 cookie = dtrace_interrupt_disable();
1407 if (dtrace_getotherwin() > 0) {
1408 value = dtrace_getreg_win(reg, 1);
1409 dtrace_interrupt_enable(cookie);
1411 atomic_inc_64(&fasttrap_getreg_fast_cnt);
1415 dtrace_interrupt_enable(cookie);
1418 * First check the machpcb structure to see if we've already read
1419 * in the register window we're looking for; if we haven't, (and
1420 * we probably haven't) try to copy in the value of the register.
/* The machpcb is found REGOFF bytes below the struct regs pointer. */
1422 /* LINTED - alignment */
1423 mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1425 if (get_udatamodel() == DATAMODEL_NATIVE) {
/* Native data model: the frame sits at the stack pointer plus STACK_BIAS. */
1426 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1428 if (mpcb->mpcb_wbcnt > 0) {
1429 struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
1430 int i = mpcb->mpcb_wbcnt;
/*
 * NOTE(review): the loop around this test is outside the visible lines;
 * presumably it walks mpcb_spbuf downward from mpcb_wbcnt looking for the
 * buffered window whose saved stack pointer equals rp->r_sp.
 */
1433 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1436 atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
/* The buffered window is indexed from rw_local by (reg - 16). */
1437 return (rwin[i].rw_local[reg - 16]);
1441 if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
/*
 * Non-native data model: the stack pointer is narrowed through caddr32_t
 * and used without STACK_BIAS, and windows/frames use the 32-bit layouts.
 */
1444 struct frame32 *fr =
1445 (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
/*
 * View value as 32-bit halves so the 32-bit user read below can fill
 * v32[1] (presumably the low-order half on big-endian SPARC -- confirm).
 */
1446 uint32_t *v32 = (uint32_t *)&value;
1448 if (mpcb->mpcb_wbcnt > 0) {
1449 struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
1450 int i = mpcb->mpcb_wbcnt;
1453 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1456 atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
1457 return (rwin[i].rw_local[reg - 16]);
1461 if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
1467 atomic_inc_64(&fasttrap_getreg_slow_cnt);
1472 * If the copy in failed, the process will be in a irrecoverable
1473 * state, and we have no choice but to kill it.
1475 kern_psignal(ttoproc(curthread), SIGILL);
/*
 * fasttrap_putreg() and its per-path counters.
 *
 * fasttrap_putreg(rp, reg, value) stores value into register number reg,
 * using rp as the struct regs to write to.  The visible code tries, in
 * order:
 *   - the struct regs itself, indexed from r_g1 by (reg - 1);
 *   - the register windows, via dtrace_putreg_win(), when
 *     dtrace_getotherwin() reports a count greater than zero;
 *   - a window already buffered in the machpcb whose mpcb_spbuf entry
 *     matches rp->r_sp;
 *   - the frame at rp->r_sp on the user stack, written with
 *     fasttrap_sulword() (native data model) or fasttrap_suword32()
 *     (otherwise);
 *   - failing that, a new machpcb window filled by copyin() from the frame,
 *     provided mpcb_wbcnt is below MAXWIN.
 * If the value cannot be recorded, SIGILL is posted to the current process.
 *
 * The three counters below are bumped with atomic_inc_64() on the
 * register-window (fast), machpcb (mpcb) and user-stack (slow) paths
 * respectively.
 */
1479 static uint64_t fasttrap_putreg_fast_cnt;
1480 static uint64_t fasttrap_putreg_mpcb_cnt;
1481 static uint64_t fasttrap_putreg_slow_cnt;
1484 fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
1486 dtrace_icookie_t cookie;
1487 struct machpcb *mpcb;
1488 extern void dtrace_putreg_win(uint_t, ulong_t);
/*
 * Registers held directly in struct regs: slots are laid out starting at
 * r_g1, so register number reg maps to index (reg - 1).
 * NOTE(review): the condition guarding this store is outside the visible
 * lines -- confirm the register range it covers.
 */
1494 (&rp->r_g1)[reg - 1] = value;
1499 * If the user process is still using some register windows, we
1500 * can just place the value in the correct window.
/*
 * Interrupts stay disabled from the dtrace_getotherwin() check until the
 * window has been written; both exits below re-enable them with the cookie.
 */
1502 cookie = dtrace_interrupt_disable();
1503 if (dtrace_getotherwin() > 0) {
1504 dtrace_putreg_win(reg, value);
1505 dtrace_interrupt_enable(cookie);
1506 atomic_inc_64(&fasttrap_putreg_fast_cnt);
1509 dtrace_interrupt_enable(cookie);
1512 * First see if there's a copy of the register window in the
1513 * machpcb structure that we can modify; if there isn't try to
1514 * copy out the value. If that fails, we try to create a new
1515 * register window in the machpcb structure. While this isn't
1516 * _precisely_ the intended use of the machpcb structure, it
1517 * can't cause any problems since we know at this point in the
1518 * code that all of the user's data have been flushed out of the
1519 * register file (since %otherwin is 0).
/* The machpcb is found REGOFF bytes below the struct regs pointer. */
1521 /* LINTED - alignment */
1522 mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1524 if (get_udatamodel() == DATAMODEL_NATIVE) {
/* Native data model: the frame sits at the stack pointer plus STACK_BIAS. */
1525 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1526 /* LINTED - alignment */
1527 struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;
1529 if (mpcb->mpcb_wbcnt > 0) {
1530 int i = mpcb->mpcb_wbcnt;
/*
 * NOTE(review): the loop around this test is outside the visible lines;
 * presumably it walks mpcb_spbuf downward from mpcb_wbcnt looking for the
 * buffered window whose saved stack pointer equals rp->r_sp.
 */
1533 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
/* Matching buffered window: update it in place, indexed by (reg - 16). */
1536 rwin[i].rw_local[reg - 16] = value;
1537 atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
/*
 * Write straight to the user frame; if that fails, fall back to buffering
 * a fresh copy of the frame in the next machpcb window slot, unless the
 * buffer is already full (mpcb_wbcnt >= MAXWIN) or the copyin() fails.
 */
1542 if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
1543 if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1544 &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
/*
 * Patch the new window and record the stack pointer it belongs to.
 * NOTE(review): the statement that advances mpcb_wbcnt is outside the
 * visible lines -- confirm it follows these stores.
 */
1547 rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
1548 mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1550 atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
/*
 * Non-native data model: the stack pointer is narrowed through caddr32_t
 * and used without STACK_BIAS, windows/frames use the 32-bit layouts, and
 * the value is truncated to 32 bits before being stored.
 */
1554 struct frame32 *fr =
1555 (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1556 /* LINTED - alignment */
1557 struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
1558 uint32_t v32 = (uint32_t)value;
1560 if (mpcb->mpcb_wbcnt > 0) {
1561 int i = mpcb->mpcb_wbcnt;
1564 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1567 rwin[i].rw_local[reg - 16] = v32;
1568 atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
/* Same fallback sequence as the native path, using the 32-bit window. */
1573 if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
1574 if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1575 &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1578 rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
1579 mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1581 atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1586 atomic_inc_64(&fasttrap_putreg_slow_cnt);
1591 * If we couldn't record this register's value, the process is in an
1592 * irrecoverable state and we have no choice but to euthanize it.
1594 kern_psignal(ttoproc(curthread), SIGILL);