4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #pragma ident "%Z%%M% %I% %E% SMI"
29 #include <sys/fasttrap_isa.h>
30 #include <sys/fasttrap_impl.h>
31 #include <sys/dtrace.h>
32 #include <sys/dtrace_impl.h>
33 #include <sys/cmn_err.h>
34 #include <sys/frame.h>
35 #include <sys/stack.h>
36 #include <sys/sysmacros.h>
39 #include <v9/sys/machpcb.h>
40 #include <v9/sys/privregs.h>
43 * Lossless User-Land Tracing on SPARC
44 * -----------------------------------
48 * The most important design constraint is, of course, correct execution of
49 * the user thread above all else. The next most important goal is rapid
50 * execution. We combine execution of instructions in user-land with
51 * emulation of certain instructions in the kernel to aim for complete
52 * correctness and maximal performance.
54 * We take advantage of the split PC/NPC architecture to speed up logical
55 * single-stepping; when we copy an instruction out to the scratch space in
56 * the ulwp_t structure (held in the %g7 register on SPARC), we can
57 * effectively single step by setting the PC to our scratch space and leaving
58 * the NPC alone. This executes the replaced instruction and then continues
59 * on without having to reenter the kernel as with single-stepping. The
60 * obvious caveat is for instructions whose execution is PC dependent --
61 * branches, call and link instructions (call and jmpl), and the rdpc
62 * instruction. These instructions cannot be executed in the manner described
63 * so they must be emulated in the kernel.
65 * Emulation for this small set of instructions is fairly simple; the most
66 * difficult part being emulating branch conditions.
69 * A Cache Heavy Portfolio
71 * It's important to note at this time that copying an instruction out to the
72 * ulwp_t scratch space in user-land is rather complicated. SPARC has
73 * separate data and instruction caches so any writes to the D$ (using a
74 * store instruction for example) aren't necessarily reflected in the I$.
75 * The flush instruction can be used to synchronize the two and must be used
76 * for any self-modifying code, but the flush instruction only applies to the
77 * primary address space (the absence of a flusha analogue to the flush
78 * instruction that accepts an ASI argument is an obvious omission from SPARC
79 * v9 where the notion of the alternate address space was introduced on
80 * SPARC). To correctly copy out the instruction we must use a block store
81 * that doesn't allocate in the D$ and ensures synchronization with the I$;
82 * see dtrace_blksuword32() for the implementation (this function uses
83 * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
84 * described). Refer to the UltraSPARC I/II manual for details on the
85 * ASI_BLK_COMMIT_S ASI.
90 * When we're firing a return probe we need to expose the value returned by
91 * the function being traced. Since the function can set the return value
92 * in its last instruction, we need to fire the return probe only _after_
93 * the effects of the instruction are apparent. For instructions that we
94 * emulate, we can call dtrace_probe() after we've performed the emulation;
95 * for instructions that we execute after we return to user-land, we set
96 * %pc to the instruction we copied out (as described above) and set %npc
97 * to a trap instruction stashed in the ulwp_t structure. After the traced
98 * instruction is executed, the trap instruction returns control to the
99 * kernel where we can fire the return probe.
101 * This need for a second trap in cases where we execute the traced
102 * instruction makes it all the more important to emulate the most common
103 * instructions to avoid the second trip in and out of the kernel.
108 * Since copying out an instruction is neither simple nor inexpensive for the
109 * CPU, we should attempt to avoid doing it in as many cases as possible.
110 * Since function entry and return are usually the most interesting probe
111 * sites, we attempt to tune the performance of the fasttrap provider around
112 * instructions typically in those places.
114 * Looking at a bunch of functions in libraries and executables reveals that
115 * most functions begin with either a save or a sethi (to setup a larger
116 * argument to the save) and end with a restore or an or (in the case of leaf
117 * functions). To try to improve performance, we emulate all of these
118 * instructions in the kernel.
120 * The save and restore instructions are a little tricky since they perform
121 * register window manipulation. Rather than trying to tinker with the
122 * register windows from the kernel, we emulate the implicit add that takes
123 * place as part of those instructions and set the %pc to point to a simple
124 * save or restore we've hidden in the ulwp_t structure. If we're in a return
125 * probe so want to make it seem as though the tracepoint has been completely
126 * executed we need to remember that we've pulled this trick with restore and
127 * pull registers from the previous window (the one that we'll switch to once
128 * the simple store instruction is executed) rather than the current one. This
129 * is why in the case of emulating a restore we set the DTrace CPU flag
130 * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
131 * (see fasttrap_return_common()).
/*
 * Field-extraction macros for the SPARC v9 instruction encoding: primary
 * opcode (op), secondary opcodes (op2/op3), condition fields, register
 * fields, the immediate-mode bit I, and the various branch displacement
 * widths.  NOTE(review): this excerpt carries stray leading line numbers
 * from an extraction pass; they are preserved untouched below.
 */
134 #define OP(x) ((x) >> 30)
135 #define OP2(x) (((x) >> 22) & 0x07)
136 #define OP3(x) (((x) >> 19) & 0x3f)
137 #define RCOND(x) (((x) >> 25) & 0x07)
138 #define COND(x) (((x) >> 25) & 0x0f)
139 #define A(x) (((x) >> 29) & 0x01)
140 #define I(x) (((x) >> 13) & 0x01)
141 #define RD(x) (((x) >> 25) & 0x1f)
142 #define RS1(x) (((x) >> 14) & 0x1f)
143 #define RS2(x) (((x) >> 0) & 0x1f)
144 #define CC(x) (((x) >> 20) & 0x03)
145 #define DISP16(x) ((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
146 #define DISP22(x) ((x) & 0x3fffff)
147 #define DISP19(x) ((x) & 0x7ffff)
148 #define DISP30(x) ((x) & 0x3fffffff)
149 #define SW_TRAP(x) ((x) & 0x7f)
/* op3 values used with OP(instr) == 2 (arithmetic format). */
153 #define OP3_JMPL 0x38
154 #define OP3_RETURN 0x39
156 #define OP3_SAVE 0x3c
157 #define OP3_RESTORE 0x3d
/*
 * op3 values used with OP(instr) == 3 (load/store format); 0x3c/0x3d here
 * do not clash with save/restore above because they belong to a different
 * primary-opcode group (see their use in fasttrap_tracepoint_init()).
 */
159 #define OP3_PREFETCH 0x2d
160 #define OP3_CASA 0x3c
161 #define OP3_PREFETCHA 0x3d
162 #define OP3_CASXA 0x3e
/* op2 values used with OP(instr) == 0 (branch/sethi format). */
164 #define OP2_ILLTRAP 0x0
168 #define OP2_SETHI 0x4
169 #define OP2_FBPfcc 0x5
170 #define OP2_FBfcc 0x6
182 * Check the comment in fasttrap.h when changing these offsets or adding
/*
 * Byte offsets, relative to %g7 (the ulwp_t), of the instructions DTrace
 * stashes in the user thread's scratch space.
 */
185 #define FASTTRAP_OFF_SAVE 64
186 #define FASTTRAP_OFF_RESTORE 68
187 #define FASTTRAP_OFF_FTRET 72
188 #define FASTTRAP_OFF_RETURN 76
/* The debugger's breakpoint instruction ("ta 1"); see tracepoint_remove(). */
190 #define BREAKPOINT_INSTR 0x91d02001 /* ta 1 */
193 * Tunable to let users turn off the fancy save instruction optimization.
194 * If a program is non-ABI compliant, there's a possibility that the save
195 * instruction optimization could cause an error.
/* Non-zero (default) enables in-kernel emulation of entry save instrs. */
197 int fasttrap_optimize_save = 1;
/*
 * Fetch probe argument 'argno' at the current probe site.  The leading
 * arguments come straight out of the trapped %o registers (indexed off
 * rp->r_o0); later arguments are read from the caller's argument dump
 * area in the stack frame, honoring the data model: 64-bit frames are
 * addressed with STACK_BIAS applied, 32-bit frames are not.  User-memory
 * reads are bracketed with CPU_DTRACE_NOFAULT so a bad address yields a
 * flagged failure rather than a kernel fault.
 * NOTE(review): several lines of this function are missing from this
 * excerpt (including the register/stack cutoff test) — verify against
 * the full source.
 */
200 fasttrap_anarg(struct regs *rp, int argno)
205 return ((&rp->r_o0)[argno]);
207 if (curproc->p_model == DATAMODEL_NATIVE) {
208 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
210 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
211 value = dtrace_fulword(&fr->fr_argd[argno]);
212 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
213 CPU_DTRACE_BADALIGN);
215 struct frame32 *fr = (struct frame32 *)rp->r_sp;
217 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
218 value = dtrace_fuword32(&fr->fr_argd[argno]);
219 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
220 CPU_DTRACE_BADALIGN);

/* Register-window-aware register accessors, defined later in this file. */
226 static ulong_t fasttrap_getreg(struct regs *, uint_t);
227 static void fasttrap_putreg(struct regs *, uint_t, ulong_t);
/*
 * Materialize up to 'argc' arguments for a USDT probe into argv[],
 * applying the probe's argument remap table (ftp_argmap).  Remapped
 * arguments that live in registers are fetched via fasttrap_getreg()
 * starting at R_O0 — biased by 16 (inc) into the %i's when fake_restore
 * indicates we emulated the restore — while higher-numbered arguments
 * are read from the frame's argument dump area per the data model.
 * NOTE(review): the cutoff test between the register and stack cases and
 * the tail of the final loop are missing from this excerpt; presumably
 * unmapped trailing argv slots are zeroed there — confirm in full source.
 */
230 fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
231 uint_t fake_restore, int argc, uintptr_t *argv)
233 int i, x, cap = MIN(argc, probe->ftp_nargs);
234 int inc = (fake_restore ? 16 : 0);
237 * The only way we'll hit the fake_restore case is if a USDT probe is
238 * invoked as a tail-call. While it wouldn't be incorrect, we can
239 * avoid a call to fasttrap_getreg(), and safely use rp->r_sp
240 * directly since a tail-call can't be made if the invoked function
241 * would use the argument dump space (i.e. if there were more than
242 * 6 arguments). We take this shortcut because unconditionally rooting
243 * around for R_FP (R_SP + 16) would be unnecessarily painful.
246 if (curproc->p_model == DATAMODEL_NATIVE) {
247 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
250 for (i = 0; i < cap; i++) {
251 x = probe->ftp_argmap[i];
254 argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
255 else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
260 struct frame32 *fr = (struct frame32 *)rp->r_sp;
263 for (i = 0; i < cap; i++) {
264 x = probe->ftp_argmap[i];
267 argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
268 else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
273 for (; i < argc; i++) {
/*
 * Fire the return probes associated with the tracepoint at 'pc'.
 * Called both after in-kernel emulation (from fasttrap_pid_probe()) and
 * after an instruction executed out-of-line traps back into the kernel
 * (from fasttrap_return_probe()).  When fake_restore is set — meaning a
 * restore was emulated — the return values are pulled from the %i
 * registers under CPU_DTRACE_FAKERESTORE with interrupts disabled, so
 * that the DIF engine knows to look in the previous register window;
 * otherwise they come from the %o's in the saved regs.  Spurious
 * activations (jump tables misidentified as tail-calls, branches into a
 * return site's delay slot) are filtered out before firing.
 */
279 fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
282 fasttrap_tracepoint_t *tp;
283 fasttrap_bucket_t *bucket;
286 dtrace_icookie_t cookie;
288 pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
289 mutex_enter(pid_mtx);
290 bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
292 for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
293 if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
294 tp->ftt_proc->ftpc_acount != 0)
299 * Don't sweat it if we can't find the tracepoint again; unlike
300 * when we're in fasttrap_pid_probe(), finding the tracepoint here
301 * is not essential to the correct execution of the process.
303 if (tp == NULL || tp->ftt_retids == NULL) {
308 for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
309 fasttrap_probe_t *probe = id->fti_probe;
311 if (id->fti_ptype == DTFTP_POST_OFFSETS) {
312 if (probe->ftp_argmap != NULL && fake_restore) {
315 fasttrap_usdt_args(probe, rp, fake_restore,
316 sizeof (t) / sizeof (t[0]), t);
318 cookie = dtrace_interrupt_disable();
319 DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
320 dtrace_probe(probe->ftp_id, t[0], t[1],
322 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
323 dtrace_interrupt_enable(cookie);
325 } else if (probe->ftp_argmap != NULL) {
328 fasttrap_usdt_args(probe, rp, fake_restore,
329 sizeof (t) / sizeof (t[0]), t);
331 dtrace_probe(probe->ftp_id, t[0], t[1],
334 } else if (fake_restore) {
335 uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
336 uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
337 uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
338 uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
339 uintptr_t arg4 = fasttrap_getreg(rp, R_I4);
341 cookie = dtrace_interrupt_disable();
342 DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
343 dtrace_probe(probe->ftp_id, arg0, arg1,
345 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
346 dtrace_interrupt_enable(cookie);
349 dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
350 rp->r_o2, rp->r_o3, rp->r_o4);
357 * If this is only a possible return point, we must
358 * be looking at a potential tail call in leaf context.
359 * If the %npc is still within this function, then we
360 * must have misidentified a jmpl as a tail-call when it
361 * is, in fact, part of a jump table. It would be nice to
362 * remove this tracepoint, but this is neither the time
365 if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
366 rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
370 * It's possible for a function to branch to the delay slot
371 * of an instruction that we've identified as a return site.
372 * We can detect this spurious return probe activation by
373 * observing that in this case %npc will be %pc + 4 and %npc
374 * will be inside the current function (unless the user is
375 * doing _crazy_ instruction picking in which case there's
376 * very little we can do). The second check is important
377 * in case the last instructions of a function make a tail-
378 * call to the function located immediately subsequent.
380 if (rp->r_npc == rp->r_pc + 4 &&
381 rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
385 * The first argument is the offset of return tracepoint
386 * in the function; the remaining arguments are the return
389 * If fake_restore is set, we need to pull the return values
390 * out of the %i's rather than the %o's -- a little trickier.
393 dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
394 rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
396 uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
397 uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
398 uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
399 uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
401 cookie = dtrace_interrupt_disable();
402 DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
403 dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
404 arg0, arg1, arg2, arg3);
405 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
406 dtrace_interrupt_enable(cookie);
/*
 * Main fasttrap trap handler: the thread has hit a fasttrap trap
 * instruction at %pc.  Look up the tracepoint, fire any entry/offset
 * probes, then either emulate the traced instruction in the kernel
 * (save/restore/return/or/sethi/branches/call/jmpl/rdpc) or arrange for
 * it to be executed out-of-line from the ulwp_t scratch space
 * (FASTTRAP_T_COMMON), finally firing return probes where appropriate.
 * NOTE(review): this excerpt is missing many lines (locals, epilogue,
 * several case bodies); annotations below describe only what is visible.
 */
414 fasttrap_pid_probe(struct regs *rp)
417 fasttrap_tracepoint_t *tp, tp_local;
420 uintptr_t pc = rp->r_pc;
421 uintptr_t npc = rp->r_npc;
422 uintptr_t orig_pc = pc;
423 fasttrap_bucket_t *bucket;
425 uint_t fake_restore = 0, is_enabled = 0;
426 dtrace_icookie_t cookie;
429 * It's possible that a user (in a veritable orgy of bad planning)
430 * could redirect this thread's flow of control before it reached the
431 * return probe fasttrap. In this case we need to kill the process
432 * since it's in an unrecoverable state.
434 if (curthread->t_dtrace_step) {
435 ASSERT(curthread->t_dtrace_on);
436 fasttrap_sigtrap(p, curthread, pc);
441 * Clear all user tracing flags.
443 curthread->t_dtrace_ft = 0;
444 curthread->t_dtrace_pc = 0;
445 curthread->t_dtrace_npc = 0;
446 curthread->t_dtrace_scrpc = 0;
447 curthread->t_dtrace_astpc = 0;
450 * Treat a child created by a call to vfork(2) as if it were its
451 * parent. We know that there's only one thread of control in such a
454 while (p->p_flag & SVFORK) {
459 pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
460 mutex_enter(pid_mtx);
461 bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
464 * Lookup the tracepoint that the process just hit.
466 for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
467 if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
468 tp->ftt_proc->ftpc_acount != 0)
473 * If we couldn't find a matching tracepoint, either a tracepoint has
474 * been inserted without using the pid<pid> ioctl interface (see
475 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
/* Fire every non-return probe id registered on this tracepoint. */
482 for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
483 fasttrap_probe_t *probe = id->fti_probe;
484 int isentry = (id->fti_ptype == DTFTP_ENTRY);
486 if (id->fti_ptype == DTFTP_IS_ENABLED) {
492 * We note that this was an entry probe to help ustack() find
496 cookie = dtrace_interrupt_disable();
497 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
499 dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
502 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
503 dtrace_interrupt_enable(cookie);
508 * We're about to do a bunch of work so we cache a local copy of
509 * the tracepoint to emulate the instruction, and then find the
510 * tracepoint again later if we need to light up any return probes.
517 * If there's an is-enabled probe connected to this tracepoint it
518 * means that there was a 'mov %g0, %o0' instruction that was placed
519 * there by DTrace when the binary was linked. As this probe is, in
520 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
521 * bypass all the instruction emulation logic since we know the
522 * inevitable result. It's possible that a user could construct a
523 * scenario where the 'is-enabled' probe was on some other
524 * instruction, but that would be a rather exotic way to shoot oneself
535 * We emulate certain types of instructions to ensure correctness
536 * (in the case of position dependent instructions) or optimize
537 * common cases. The rest we have the thread execute back in user-
540 switch (tp->ftt_type) {
541 case FASTTRAP_T_SAVE:
546 * This is an optimization to let us handle function entry
547 * probes more efficiently. Many functions begin with a save
548 * instruction that follows the pattern:
549 * save %sp, <imm>, %sp
551 * Meanwhile, we've stashed the instruction:
554 * off of %g7, so all we have to do is stick the right value
555 * into %g1 and reset %pc to point to the instruction we've
556 * cleverly hidden (%npc should not be touched).
/*
 * NOTE(review): "<< 19" begins the simm13 sign-extension; the
 * matching arithmetic right shift is among the lines missing
 * from this excerpt — confirm against full source.
 */
559 imm = tp->ftt_instr << 19;
561 rp->r_g1 = rp->r_sp + imm;
562 pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
566 case FASTTRAP_T_RESTORE:
572 * This is an optimization to let us handle function
573 * return probes more efficiently. Most non-leaf functions
574 * end with the sequence:
576 * restore <reg>, <reg_or_imm>, %oX
578 * We've stashed the instruction:
579 * restore %g0, %g0, %g0
581 * off of %g7 so we just need to place the correct value
582 * in the right %i register (since after our fake-o
583 * restore, the %i's will become the %o's) and set the %pc
584 * to point to our hidden restore. We also set fake_restore to
585 * let fasttrap_return_common() know that it will find the
586 * return values in the %i's rather than the %o's.
589 if (I(tp->ftt_instr)) {
592 imm = tp->ftt_instr << 19;
594 value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
596 value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
597 fasttrap_getreg(rp, RS2(tp->ftt_instr));
601 * Convert %o's to %i's; leave %g's as they are.
603 rd = RD(tp->ftt_instr);
604 fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);
606 pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
611 case FASTTRAP_T_RETURN:
616 * A return instruction is like a jmpl (without the link
617 * part) that executes an implicit restore. We've stashed
621 * off of %g7 so we just need to place the target in %o0
622 * and set the %pc to point to the stashed return instruction.
623 * We use %o0 since that register disappears after the return
624 * executes, erasing any evidence of this tampering.
626 if (I(tp->ftt_instr)) {
629 imm = tp->ftt_instr << 19;
631 target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
633 target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
634 fasttrap_getreg(rp, RS2(tp->ftt_instr));
637 fasttrap_putreg(rp, R_O0, target);
639 pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
/* FASTTRAP_T_OR: emulate the or directly (register | reg_or_imm). */
648 if (I(tp->ftt_instr)) {
651 imm = tp->ftt_instr << 19;
653 value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
655 value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
656 fasttrap_getreg(rp, RS2(tp->ftt_instr));
659 fasttrap_putreg(rp, RD(tp->ftt_instr), value);
665 case FASTTRAP_T_SETHI:
666 if (RD(tp->ftt_instr) != R_G0) {
667 uint32_t imm32 = tp->ftt_instr << 10;
668 fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
/* FASTTRAP_T_CCR: integer condition-code branches (Bicc/BPcc). */
676 uint_t c, v, z, n, taken;
677 uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;
687 switch (tp->ftt_code) {
693 taken = z | (n ^ v); break;
695 taken = n ^ v; break;
697 taken = c | z; break;
698 case 0x5: /* BCS (BLU) */
706 * We handle the BA case differently since the annul
707 * bit means something slightly different.
709 panic("fasttrap: mishandled a branch");
714 taken = ~(z | (n ^ v)); break;
716 taken = ~(n ^ v); break;
718 taken = ~(c | z); break;
719 case 0xd: /* BCC (BGEU) */
730 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
732 * Untaken annulled branches don't execute the
733 * instruction in the delay slot.
/* FASTTRAP_T_FCC: floating-point condition-code branches. */
752 if (tp->ftt_cc == 0) {
753 fcc = (fsr >> 10) & 0x3;
756 ASSERT(tp->ftt_cc <= 3);
757 shift = 30 + tp->ftt_cc * 2;
758 fcc = (fsr >> shift) & 0x3;
/*
 * Each mask below spells out which of the four fcc states
 * (E=8, L=4, G=2, U=1 as bit positions) take the branch.
 */
761 switch (tp->ftt_code) {
763 taken = (1 << fcc) & (0|0|0|0); break;
765 taken = (1 << fcc) & (8|4|2|0); break;
767 taken = (1 << fcc) & (0|4|2|0); break;
769 taken = (1 << fcc) & (8|0|2|0); break;
771 taken = (1 << fcc) & (0|0|2|0); break;
773 taken = (1 << fcc) & (8|4|0|0); break;
775 taken = (1 << fcc) & (0|4|0|0); break;
777 taken = (1 << fcc) & (8|0|0|0); break;
780 * We handle the FBA case differently since the annul
781 * bit means something slightly different.
783 panic("fasttrap: mishandled a branch");
784 taken = (1 << fcc) & (8|4|2|1); break;
786 taken = (1 << fcc) & (0|0|0|1); break;
788 taken = (1 << fcc) & (8|0|0|1); break;
790 taken = (1 << fcc) & (0|4|0|1); break;
791 case 0xc: /* FBUGE */
792 taken = (1 << fcc) & (8|4|0|1); break;
794 taken = (1 << fcc) & (0|0|2|1); break;
795 case 0xe: /* FBULE */
796 taken = (1 << fcc) & (8|0|2|1); break;
798 taken = (1 << fcc) & (0|4|2|1); break;
804 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
806 * Untaken annulled branches don't execute the
807 * instruction in the delay slot.
/* FASTTRAP_T_REG: register-predicated branches (BPr). */
822 uint_t reg = RS1(tp->ftt_instr);
825 * An ILP32 process shouldn't be using a branch predicated on
826 * an %i or an %l since it would violate the ABI. It's a
827 * violation of the ABI because we can't ensure deterministic
828 * behavior. We should have identified this case when we
831 ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);
833 value = (int64_t)fasttrap_getreg(rp, reg);
835 switch (tp->ftt_code) {
837 taken = (value == 0); break;
838 case 0x2: /* BRLEZ */
839 taken = (value <= 0); break;
841 taken = (value < 0); break;
843 taken = (value != 0); break;
845 taken = (value > 0); break;
846 case 0x7: /* BRGEZ */
847 taken = (value >= 0); break;
851 panic("fasttrap: mishandled a branch");
857 } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
859 * Untaken annulled branches don't execute the
860 * instruction in the delay slot.
871 case FASTTRAP_T_ALWAYS:
876 if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
878 * Annulled branch always instructions never execute
879 * the instruction in the delay slot.
882 npc = tp->ftt_dest + 4;
889 case FASTTRAP_T_RDPC:
890 fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
895 case FASTTRAP_T_CALL:
897 * It's a call _and_ link remember...
904 case FASTTRAP_T_JMPL:
907 if (I(tp->ftt_instr)) {
908 uint_t rs1 = RS1(tp->ftt_instr);
911 imm = tp->ftt_instr << 19;
913 npc = fasttrap_getreg(rp, rs1) + imm;
915 uint_t rs1 = RS1(tp->ftt_instr);
916 uint_t rs2 = RS2(tp->ftt_instr);
918 npc = fasttrap_getreg(rp, rs1) +
919 fasttrap_getreg(rp, rs2);
923 * Do the link part of the jump-and-link instruction.
925 fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
929 case FASTTRAP_T_COMMON:
931 curthread->t_dtrace_scrpc = rp->r_g7;
932 curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;
935 * Copy the instruction to a reserved location in the
936 * user-land thread structure, then set the PC to that
937 * location and leave the NPC alone. We take pains to ensure
938 * consistency in the instruction stream (See SPARC
939 * Architecture Manual Version 9, sections 8.4.7, A.20, and
940 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
941 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
942 * instruction into the user's address space without
943 * bypassing the I$. There's no AS_USER version of this ASI
944 * (as exist for other ASIs) so we use the lofault
945 * mechanism to catch faults.
947 if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
949 * If the copyout fails, then the process's state
950 * is not consistent (the effects of the traced
951 * instruction will never be seen). This process
952 * cannot be allowed to continue execution.
954 fasttrap_sigtrap(curproc, curthread, pc);
958 curthread->t_dtrace_pc = pc;
959 curthread->t_dtrace_npc = npc;
960 curthread->t_dtrace_on = 1;
962 pc = curthread->t_dtrace_scrpc;
964 if (tp->ftt_retids != NULL) {
965 curthread->t_dtrace_step = 1;
966 curthread->t_dtrace_ret = 1;
967 npc = curthread->t_dtrace_astpc;
973 panic("fasttrap: mishandled an instruction");
977 * This bit me in the ass a couple of times, so lets toss this
978 * in as a cursory sanity check.
980 ASSERT(pc != rp->r_g7 + 4);
981 ASSERT(pc != rp->r_g7 + 8);
985 * If there were no return probes when we first found the tracepoint,
986 * we should feel no obligation to honor any return probes that were
987 * subsequently enabled -- they'll just have to wait until the next
990 if (tp->ftt_retids != NULL) {
992 * We need to wait until the results of the instruction are
993 * apparent before invoking any return probes. If this
994 * instruction was emulated we can just call
995 * fasttrap_return_common(); if it needs to be executed, we
996 * need to wait until we return to the kernel.
998 if (tp->ftt_type != FASTTRAP_T_COMMON) {
999 fasttrap_return_common(rp, orig_pc, pid, fake_restore);
1001 ASSERT(curthread->t_dtrace_ret != 0);
1002 ASSERT(curthread->t_dtrace_pc == orig_pc);
1003 ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
1004 ASSERT(npc == curthread->t_dtrace_astpc);
/*
 * Trap handler for the second trap: the traced instruction has been
 * executed out-of-line in the ulwp_t scratch space and the stashed trap
 * returned control here.  Clears the per-thread single-step bookkeeping,
 * resolves vfork(2) parentage, and fires the return probes via
 * fasttrap_return_common() with the original %pc so the tracing appears
 * to have happened at the original instruction.
 */
1016 fasttrap_return_probe(struct regs *rp)
1018 proc_t *p = ttoproc(curthread);
1020 uintptr_t pc = curthread->t_dtrace_pc;
1021 uintptr_t npc = curthread->t_dtrace_npc;
1023 curthread->t_dtrace_pc = 0;
1024 curthread->t_dtrace_npc = 0;
1025 curthread->t_dtrace_scrpc = 0;
1026 curthread->t_dtrace_astpc = 0;
1029 * Treat a child created by a call to vfork(2) as if it were its
1030 * parent. We know there's only one thread of control in such a
1031 * process: this one.
1033 while (p->p_flag & SVFORK) {
1038 * We set the %pc and %npc to their values when the traced
1039 * instruction was initially executed so that it appears to
1040 * dtrace_probe() that we're on the original instruction, and so that
1041 * the user can't easily detect our complex web of lies.
1042 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
1049 fasttrap_return_common(rp, pc, pid, 0);
/*
 * Arm a tracepoint by writing the fasttrap trap instruction over the
 * 4-byte instruction at ftt_pc in the process's address space.
 * NOTE(review): the return statements are missing from this excerpt;
 * presumably failure of uwrite() is reported to the caller — confirm.
 */
1055 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
1057 fasttrap_instr_t instr = FASTTRAP_INSTR;
1059 if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
/*
 * Disarm a tracepoint by restoring the original instruction.  Reads the
 * current instruction first: if it is neither our trap (FASTTRAP_INSTR)
 * nor a debugger's breakpoint (BREAKPOINT_INSTR), the tracepoint is
 * considered already gone and nothing is rewritten.
 * NOTE(review): the return statements are missing from this excerpt.
 */
1066 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
1068 fasttrap_instr_t instr;
1071 * Distinguish between read or write failures and a changed
1074 if (uread(p, &instr, 4, tp->ftt_pc) != 0)
1076 if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
1078 if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
/*
 * Decode the instruction at 'pc' in process p and fill in the
 * tracepoint: the emulation type (ftt_type), branch condition/cc fields,
 * destination, annul flag, and the saved original instruction
 * (ftt_instr).  Instructions whose behavior is PC-relative are assigned
 * specific FASTTRAP_T_* types for in-kernel emulation; everything else
 * defaults to FASTTRAP_T_COMMON (execute out-of-line).  Illegal
 * instructions, illtraps, and breakpoint traps are refused.
 * NOTE(review): many lines (returns, some case labels) are missing from
 * this excerpt.
 */
1085 fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
1086 fasttrap_probe_type_t type)
1092 * Read the instruction at the given address out of the process's
1093 * address space. We don't have to worry about a debugger
1094 * changing this instruction before we overwrite it with our trap
1095 * instruction since P_PR_LOCK is set.
1097 if (uread(p, &instr, 4, pc) != 0)
1101 * Decode the instruction to fill in the probe flags. We can have
1102 * the process execute most instructions on its own using a pc/npc
1103 * trick, but pc-relative control transfers present a problem since
1104 * we're relocating the instruction. We emulate these instructions
1105 * in the kernel. We assume a default type and over-write that as
1108 * pc-relative instructions must be emulated for correctness;
1109 * other instructions (which represent a large set of commonly traced
1110 * instructions) are emulated or otherwise optimized for performance.
1112 tp->ftt_type = FASTTRAP_T_COMMON;
1113 if (OP(instr) == 1) {
1115 * Call instructions.
1117 tp->ftt_type = FASTTRAP_T_CALL;
1118 disp = DISP30(instr) << 2;
1119 tp->ftt_dest = pc + (intptr_t)disp;
1121 } else if (OP(instr) == 0) {
1123 * Branch instructions.
1125 * Unconditional branches need careful attention when they're
1126 * annulled: annulled unconditional branches never execute
1127 * the instruction in the delay slot.
1129 switch (OP2(instr)) {
1133 * The compiler may place an illtrap after a call to
1134 * a function that returns a structure. In the case of
1135 * a returned structure, the compiler places an illtrap
1136 * whose const22 field is the size of the returned
1137 * structure immediately following the delay slot of
1138 * the call. To stay out of the way, we refuse to
1139 * place tracepoints on top of illtrap instructions.
1141 * This is one of the dumbest architectural decisions
1142 * I've ever had to work around.
1144 * We also identify the only illegal op2 value (See
1145 * SPARC Architecture Manual Version 9, E.2 table 31).
/* Integer condition-code branch with prediction (BPcc). */
1150 if (COND(instr) == 8) {
1151 tp->ftt_type = FASTTRAP_T_ALWAYS;
1154 * Check for an illegal instruction.
1158 tp->ftt_type = FASTTRAP_T_CCR;
1159 tp->ftt_cc = CC(instr);
1160 tp->ftt_code = COND(instr);
1164 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1166 disp = DISP19(instr);
1169 tp->ftt_dest = pc + (intptr_t)disp;
/* Integer condition-code branch, v8 form (Bicc). */
1173 if (COND(instr) == 8) {
1174 tp->ftt_type = FASTTRAP_T_ALWAYS;
1176 tp->ftt_type = FASTTRAP_T_CCR;
1178 tp->ftt_code = COND(instr);
1182 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1184 disp = DISP22(instr);
1187 tp->ftt_dest = pc + (intptr_t)disp;
/* Register-predicated branch (BPr). */
1192 * Check for an illegal instruction.
1194 if ((RCOND(instr) & 3) == 0)
1198 * It's a violation of the v8plus ABI to use a
1199 * register-predicated branch in a 32-bit app if
1200 * the register used is an %l or an %i (%gs and %os
1201 * are legit because they're not saved to the stack
1202 * in 32-bit words when we take a trap).
1204 if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
1207 tp->ftt_type = FASTTRAP_T_REG;
1209 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1210 disp = DISP16(instr);
1213 tp->ftt_dest = pc + (intptr_t)disp;
1214 tp->ftt_code = RCOND(instr);
1218 tp->ftt_type = FASTTRAP_T_SETHI;
/* Floating-point branch with prediction (FBPfcc). */
1222 if (COND(instr) == 8) {
1223 tp->ftt_type = FASTTRAP_T_ALWAYS;
1225 tp->ftt_type = FASTTRAP_T_FCC;
1226 tp->ftt_cc = CC(instr);
1227 tp->ftt_code = COND(instr);
1231 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1233 disp = DISP19(instr);
1236 tp->ftt_dest = pc + (intptr_t)disp;
/* Floating-point branch, v8 form (FBfcc). */
1240 if (COND(instr) == 8) {
1241 tp->ftt_type = FASTTRAP_T_ALWAYS;
1243 tp->ftt_type = FASTTRAP_T_FCC;
1245 tp->ftt_code = COND(instr);
1249 tp->ftt_flags |= FASTTRAP_F_ANNUL;
1251 disp = DISP22(instr);
1254 tp->ftt_dest = pc + (intptr_t)disp;
1258 } else if (OP(instr) == 2) {
1259 switch (OP3(instr)) {
1261 tp->ftt_type = FASTTRAP_T_RETURN;
1265 tp->ftt_type = FASTTRAP_T_JMPL;
/* rd %pc, <reg>: rs1 == 5 selects the PC state register. */
1269 if (RS1(instr) == 5)
1270 tp->ftt_type = FASTTRAP_T_RDPC;
1275 * We optimize for save instructions at function
1276 * entry; see the comment in fasttrap_pid_probe()
1277 * (near FASTTRAP_T_SAVE) for details.
1279 if (fasttrap_optimize_save != 0 &&
1280 type == DTFTP_ENTRY &&
1281 I(instr) == 1 && RD(instr) == R_SP)
1282 tp->ftt_type = FASTTRAP_T_SAVE;
1287 * We optimize restore instructions at function
1288 * return; see the comment in fasttrap_pid_probe()
1289 * (near FASTTRAP_T_RESTORE) for details.
1291 * rd must be an %o or %g register.
1293 if ((RD(instr) & 0x10) == 0)
1294 tp->ftt_type = FASTTRAP_T_RESTORE;
1299 * A large proportion of instructions in the delay
1300 * slot of retl instructions are or's so we emulate
1301 * these downstairs as an optimization.
1303 tp->ftt_type = FASTTRAP_T_OR;
1308 * Breakpoint instructions are effectively position-
1309 * dependent since the debugger uses the %pc value
1310 * to lookup which breakpoint was executed. As a
1311 * result, we can't actually instrument breakpoints.
1313 if (SW_TRAP(instr) == ST_BREAKPOINT)
1323 * Identify illegal instructions (See SPARC
1324 * Architecture Manual Version 9, E.2 table 32).
1328 } else if (OP(instr) == 3) {
1329 uint32_t op3 = OP3(instr);
1332 * Identify illegal instructions (See SPARC Architecture
1333 * Manual Version 9, E.2 table 33).
1335 if ((op3 & 0x28) == 0x28) {
1336 if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
1337 op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
1340 if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
/* Remember the original instruction so it can be restored/executed. */
1345 tp->ftt_instr = instr;
1348 * We don't know how this tracepoint is going to be used, but in case
1349 * it's used as part of a function return probe, we need to indicate
1350 * whether it's always a return site or only potentially a return
1351 * site. If it's part of a return probe, it's always going to be a
1352 * return from that function if it's a restore instruction or if
1353 * the previous instruction was a return. If we could reliably
1354 * distinguish jump tables from return sites, this wouldn't be
1357 if (tp->ftt_type != FASTTRAP_T_RESTORE &&
1358 (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
1359 !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
1360 tp->ftt_flags |= FASTTRAP_F_RETMAYBE;
1367 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1370 return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1375 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1378 return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
/*
 * Diagnostic counters recording which path fasttrap_getreg() took:
 * fast (register still in the CPU's window file), mpcb (found in the
 * machpcb's saved-window buffer), or slow (copied in from the user stack).
 */
1381 static uint64_t fasttrap_getreg_fast_cnt;
1382 static uint64_t fasttrap_getreg_mpcb_cnt;
1383 static uint64_t fasttrap_getreg_slow_cnt;
/*
 * Fetch the value of user register `reg' for the thread whose saved state
 * is `rp'.  %o and %g registers come straight out of the struct regs;
 * %l/%i registers (reg >= 16) must be recovered from a register window,
 * which may still be in the CPU, buffered in the machpcb, or spilled to
 * the user stack.  On an unrecoverable copyin failure the process is
 * killed with SIGILL.
 */
1386 fasttrap_getreg(struct regs *rp, uint_t reg)
1389 dtrace_icookie_t cookie;
1390 struct machpcb *mpcb;
1391 extern ulong_t dtrace_getreg_win(uint_t, uint_t);
1394 * We have the %os and %gs in our struct regs, but if we need to
1395 * snag a %l or %i we need to go scrounging around in the process's
/* Registers %g1..%o7 (1-15) are laid out contiguously starting at r_g1. */
1402 return ((&rp->r_g1)[reg - 1]);
1405 * Before we look at the user's stack, we'll check the register
1406 * windows to see if the information we want is in there.
/* Fast path: with interrupts off, read the window directly if %otherwin > 0. */
1408 cookie = dtrace_interrupt_disable();
1409 if (dtrace_getotherwin() > 0) {
1410 value = dtrace_getreg_win(reg, 1);
1411 dtrace_interrupt_enable(cookie);
1413 atomic_add_64(&fasttrap_getreg_fast_cnt, 1);
1417 dtrace_interrupt_enable(cookie);
1420 * First check the machpcb structure to see if we've already read
1421 * in the register window we're looking for; if we haven't, (and
1422 * we probably haven't) try to copy in the value of the register.
1424 /* LINTED - alignment */
/* The machpcb sits REGOFF bytes below the struct regs on the kernel stack. */
1425 mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1427 if (get_udatamodel() == DATAMODEL_NATIVE) {
/* 64-bit process: the saved %sp is biased, so add STACK_BIAS. */
1428 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1430 if (mpcb->mpcb_wbcnt > 0) {
1431 struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
1432 int i = mpcb->mpcb_wbcnt;
/* Scan the buffered windows for one whose stack pointer matches. */
1435 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1438 atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
1439 return (rwin[i].rw_local[reg - 16]);
/* Not buffered: copy the register in from the user stack frame. */
1443 if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
/* 32-bit process: no stack bias; truncate %sp to 32 bits. */
1446 struct frame32 *fr =
1447 (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1448 uint32_t *v32 = (uint32_t *)&value;
1450 if (mpcb->mpcb_wbcnt > 0) {
1451 struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
1452 int i = mpcb->mpcb_wbcnt;
1455 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1458 atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
1459 return (rwin[i].rw_local[reg - 16]);
/* Copy into the low-order word of value (v32[1]: SPARC is big-endian). */
1463 if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
1469 atomic_add_64(&fasttrap_getreg_slow_cnt, 1);
1474 * If the copy in failed, the process will be in an irrecoverable
1475 * state, and we have no choice but to kill it.
1477 kern_psignal(ttoproc(curthread), SIGILL);
/*
 * Diagnostic counters recording which path fasttrap_putreg() took:
 * fast (window still live in the CPU), mpcb (updated or created an entry
 * in the machpcb's saved-window buffer), or slow (wrote to the user stack).
 */
1481 static uint64_t fasttrap_putreg_fast_cnt;
1482 static uint64_t fasttrap_putreg_mpcb_cnt;
1483 static uint64_t fasttrap_putreg_slow_cnt;
/*
 * Store `value' into user register `reg' for the thread whose saved state
 * is `rp'.  %o and %g registers are written directly into the struct regs;
 * %l/%i registers (reg >= 16) are written into the live register window,
 * the machpcb's saved-window buffer, or the user stack frame — whichever
 * currently holds the window.  If all of those fail the process is in an
 * irrecoverable state and is killed with SIGILL.
 */
1486 fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
1488 dtrace_icookie_t cookie;
1489 struct machpcb *mpcb;
1490 extern void dtrace_putreg_win(uint_t, ulong_t);
/* Registers %g1..%o7 (1-15) are laid out contiguously starting at r_g1. */
1496 (&rp->r_g1)[reg - 1] = value;
1501 * If the user process is still using some register windows, we
1502 * can just place the value in the correct window.
1504 cookie = dtrace_interrupt_disable();
1505 if (dtrace_getotherwin() > 0) {
1506 dtrace_putreg_win(reg, value);
1507 dtrace_interrupt_enable(cookie);
1508 atomic_add_64(&fasttrap_putreg_fast_cnt, 1);
1511 dtrace_interrupt_enable(cookie);
1514 * First see if there's a copy of the register window in the
1515 * machpcb structure that we can modify; if there isn't try to
1516 * copy out the value. If that fails, we try to create a new
1517 * register window in the machpcb structure. While this isn't
1518 * _precisely_ the intended use of the machpcb structure, it
1519 * can't cause any problems since we know at this point in the
1520 * code that all of the user's data have been flushed out of the
1521 * register file (since %otherwin is 0).
1523 /* LINTED - alignment */
/* The machpcb sits REGOFF bytes below the struct regs on the kernel stack. */
1524 mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1526 if (get_udatamodel() == DATAMODEL_NATIVE) {
/* 64-bit process: the saved %sp is biased, so add STACK_BIAS. */
1527 struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1528 /* LINTED - alignment */
1529 struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;
1531 if (mpcb->mpcb_wbcnt > 0) {
1532 int i = mpcb->mpcb_wbcnt;
/* Scan the buffered windows for one whose stack pointer matches. */
1535 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1538 rwin[i].rw_local[reg - 16] = value;
1539 atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
/*
 * No buffered window: write straight to the user stack frame; if that
 * fails, pull the whole frame in and buffer it in the machpcb (bounded
 * by MAXWIN entries) so the new value is restored with the window.
 */
1544 if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
1545 if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1546 &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1549 rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
1550 mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1552 atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
/* 32-bit process: same logic with 32-bit frames and a truncated value. */
1556 struct frame32 *fr =
1557 (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1558 /* LINTED - alignment */
1559 struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
1560 uint32_t v32 = (uint32_t)value;
1562 if (mpcb->mpcb_wbcnt > 0) {
1563 int i = mpcb->mpcb_wbcnt;
1566 if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1569 rwin[i].rw_local[reg - 16] = v32;
1570 atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1575 if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
1576 if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1577 &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1580 rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
1581 mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1583 atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1588 atomic_add_64(&fasttrap_putreg_slow_cnt, 1);
1593 * If we couldn't record this register's value, the process is in an
1594 * irrecoverable state and we have no choice but to euthanize it.
1596 kern_psignal(ttoproc(curthread), SIGILL);