2 * Copyright (c) 2005 Marcel Moolenaar
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
31 #include "opt_ktrace.h"
33 #include <sys/param.h>
34 #include <sys/systm.h>
37 #include <sys/sysproto.h>
38 #include <sys/kernel.h>
42 #include <sys/mutex.h>
43 #include <sys/sched.h>
45 #include <sys/vmmeter.h>
46 #include <sys/sysent.h>
47 #include <sys/signalvar.h>
48 #include <sys/syscall.h>
49 #include <sys/pioctl.h>
50 #include <sys/ptrace.h>
51 #include <sys/sysctl.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57 #include <vm/vm_param.h>
58 #include <sys/ptrace.h>
59 #include <machine/cpu.h>
60 #include <machine/md_var.h>
61 #include <machine/reg.h>
62 #include <machine/pal.h>
63 #include <machine/fpu.h>
64 #include <machine/efi.h>
65 #include <machine/pcb.h>
67 #include <machine/smp.h>
72 #include <sys/ktrace.h>
75 #include <security/audit/audit.h>
77 #include <ia64/disasm/disasm.h>
/*
 * When non-zero, emit a console diagnostic for traps delivered to user
 * processes (see the print_usertrap check near the end of trap()).
 * Runtime-tunable via the machdep.print_usertrap sysctl (CTLFLAG_RW).
 */
79 static int print_usertrap = 0;
80 SYSCTL_INT(_machdep, OID_AUTO, print_usertrap,
81 CTLFLAG_RW, &print_usertrap, 0, "");
/* Forward declaration: break-instruction system call handler (defined below). */
83 static void break_syscall(struct trapframe *tf);
86 * EFI-Provided FPSWA interface (Floating Point SoftWare Assist)
/* Set up elsewhere during boot; NULL means no FPSWA handler is available. */
88 extern struct fpswa_iface *fpswa_iface;
/* Generated syscall-name table; used for the diagnostics in syscall() below. */
90 extern char *syscallnames[];
/*
 * Human-readable names for the ia64 interruption vectors, indexed by
 * vector number (used by printtrap()).  Each entry's index is given in
 * the trailing comment.
 * NOTE(review): this view of the table is missing some entries (e.g.
 * indices 2, 8, 29 are absent between their neighbors) — confirm against
 * the full source before relying on positional indexing here.
 */
92 static const char *ia64_vector_names[] = {
93 "VHPT Translation", /* 0 */
94 "Instruction TLB", /* 1 */
96 "Alternate Instruction TLB", /* 3 */
97 "Alternate Data TLB", /* 4 */
98 "Data Nested TLB", /* 5 */
99 "Instruction Key Miss", /* 6 */
100 "Data Key Miss", /* 7 */
102 "Instruction Access-Bit", /* 9 */
103 "Data Access-Bit", /* 10 */
104 "Break Instruction", /* 11 */
105 "External Interrupt", /* 12 */
106 "Reserved 13", /* 13 */
107 "Reserved 14", /* 14 */
108 "Reserved 15", /* 15 */
109 "Reserved 16", /* 16 */
110 "Reserved 17", /* 17 */
111 "Reserved 18", /* 18 */
112 "Reserved 19", /* 19 */
113 "Page Not Present", /* 20 */
114 "Key Permission", /* 21 */
115 "Instruction Access Rights", /* 22 */
116 "Data Access Rights", /* 23 */
117 "General Exception", /* 24 */
118 "Disabled FP-Register", /* 25 */
119 "NaT Consumption", /* 26 */
120 "Speculation", /* 27 */
121 "Reserved 28", /* 28 */
123 "Unaligned Reference", /* 30 */
124 "Unsupported Data Reference", /* 31 */
125 "Floating-point Fault", /* 32 */
126 "Floating-point Trap", /* 33 */
127 "Lower-Privilege Transfer Trap", /* 34 */
128 "Taken Branch Trap", /* 35 */
129 "Single Step Trap", /* 36 */
130 "Reserved 37", /* 37 */
131 "Reserved 38", /* 38 */
132 "Reserved 39", /* 39 */
133 "Reserved 40", /* 40 */
134 "Reserved 41", /* 41 */
135 "Reserved 42", /* 42 */
136 "Reserved 43", /* 43 */
137 "Reserved 44", /* 44 */
138 "IA-32 Exception", /* 45 */
139 "IA-32 Intercept", /* 46 */
140 "IA-32 Interrupt", /* 47 */
141 "Reserved 48", /* 48 */
142 "Reserved 49", /* 49 */
143 "Reserved 50", /* 50 */
144 "Reserved 51", /* 51 */
145 "Reserved 52", /* 52 */
146 "Reserved 53", /* 53 */
147 "Reserved 54", /* 54 */
148 "Reserved 55", /* 55 */
149 "Reserved 56", /* 56 */
150 "Reserved 57", /* 57 */
151 "Reserved 58", /* 58 */
152 "Reserved 59", /* 59 */
153 "Reserved 60", /* 60 */
154 "Reserved 61", /* 61 */
155 "Reserved 62", /* 62 */
156 "Reserved 63", /* 63 */
157 "Reserved 64", /* 64 */
158 "Reserved 65", /* 65 */
159 "Reserved 66", /* 66 */
160 "Reserved 67", /* 67 */
/*
 * printbits: print the names of the bits/fields from 'bn' that are set
 * in 'mask'.  For a multi-bit field the value is printed as "name=value",
 * where the value is the field contents shifted down to bit 0; for a
 * single-bit entry just the name is printed when the bit is set.
 */
169 printbits(uint64_t mask, struct bitname *bn, int count)
174 for (i = 0; i < count; i++) {
176 * Handle fields wider than one bit.
/* Isolate the lowest set bit of the field mask; dividing by it shifts the field down. */
178 bit = bn[i].mask & ~(bn[i].mask - 1);
179 if (bn[i].mask > bit) {
184 printf("%s=%ld", bn[i].name,
185 (mask & bn[i].mask) / bit);
186 } else if (mask & bit) {
191 printf("%s", bn[i].name);
/*
 * Name table for processor status register (PSR) bits/fields, consumed
 * by printpsr() below.  NOTE(review): several entries are missing from
 * this view of the table.
 */
196 struct bitname psr_bits[] = {
200 {IA64_PSR_MFL, "mfl"},
201 {IA64_PSR_MFH, "mfh"},
206 {IA64_PSR_DFL, "dfl"},
207 {IA64_PSR_DFH, "dfh"},
216 {IA64_PSR_CPL, "cpl"},
/* printpsr: symbolically decode and print a PSR value via printbits(). */
231 printpsr(uint64_t psr)
233 printbits(psr, psr_bits, sizeof(psr_bits)/sizeof(psr_bits[0]));
/*
 * Name table for interruption status register (ISR) fields, consumed by
 * printisr() below.  NOTE(review): additional entries appear to be
 * missing from this view of the table.
 */
236 struct bitname isr_bits[] = {
237 {IA64_ISR_CODE, "code"},
238 {IA64_ISR_VECTOR, "vector"},
/* printisr: symbolically decode and print an ISR value via printbits(). */
252 static void printisr(uint64_t isr)
254 printbits(isr, isr_bits, sizeof(isr_bits)/sizeof(isr_bits[0]));
/*
 * printtrap: dump a human-readable summary of a trap to the console:
 * vector (name looked up in ia64_vector_names), cr.iip, cr.ipsr (decoded),
 * cr.isr (decoded), cr.ifa, and the current thread/pid/comm.  'isfatal'
 * and 'user' affect only the wording of the first line.
 */
258 printtrap(int vector, struct trapframe *tf, int isfatal, int user)
261 printf("%s %s trap (cpu %d):\n", isfatal? "fatal" : "handled",
262 user ? "user" : "kernel", PCPU_GET(cpuid));
264 printf(" trap vector = 0x%x (%s)\n",
265 vector, ia64_vector_names[vector]);
266 printf(" cr.iip = 0x%lx\n", tf->tf_special.iip);
267 printf(" cr.ipsr = 0x%lx (", tf->tf_special.psr);
268 printpsr(tf->tf_special.psr);
270 printf(" cr.isr = 0x%lx (", tf->tf_special.isr);
271 printisr(tf->tf_special.isr);
273 printf(" cr.ifa = 0x%lx\n", tf->tf_special.ifa);
/* Extra application registers are only meaningful in IA-32 mode (psr.is). */
274 if (tf->tf_special.psr & IA64_PSR_IS) {
275 printf(" ar.cflg = 0x%lx\n", ia64_get_cflg());
276 printf(" ar.csd = 0x%lx\n", ia64_get_csd());
277 printf(" ar.ssd = 0x%lx\n", ia64_get_ssd());
279 printf(" curthread = %p\n", curthread);
280 if (curthread != NULL)
281 printf(" pid = %d, comm = %s\n",
282 curthread->td_proc->p_pid, curthread->td_proc->p_comm);
287 * We got a trap caused by a break instruction and the immediate was 0.
288 * This indicates that we may have a break.b with some non-zero immediate.
289 * The break.b doesn't cause the immediate to be put in cr.iim. Hence,
290 * we need to disassemble the bundle and return the immediate found there.
291 * This may be a 0 value anyway. Return 0 for any error condition. This
292 * will result in a SIGILL, which is pretty much the best thing to do.
295 trap_decode_break(struct trapframe *tf)
297 struct asm_bundle bundle;
298 struct asm_inst *inst;
/* Disassemble the bundle at the faulting instruction pointer. */
301 if (!asm_decode(tf->tf_special.iip, &bundle))
/* Select the faulting slot (0..2) from the psr.ri restart-instruction field. */
304 slot = ((tf->tf_special.psr & IA64_PSR_RI) == IA64_PSR_RI_0) ? 0 :
305 ((tf->tf_special.psr & IA64_PSR_RI) == IA64_PSR_RI_1) ? 1 : 2;
306 inst = bundle.b_inst + slot;
309 * Sanity checking: It must be a break instruction and the operand
310 * that has the break value must be an immediate.
312 if (inst->i_op != ASM_OP_BREAK ||
313 inst->i_oper[1].o_type != ASM_OPER_IMM)
/* Return the break immediate found by the disassembler. */
316 return (inst->i_oper[1].o_value);
/*
 * trap_panic: report a fatal trap.  Prints the full trap state and then
 * enters the kernel debugger for this vector.
 */
320 trap_panic(int vector, struct trapframe *tf)
323 printtrap(vector, tf, 1, TRAPF_USERMODE(tf));
325 kdb_trap(vector, 0, tf);
/*
 * do_ast: process pending asynchronous system traps / reschedule
 * requests for the current thread before returning to user mode.
 * Returns the (possibly updated) r10 value from the trapframe so the
 * EPC syscall path can cheaply test for ERESTART.
 */
334 do_ast(struct trapframe *tf)
338 while (curthread->td_flags & (TDF_ASTPENDING|TDF_NEEDRESCHED)) {
344 * Keep interrupts disabled. We return r10 as a favor to the EPC
345 * syscall code so that it can quicky determine if the syscall
346 * needs to be restarted or not.
348 return (tf->tf_scratch.gr10);
352 * Trap is called from exception.s to handle most types of processor traps.
/*
 * Dispatches on 'vector': impossible/unhandled vectors panic via
 * trap_panic(); break instructions are decoded (syscalls, debugger
 * breakpoints, EPC AST/set_mcontext requests); TLB/access faults go to
 * vm_fault(); FP faults/traps go to the EFI FPSWA handler; the remaining
 * vectors implement single-stepping and signal delivery.  On exit a
 * non-zero 'sig' is delivered to the thread via trapsignal().
 */
356 trap(int vector, struct trapframe *tf)
361 int error, sig, user;
364 user = TRAPF_USERMODE(tf) ? 1 : 0;
397 PCPU_INC(cnt.v_trap);
373 ia64_set_fpsr(IA64_FPSR_DEFAULT);
/* Sync per-thread credentials with the process before doing any work. */
376 if (td->td_ucred != p->p_ucred)
377 cred_update_thread(td);
379 KASSERT(cold || td->td_ucred != NULL,
380 ("kernel trap doesn't have ucred"));
391 * This one is tricky. We should hardwire the VHPT, but
392 * don't at this time. I think we're mostly lucky that
393 * the VHPT is mapped.
395 trap_panic(vector, tf);
400 case IA64_VEC_EXT_INTR:
401 /* We never call trap() with these vectors. */
402 trap_panic(vector, tf);
405 case IA64_VEC_ALT_ITLB:
406 case IA64_VEC_ALT_DTLB:
408 * These should never happen, because regions 0-4 use the
409 * VHPT. If we get one of these it means we didn't program
410 * the region registers correctly.
412 trap_panic(vector, tf);
415 case IA64_VEC_NESTED_DTLB:
417 * We never call trap() with this vector. We may want to
418 * do that in the future in case the nested TLB handler
419 * could not find the translation it needs. In that case
420 * we could switch to a special (hardwired) stack and
421 * come here to produce a nice panic().
423 trap_panic(vector, tf);
426 case IA64_VEC_IKEY_MISS:
427 case IA64_VEC_DKEY_MISS:
428 case IA64_VEC_KEY_PERMISSION:
430 * We don't use protection keys, so we should never get
433 trap_panic(vector, tf);
436 case IA64_VEC_DIRTY_BIT:
437 case IA64_VEC_INST_ACCESS:
438 case IA64_VEC_DATA_ACCESS:
440 * We get here if we read or write to a page of which the
441 * PTE does not have the access bit or dirty bit set and
442 * we can not find the PTE in our datastructures. This
443 * either means we have a stale PTE in the TLB, or we lost
444 * the PTE in our datastructures.
446 trap_panic(vector, tf);
/* Break instruction: the 21-bit immediate arrives via cr.iim (saved in ifa). */
451 ucode = (int)tf->tf_special.ifa & 0x1FFFFF;
454 * A break.b doesn't cause the immediate to be
455 * stored in cr.iim (and saved in the TF in
456 * tf_special.ifa). We need to decode the
457 * instruction to find out what the immediate
458 * was. Note that if the break instruction
459 * didn't happen to be a break.b, but any
460 * other break with an immediate of 0, we
461 * will do unnecessary work to get the value
462 * we already had. Not an issue, because a
463 * break 0 is invalid.
465 ucode = trap_decode_break(tf);
/* Break immediates below 0x80000 encode architected software interrupts. */
467 if (ucode < 0x80000) {
468 /* Software interrupts. */
470 case 0: /* Unknown error. */
473 case 1: /* Integer divide by zero. */
477 case 2: /* Integer overflow. */
481 case 3: /* Range check/bounds check. */
485 case 6: /* Decimal overflow. */
486 case 7: /* Decimal divide by zero. */
487 case 8: /* Packed decimal error. */
488 case 9: /* Invalid ASCII digit. */
489 case 10: /* Invalid decimal digit. */
493 case 4: /* Null pointer dereference. */
494 case 5: /* Misaligned data. */
495 case 11: /* Paragraph stack overflow. */
502 } else if (ucode < 0x100000) {
503 /* Debugger breakpoint. */
504 tf->tf_special.psr &= ~IA64_PSR_SS;
/* 0x100000: EPC code requests AST processing only. */
506 } else if (ucode == 0x100000) {
508 return; /* do_ast() already called. */
/* 0x180000: set_mcontext() request; new context pointer is in r8. */
509 } else if (ucode == 0x180000) {
512 error = copyin((void*)tf->tf_scratch.gr8,
515 set_mcontext(td, &mc);
516 return; /* Don't call do_ast()!!! */
519 ucode = tf->tf_scratch.gr8;
524 if (kdb_trap(vector, 0, tf))
528 trap_panic(vector, tf);
533 case IA64_VEC_PAGE_NOT_PRESENT:
534 case IA64_VEC_INST_ACCESS_RIGHTS:
535 case IA64_VEC_DATA_ACCESS_RIGHTS: {
/* Page fault path: resolve the faulting address through vm_fault(). */
543 va = trunc_page(tf->tf_special.ifa);
545 if (va >= VM_MAX_ADDRESS) {
547 * Don't allow user-mode faults for kernel virtual
548 * addresses, including the gateway page.
554 vm = (p != NULL) ? p->p_vmspace : NULL;
/* Derive the fault type (execute/write/read) from the ISR bits. */
560 if (tf->tf_special.isr & IA64_ISR_X)
561 ftype = VM_PROT_EXECUTE;
562 else if (tf->tf_special.isr & IA64_ISR_W)
563 ftype = VM_PROT_WRITE;
565 ftype = VM_PROT_READ;
567 if (map != kernel_map) {
569 * Keep swapout from messing with us during this
576 /* Fault in the user page: */
577 rv = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE)
578 ? VM_FAULT_DIRTY : VM_FAULT_NORMAL);
585 * Don't have to worry about process locking or
586 * stacks in the kernel.
588 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
591 if (rv == KERN_SUCCESS)
596 /* Check for copyin/copyout fault. */
597 if (td != NULL && td->td_pcb->pcb_onfault != 0) {
599 td->td_pcb->pcb_onfault;
600 tf->tf_special.psr &= ~IA64_PSR_RI;
601 td->td_pcb->pcb_onfault = 0;
604 trap_panic(vector, tf);
/* Map the VM failure onto the conventional signal. */
607 sig = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;
611 case IA64_VEC_GENERAL_EXCEPTION: {
615 trap_panic(vector, tf);
617 code = tf->tf_special.isr & (IA64_ISR_CODE & 0xf0ull);
619 case 0x0: /* Illegal Operation Fault. */
620 sig = ia64_emulate(tf, td);
632 case IA64_VEC_SPECULATION:
634 * The branching behaviour of the chk instruction is not
635 * implemented by the processor. All we need to do is
636 * compute the target address of the branch and make sure
637 * that control is transfered to that address.
638 * We should do this in the IVT table and not by entring
641 tf->tf_special.iip += tf->tf_special.ifa << 4;
642 tf->tf_special.psr &= ~IA64_PSR_RI;
645 case IA64_VEC_NAT_CONSUMPTION:
646 case IA64_VEC_UNSUPP_DATA_REFERENCE:
651 trap_panic(vector, tf);
654 case IA64_VEC_DISABLED_FP: {
/* Lazy high-FP (f32-f127) register context switching. */
659 /* Always fatal in kernel. Should never happen. */
661 trap_panic(vector, tf);
664 thr = PCPU_GET(fpcurthread);
667 * Short-circuit handling the trap when this CPU
668 * already holds the high FP registers for this
669 * thread. We really shouldn't get the trap in the
670 * first place, but since it's only a performance
671 * issue and not a correctness issue, we emit a
672 * message for now, enable the high FP registers and
675 printf("XXX: bogusly disabled high FP regs\n");
676 tf->tf_special.psr &= ~IA64_PSR_DFH;
/* Another thread's high-FP state is on this CPU: save and release it. */
679 } else if (thr != NULL) {
680 mtx_lock_spin(&thr->td_md.md_highfp_mtx);
682 save_high_fp(&pcb->pcb_high_fp);
683 pcb->pcb_fpcpu = NULL;
684 PCPU_SET(fpcurthread, NULL);
685 mtx_unlock_spin(&thr->td_md.md_highfp_mtx);
689 mtx_lock_spin(&td->td_md.md_highfp_mtx);
691 pcpu = pcb->pcb_fpcpu;
/* Our high-FP state lives on another CPU: IPI it and wait for the handoff. */
695 mtx_unlock_spin(&td->td_md.md_highfp_mtx);
696 ipi_send(pcpu, IPI_HIGH_FP);
697 while (pcb->pcb_fpcpu == pcpu)
699 mtx_lock_spin(&td->td_md.md_highfp_mtx);
700 pcpu = pcb->pcb_fpcpu;
701 thr = PCPU_GET(fpcurthread);
/* Registers are free: restore our context and re-enable high FP use. */
705 if (thr == NULL && pcpu == NULL) {
706 restore_high_fp(&pcb->pcb_high_fp);
707 PCPU_SET(fpcurthread, td);
708 pcb->pcb_fpcpu = pcpup;
709 tf->tf_special.psr &= ~IA64_PSR_MFH;
710 tf->tf_special.psr &= ~IA64_PSR_DFH;
713 mtx_unlock_spin(&td->td_md.md_highfp_mtx);
719 case IA64_VEC_SINGLE_STEP_TRAP:
720 tf->tf_special.psr &= ~IA64_PSR_SS;
723 if (kdb_trap(vector, 0, tf))
727 trap_panic(vector, tf);
733 case IA64_VEC_UNALIGNED_REFERENCE:
735 * If user-land, do whatever fixups, printing, and
736 * signalling is appropriate (based on system-wide
737 * and per-process unaligned-access-handling flags).
740 sig = unaligned_fixup(tf, td);
743 ucode = tf->tf_special.ifa; /* VA */
745 /* Check for copyin/copyout fault. */
746 if (td != NULL && td->td_pcb->pcb_onfault != 0) {
748 td->td_pcb->pcb_onfault;
749 tf->tf_special.psr &= ~IA64_PSR_RI;
750 td->td_pcb->pcb_onfault = 0;
753 trap_panic(vector, tf);
757 case IA64_VEC_FLOATING_POINT_FAULT:
758 case IA64_VEC_FLOATING_POINT_TRAP: {
/* Hand the faulting FP bundle to the EFI FPSWA software-assist handler. */
759 struct fpswa_bundle bundle;
760 struct fpswa_fpctx fpctx;
761 struct fpswa_ret ret;
765 /* Always fatal in kernel. Should never happen. */
767 trap_panic(vector, tf);
769 if (fpswa_iface == NULL) {
775 ip = (char *)tf->tf_special.iip;
776 if (vector == IA64_VEC_FLOATING_POINT_TRAP &&
777 (tf->tf_special.psr & IA64_PSR_RI) == 0)
/* The bundle must be fetched from user space; it may fault. */
779 error = copyin(ip, &bundle, sizeof(bundle));
781 sig = SIGBUS; /* EFAULT, basically */
782 ucode = 0; /* exception summary */
786 /* f6-f15 are saved in exception_save */
787 fpctx.mask_low = 0xffc0; /* bits 6 - 15 */
789 fpctx.fp_low_preserved = NULL;
790 fpctx.fp_low_volatile = &tf->tf_scratch_fp.fr6;
791 fpctx.fp_high_preserved = NULL;
792 fpctx.fp_high_volatile = NULL;
794 fault = (vector == IA64_VEC_FLOATING_POINT_FAULT) ? 1 : 0;
797 * We have the high FP registers disabled while in the
798 * kernel. Enable them for the FPSWA handler only.
800 ia64_enable_highfp();
802 /* The docs are unclear. Is Fpswa reentrant? */
803 ret = fpswa_iface->if_fpswa(fault, &bundle,
804 &tf->tf_special.psr, &tf->tf_special.fpsr,
805 &tf->tf_special.isr, &tf->tf_special.pr,
806 &tf->tf_special.cfm, &fpctx);
808 ia64_disable_highfp();
811 * Update ipsr and iip to next instruction. We only
812 * have to do that for faults.
814 if (fault && (ret.status == 0 || (ret.status & 2))) {
/* Advance past the faulting slot: bump psr.ri, or iip when leaving slot 2. */
817 ei = (tf->tf_special.isr >> 41) & 0x03;
818 if (ei == 0) { /* no template for this case */
819 tf->tf_special.psr &= ~IA64_ISR_EI;
820 tf->tf_special.psr |= IA64_ISR_EI_1;
821 } else if (ei == 1) { /* MFI or MFB */
822 tf->tf_special.psr &= ~IA64_ISR_EI;
823 tf->tf_special.psr |= IA64_ISR_EI_2;
824 } else if (ei == 2) { /* MMF */
825 tf->tf_special.psr &= ~IA64_ISR_EI;
826 tf->tf_special.iip += 0x10;
830 if (ret.status == 0) {
832 } else if (ret.status == -1) {
833 printf("FATAL: FPSWA err1 %lx, err2 %lx, err3 %lx\n",
834 ret.err1, ret.err2, ret.err3);
835 panic("fpswa fatal error on fp fault");
838 ucode = 0; /* XXX exception summary */
843 case IA64_VEC_LOWER_PRIVILEGE_TRANSFER:
845 * The lower-privilege transfer trap is used by the EPC
846 * syscall code to trigger re-entry into the kernel when the
847 * process should be single stepped. The problem is that
848 * there's no way to set single stepping directly without
849 * using the rfi instruction. So instead we enable the
850 * lower-privilege transfer trap and when we get here we
851 * know that the process is about to enter userland (and
852 * has already lowered its privilege).
853 * However, there's another gotcha. When the process has
854 * lowered it's privilege it's still running in the gateway
855 * page. If we enable single stepping, we'll be stepping
856 * the code in the gateway page. In and by itself this is
857 * not a problem, but it's an address debuggers won't know
858 * anything about. Hence, it can only cause confusion.
859 * We know that we need to branch to get out of the gateway
860 * page, so what we do here is enable the taken branch
861 * trap and just let the process continue. When we branch
862 * out of the gateway page we'll get back into the kernel
863 * and then we enable single stepping.
864 * Since this a rather round-about way of enabling single
865 * stepping, don't make things complicated even more by
866 * calling userret() and do_ast(). We do that later...
868 tf->tf_special.psr &= ~IA64_PSR_LP;
869 tf->tf_special.psr |= IA64_PSR_TB;
872 case IA64_VEC_TAKEN_BRANCH_TRAP:
874 * Don't assume there aren't any branches other than the
875 * branch that takes us out of the gateway page. Check the
876 * iip and raise SIGTRAP only when it's an user address.
878 if (tf->tf_special.iip >= VM_MAX_ADDRESS)
880 tf->tf_special.psr &= ~IA64_PSR_TB;
884 case IA64_VEC_IA32_EXCEPTION:
885 case IA64_VEC_IA32_INTERCEPT:
886 case IA64_VEC_IA32_INTERRUPT:
888 ucode = tf->tf_special.iip;
892 /* Reserved vectors get here. Should never happen of course. */
893 trap_panic(vector, tf);
/* Common signal-delivery exit: every non-returning case must have set sig. */
897 KASSERT(sig != 0, ("foo"));
900 printtrap(vector, tf, 1, user);
904 ksi.ksi_code = ucode;
905 trapsignal(td, &ksi);
910 mtx_assert(&Giant, MA_NOTOWNED);
917 * Handle break instruction based system calls.
/*
 * Saves iip/psr so the syscall can be transparently restarted on
 * ERESTART, advances past the break instruction, copies the register
 * stack arguments into the trapframe, and dispatches via syscall().
 */
920 break_syscall(struct trapframe *tf)
926 /* Save address of break instruction. */
927 iip = tf->tf_special.iip;
928 psr = tf->tf_special.psr;
930 /* Advance to the next instruction. */
/* Bump psr.ri; when it passes slot 2, move to the next 16-byte bundle. */
931 tf->tf_special.psr += IA64_PSR_RI_1;
932 if ((tf->tf_special.psr & IA64_PSR_RI) > IA64_PSR_RI_2) {
933 tf->tf_special.iip += 16;
934 tf->tf_special.psr &= ~IA64_PSR_RI;
938 * Copy the arguments on the register stack into the trapframe
939 * to avoid having interleaved NaT collections.
941 tfp = &tf->tf_scratch.gr16;
/* Argument count is the size-of-frame (sof) field of cr.cfm. */
942 nargs = tf->tf_special.cfm & 0x7f;
943 bsp = (uint64_t*)(curthread->td_kstack + tf->tf_special.ndirty +
944 (tf->tf_special.bspstore & 0x1ffUL));
/* Step back over the arguments, accounting for an intervening NaT collection slot. */
945 bsp -= (((uintptr_t)bsp & 0x1ff) < (nargs << 3)) ? (nargs + 1): nargs;
/* Skip the NaT collection word that occupies every 64th slot. */
948 if (((uintptr_t)bsp & 0x1ff) == 0x1f8)
/* On ERESTART, rewind to the break instruction so it re-executes. */
952 if (error == ERESTART) {
953 tf->tf_special.iip = iip;
954 tf->tf_special.psr = psr;
961 * Process a system call.
963 * See syscall.s for details as to how we get here. In order to support
964 * the ERESTART case, we return the error to our caller. They deal with
/*
 * Syscall number arrives in r15, arguments start at r16 (both in the
 * trapframe scratch area).  Results are returned in r8/r9 and the raw
 * error in r10 (used by do_ast() for restart handling).
 * NOTE(review): this chunk ends before the function does; the tail is
 * not visible here.
 */
968 syscall(struct trapframe *tf)
970 struct sysent *callp;
976 ia64_set_fpsr(IA64_FPSR_DEFAULT);
978 code = tf->tf_scratch.gr15;
979 args = &tf->tf_scratch.gr16;
981 PCPU_INC(cnt.v_syscall);
988 if (td->td_ucred != p->p_ucred)
989 cred_update_thread(td);
991 if (p->p_flag & P_SA)
992 thread_user_enter(td);
/* No ABI on ia64 provides a prepsyscall hook; treat it as a hard error. */
995 if (p->p_sysent->sv_prepsyscall) {
996 /* (*p->p_sysent->sv_prepsyscall)(tf, args, &code, ¶ms); */
997 panic("prepsyscall");
1000 * syscall() and __syscall() are handled the same on
1001 * the ia64, as everything is 64-bit aligned, anyway.
1003 if (code == SYS_syscall || code == SYS___syscall) {
1005 * Code is first argument, followed by actual args.
1012 if (p->p_sysent->sv_mask)
1013 code &= p->p_sysent->sv_mask;
/* Out-of-range codes dispatch to entry 0 (the nosys handler slot). */
1015 if (code >= p->p_sysent->sv_size)
1016 callp = &p->p_sysent->sv_table[0];
1018 callp = &p->p_sysent->sv_table[code];
1021 if (KTRPOINT(td, KTR_SYSCALL))
1022 ktrsyscall(code, callp->sy_narg, args);
1024 CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
1025 td->td_proc->p_pid, td->td_proc->p_comm, code);
1027 td->td_retval[0] = 0;
1028 td->td_retval[1] = 0;
/* Preload r10 with EJUSTRETURN so an untouched r10 means "no restart needed". */
1029 tf->tf_scratch.gr10 = EJUSTRETURN;
1031 STOPEVENT(p, S_SCE, callp->sy_narg);
1033 PTRACESTOP_SC(p, td, S_PT_SCE);
1035 AUDIT_SYSCALL_ENTER(code, td);
1036 error = (*callp->sy_call)(td, args);
1037 AUDIT_SYSCALL_EXIT(error, td);
1039 if (error != EJUSTRETURN) {
1041 * Save the "raw" error code in r10. We use this to handle
1042 * syscall restarts (see do_ast()).
1044 tf->tf_scratch.gr10 = error;
1046 tf->tf_scratch.gr8 = td->td_retval[0];
1047 tf->tf_scratch.gr9 = td->td_retval[1];
1048 } else if (error != ERESTART) {
/* Translate the errno through the ABI's error table when one exists. */
1049 if (error < p->p_sysent->sv_errsize)
1050 error = p->p_sysent->sv_errtbl[error];
1052 * Translated error codes are returned in r8. User
1053 * processes use the translated error code.
1055 tf->tf_scratch.gr8 = error;
1062 * Check for misbehavior.
1064 WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
1065 (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
1066 KASSERT(td->td_critnest == 0,
1067 ("System call %s returning in a critical section",
1068 (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???"));
1069 KASSERT(td->td_locks == 0,
1070 ("System call %s returning with %d locks held",
1071 (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???",
1075 * Handle reschedule and other end-of-syscall issues
1079 CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td,
1080 td->td_proc->p_pid, td->td_proc->p_comm, code);
1082 if (KTRPOINT(td, KTR_SYSRET))
1083 ktrsysret(code, error, td->td_retval[0]);
1087 * This works because errno is findable through the
1088 * register set. If we ever support an emulation where this
1089 * is not the case, this code will need to be revisited.
1091 STOPEVENT(p, S_SCX, code);
1093 PTRACESTOP_SC(p, td, S_PT_SCX);