]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/ia64/ia64/trap.c
- Add the global 'rebooting' variable that is used to detect when
[FreeBSD/FreeBSD.git] / sys / ia64 / ia64 / trap.c
1 /*-
2  * Copyright (c) 2005 Marcel Moolenaar
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include "opt_ddb.h"
31 #include "opt_ktrace.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kdb.h>
36 #include <sys/ktr.h>
37 #include <sys/sysproto.h>
38 #include <sys/kernel.h>
39 #include <sys/proc.h>
40 #include <sys/exec.h>
41 #include <sys/lock.h>
42 #include <sys/mutex.h>
43 #include <sys/sched.h>
44 #include <sys/smp.h>
45 #include <sys/vmmeter.h>
46 #include <sys/sysent.h>
47 #include <sys/signalvar.h>
48 #include <sys/syscall.h>
49 #include <sys/pioctl.h>
50 #include <sys/ptrace.h>
51 #include <sys/sysctl.h>
52 #include <vm/vm.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57 #include <vm/vm_param.h>
58 #include <sys/ptrace.h>
59 #include <machine/clock.h>
60 #include <machine/cpu.h>
61 #include <machine/md_var.h>
62 #include <machine/reg.h>
63 #include <machine/pal.h>
64 #include <machine/fpu.h>
65 #include <machine/efi.h>
66 #include <machine/pcb.h>
67 #ifdef SMP
68 #include <machine/smp.h>
69 #endif
70
71 #ifdef KTRACE
72 #include <sys/uio.h>
73 #include <sys/ktrace.h>
74 #endif
75
76 #include <ia64/disasm/disasm.h>
77
/*
 * machdep.print_usertrap: when non-zero, log details of traps that are
 * delivered to user processes as signals (tunable at runtime).
 */
static int print_usertrap = 0;
SYSCTL_INT(_machdep, OID_AUTO, print_usertrap,
    CTLFLAG_RW, &print_usertrap, 0, "");
81
82 static void break_syscall(struct trapframe *tf);
83
84 /*
85  * EFI-Provided FPSWA interface (Floating Point SoftWare Assist)
86  */
87 extern struct fpswa_iface *fpswa_iface;
88
89 #ifdef WITNESS
90 extern char *syscallnames[];
91 #endif
92
/*
 * Human-readable names for the 68 architected interruption vectors,
 * indexed by vector number.  Used by printtrap() for diagnostics.
 */
static const char *ia64_vector_names[] = {
	"VHPT Translation",			/* 0 */
	"Instruction TLB",			/* 1 */
	"Data TLB",				/* 2 */
	"Alternate Instruction TLB",		/* 3 */
	"Alternate Data TLB",			/* 4 */
	"Data Nested TLB",			/* 5 */
	"Instruction Key Miss",			/* 6 */
	"Data Key Miss",			/* 7 */
	"Dirty-Bit",				/* 8 */
	"Instruction Access-Bit",		/* 9 */
	"Data Access-Bit",			/* 10 */
	"Break Instruction",			/* 11 */
	"External Interrupt",			/* 12 */
	"Reserved 13",				/* 13 */
	"Reserved 14",				/* 14 */
	"Reserved 15",				/* 15 */
	"Reserved 16",				/* 16 */
	"Reserved 17",				/* 17 */
	"Reserved 18",				/* 18 */
	"Reserved 19",				/* 19 */
	"Page Not Present",			/* 20 */
	"Key Permission",			/* 21 */
	"Instruction Access Rights",		/* 22 */
	"Data Access Rights",			/* 23 */
	"General Exception",			/* 24 */
	"Disabled FP-Register",			/* 25 */
	"NaT Consumption",			/* 26 */
	"Speculation",				/* 27 */
	"Reserved 28",				/* 28 */
	"Debug",				/* 29 */
	"Unaligned Reference",			/* 30 */
	"Unsupported Data Reference",		/* 31 */
	"Floating-point Fault",			/* 32 */
	"Floating-point Trap",			/* 33 */
	"Lower-Privilege Transfer Trap",	/* 34 */
	"Taken Branch Trap",			/* 35 */
	"Single Step Trap",			/* 36 */
	"Reserved 37",				/* 37 */
	"Reserved 38",				/* 38 */
	"Reserved 39",				/* 39 */
	"Reserved 40",				/* 40 */
	"Reserved 41",				/* 41 */
	"Reserved 42",				/* 42 */
	"Reserved 43",				/* 43 */
	"Reserved 44",				/* 44 */
	"IA-32 Exception",			/* 45 */
	"IA-32 Intercept",			/* 46 */
	"IA-32 Interrupt",			/* 47 */
	"Reserved 48",				/* 48 */
	"Reserved 49",				/* 49 */
	"Reserved 50",				/* 50 */
	"Reserved 51",				/* 51 */
	"Reserved 52",				/* 52 */
	"Reserved 53",				/* 53 */
	"Reserved 54",				/* 54 */
	"Reserved 55",				/* 55 */
	"Reserved 56",				/* 56 */
	"Reserved 57",				/* 57 */
	"Reserved 58",				/* 58 */
	"Reserved 59",				/* 59 */
	"Reserved 60",				/* 60 */
	"Reserved 61",				/* 61 */
	"Reserved 62",				/* 62 */
	"Reserved 63",				/* 63 */
	"Reserved 64",				/* 64 */
	"Reserved 65",				/* 65 */
	"Reserved 66",				/* 66 */
	"Reserved 67",				/* 67 */
};
163
/*
 * A named bit (or multi-bit field) within a 64-bit register, for
 * symbolic decoding by printbits().
 */
struct bitname {
	uint64_t mask;		/* bit mask; may cover more than one bit */
	const char* name;	/* name printed when the bit/field is set */
};
168
169 static void
170 printbits(uint64_t mask, struct bitname *bn, int count)
171 {
172         int i, first = 1;
173         uint64_t bit;
174
175         for (i = 0; i < count; i++) {
176                 /*
177                  * Handle fields wider than one bit.
178                  */
179                 bit = bn[i].mask & ~(bn[i].mask - 1);
180                 if (bn[i].mask > bit) {
181                         if (first)
182                                 first = 0;
183                         else
184                                 printf(",");
185                         printf("%s=%ld", bn[i].name,
186                                (mask & bn[i].mask) / bit);
187                 } else if (mask & bit) {
188                         if (first)
189                                 first = 0;
190                         else
191                                 printf(",");
192                         printf("%s", bn[i].name);
193                 }
194         }
195 }
196
/*
 * Symbolic names for the fields of the processor status register
 * (cr.ipsr), consumed by printpsr().
 */
struct bitname psr_bits[] = {
	{IA64_PSR_BE,	"be"},
	{IA64_PSR_UP,	"up"},
	{IA64_PSR_AC,	"ac"},
	{IA64_PSR_MFL,	"mfl"},
	{IA64_PSR_MFH,	"mfh"},
	{IA64_PSR_IC,	"ic"},
	{IA64_PSR_I,	"i"},
	{IA64_PSR_PK,	"pk"},
	{IA64_PSR_DT,	"dt"},
	{IA64_PSR_DFL,	"dfl"},
	{IA64_PSR_DFH,	"dfh"},
	{IA64_PSR_SP,	"sp"},
	{IA64_PSR_PP,	"pp"},
	{IA64_PSR_DI,	"di"},
	{IA64_PSR_SI,	"si"},
	{IA64_PSR_DB,	"db"},
	{IA64_PSR_LP,	"lp"},
	{IA64_PSR_TB,	"tb"},
	{IA64_PSR_RT,	"rt"},
	{IA64_PSR_CPL,	"cpl"},		/* multi-bit: current privilege level */
	{IA64_PSR_IS,	"is"},
	{IA64_PSR_MC,	"mc"},
	{IA64_PSR_IT,	"it"},
	{IA64_PSR_ID,	"id"},
	{IA64_PSR_DA,	"da"},
	{IA64_PSR_DD,	"dd"},
	{IA64_PSR_SS,	"ss"},
	{IA64_PSR_RI,	"ri"},		/* multi-bit: restart instruction slot */
	{IA64_PSR_ED,	"ed"},
	{IA64_PSR_BN,	"bn"},
	{IA64_PSR_IA,	"ia"},
};
230
231 static void
232 printpsr(uint64_t psr)
233 {
234         printbits(psr, psr_bits, sizeof(psr_bits)/sizeof(psr_bits[0]));
235 }
236
/*
 * Symbolic names for the fields of the interruption status register
 * (cr.isr), consumed by printisr().
 */
struct bitname isr_bits[] = {
	{IA64_ISR_CODE,	"code"},	/* multi-bit field */
	{IA64_ISR_VECTOR, "vector"},	/* multi-bit field */
	{IA64_ISR_X,	"x"},
	{IA64_ISR_W,	"w"},
	{IA64_ISR_R,	"r"},
	{IA64_ISR_NA,	"na"},
	{IA64_ISR_SP,	"sp"},
	{IA64_ISR_RS,	"rs"},
	{IA64_ISR_IR,	"ir"},
	{IA64_ISR_NI,	"ni"},
	{IA64_ISR_SO,	"so"},
	{IA64_ISR_EI,	"ei"},
	{IA64_ISR_ED,	"ed"},
};
252
253 static void printisr(uint64_t isr)
254 {
255         printbits(isr, isr_bits, sizeof(isr_bits)/sizeof(isr_bits[0]));
256 }
257
/*
 * Dump diagnostic state for a trap to the console: the vector, the
 * saved control registers from the trapframe (decoded symbolically
 * where possible) and the current thread/process identity.
 *
 * 'isfatal' and 'user' only affect the wording of the banner line.
 */
static void
printtrap(int vector, struct trapframe *tf, int isfatal, int user)
{
	printf("\n");
	printf("%s %s trap (cpu %d):\n", isfatal? "fatal" : "handled",
	       user ? "user" : "kernel", PCPU_GET(cpuid));
	printf("\n");
	printf("    trap vector = 0x%x (%s)\n",
	       vector, ia64_vector_names[vector]);
	printf("    cr.iip      = 0x%lx\n", tf->tf_special.iip);
	printf("    cr.ipsr     = 0x%lx (", tf->tf_special.psr);
	printpsr(tf->tf_special.psr);
	printf(")\n");
	printf("    cr.isr      = 0x%lx (", tf->tf_special.isr);
	printisr(tf->tf_special.isr);
	printf(")\n");
	printf("    cr.ifa      = 0x%lx\n", tf->tf_special.ifa);
	/* psr.is set: also dump the IA-32 related application registers. */
	if (tf->tf_special.psr & IA64_PSR_IS) {
		printf("    ar.cflg     = 0x%lx\n", ia64_get_cflg());
		printf("    ar.csd      = 0x%lx\n", ia64_get_csd());
		printf("    ar.ssd      = 0x%lx\n", ia64_get_ssd());
	}
	printf("    curthread   = %p\n", curthread);
	if (curthread != NULL)
		printf("        pid = %d, comm = %s\n",
		       curthread->td_proc->p_pid, curthread->td_proc->p_comm);
	printf("\n");
}
286
287 /*
288  * We got a trap caused by a break instruction and the immediate was 0.
289  * This indicates that we may have a break.b with some non-zero immediate.
290  * The break.b doesn't cause the immediate to be put in cr.iim.  Hence,
291  * we need to disassemble the bundle and return the immediate found there.
292  * This may be a 0 value anyway.  Return 0 for any error condition.  This
293  * will result in a SIGILL, which is pretty much the best thing to do.
294  */
295 static uint64_t
296 trap_decode_break(struct trapframe *tf)
297 {
298         struct asm_bundle bundle;
299         struct asm_inst *inst;
300         int slot;
301
302         if (!asm_decode(tf->tf_special.iip, &bundle))
303                 return (0);
304
305         slot = ((tf->tf_special.psr & IA64_PSR_RI) == IA64_PSR_RI_0) ? 0 :
306             ((tf->tf_special.psr & IA64_PSR_RI) == IA64_PSR_RI_1) ? 1 : 2;
307         inst = bundle.b_inst + slot;
308
309         /*
310          * Sanity checking: It must be a break instruction and the operand
311          * that has the break value must be an immediate.
312          */
313         if (inst->i_op != ASM_OP_BREAK ||
314             inst->i_oper[1].o_type != ASM_OPER_IMM)
315                 return (0);
316
317         return (inst->i_oper[1].o_value);
318 }
319
/*
 * Report a fatal trap: print full diagnostics, give the kernel
 * debugger a chance to take over (when KDB is configured), then
 * panic.  Does not return.
 */
void
trap_panic(int vector, struct trapframe *tf)
{

	printtrap(vector, tf, 1, TRAPF_USERMODE(tf));
#ifdef KDB
	kdb_trap(vector, 0, tf);
#endif
	panic("trap");
}
330
/*
 * Process any pending ASTs (and reschedule requests) for the current
 * thread before it returns to user mode.  Returns with interrupts
 * disabled; the return value is the thread's saved r10, for the
 * benefit of the EPC syscall return path.
 */
int
do_ast(struct trapframe *tf)
{

	disable_intr();
	/*
	 * The flag test must happen with interrupts disabled so that a
	 * request posted between test and return is not lost, but ast()
	 * itself runs with interrupts enabled -- hence the re-enable/
	 * re-disable around each iteration.
	 */
	while (curthread->td_flags & (TDF_ASTPENDING|TDF_NEEDRESCHED)) {
		enable_intr();
		ast(tf);
		disable_intr();
	}
	/*
	 * Keep interrupts disabled. We return r10 as a favor to the EPC
	 * syscall code so that it can quickly determine if the syscall
	 * needs to be restarted or not.
	 */
	return (tf->tf_scratch.gr10);
}
351
352 /*
353  * Trap is called from exception.s to handle most types of processor traps.
354  */
355 /*ARGSUSED*/
356 void
357 trap(int vector, struct trapframe *tf)
358 {
359         struct proc *p;
360         struct thread *td;
361         uint64_t ucode;
362         int error, sig, user;
363         u_int sticks;
364         ksiginfo_t ksi;
365
366         user = TRAPF_USERMODE(tf) ? 1 : 0;
367
368         PCPU_LAZY_INC(cnt.v_trap);
369
370         td = curthread;
371         p = td->td_proc;
372         ucode = 0;
373
374         if (user) {
375                 ia64_set_fpsr(IA64_FPSR_DEFAULT);
376                 sticks = td->td_sticks;
377                 td->td_frame = tf;
378                 if (td->td_ucred != p->p_ucred)
379                         cred_update_thread(td);
380         } else {
381                 sticks = 0;             /* XXX bogus -Wuninitialized warning */
382                 KASSERT(cold || td->td_ucred != NULL,
383                     ("kernel trap doesn't have ucred"));
384 #ifdef KDB
385                 if (kdb_active)
386                         kdb_reenter();
387 #endif
388         }
389
390         sig = 0;
391         switch (vector) {
392         case IA64_VEC_VHPT:
393                 /*
394                  * This one is tricky. We should hardwire the VHPT, but
395                  * don't at this time. I think we're mostly lucky that
396                  * the VHPT is mapped.
397                  */
398                 trap_panic(vector, tf);
399                 break;
400
401         case IA64_VEC_ITLB:
402         case IA64_VEC_DTLB:
403         case IA64_VEC_EXT_INTR:
404                 /* We never call trap() with these vectors. */
405                 trap_panic(vector, tf);
406                 break;
407
408         case IA64_VEC_ALT_ITLB:
409         case IA64_VEC_ALT_DTLB:
410                 /*
411                  * These should never happen, because regions 0-4 use the
412                  * VHPT. If we get one of these it means we didn't program
413                  * the region registers correctly.
414                  */
415                 trap_panic(vector, tf);
416                 break;
417
418         case IA64_VEC_NESTED_DTLB:
419                 /*
420                  * We never call trap() with this vector. We may want to
421                  * do that in the future in case the nested TLB handler
422                  * could not find the translation it needs. In that case
423                  * we could switch to a special (hardwired) stack and
424                  * come here to produce a nice panic().
425                  */
426                 trap_panic(vector, tf);
427                 break;
428
429         case IA64_VEC_IKEY_MISS:
430         case IA64_VEC_DKEY_MISS:
431         case IA64_VEC_KEY_PERMISSION:
432                 /*
433                  * We don't use protection keys, so we should never get
434                  * these faults.
435                  */
436                 trap_panic(vector, tf);
437                 break;
438
439         case IA64_VEC_DIRTY_BIT:
440         case IA64_VEC_INST_ACCESS:
441         case IA64_VEC_DATA_ACCESS:
442                 /*
443                  * We get here if we read or write to a page of which the
444                  * PTE does not have the access bit or dirty bit set and
445                  * we can not find the PTE in our datastructures. This
446                  * either means we have a stale PTE in the TLB, or we lost
447                  * the PTE in our datastructures.
448                  */
449                 trap_panic(vector, tf);
450                 break;
451
452         case IA64_VEC_BREAK:
453                 if (user) {
454                         ucode = (int)tf->tf_special.ifa & 0x1FFFFF;
455                         if (ucode == 0) {
456                                 /*
457                                  * A break.b doesn't cause the immediate to be
458                                  * stored in cr.iim (and saved in the TF in
459                                  * tf_special.ifa).  We need to decode the
460                                  * instruction to find out what the immediate
461                                  * was.  Note that if the break instruction
462                                  * didn't happen to be a break.b, but any
463                                  * other break with an immediate of 0, we
464                                  * will do unnecessary work to get the value
465                                  * we already had.  Not an issue, because a
466                                  * break 0 is invalid.
467                                  */
468                                 ucode = trap_decode_break(tf);
469                         }
470                         if (ucode < 0x80000) {
471                                 /* Software interrupts. */
472                                 switch (ucode) {
473                                 case 0:         /* Unknown error. */
474                                         sig = SIGILL;
475                                         break;
476                                 case 1:         /* Integer divide by zero. */
477                                         sig = SIGFPE;
478                                         ucode = FPE_INTDIV;
479                                         break;
480                                 case 2:         /* Integer overflow. */
481                                         sig = SIGFPE;
482                                         ucode = FPE_INTOVF;
483                                         break;
484                                 case 3:         /* Range check/bounds check. */
485                                         sig = SIGFPE;
486                                         ucode = FPE_FLTSUB;
487                                         break;
488                                 case 6:         /* Decimal overflow. */
489                                 case 7:         /* Decimal divide by zero. */
490                                 case 8:         /* Packed decimal error. */
491                                 case 9:         /* Invalid ASCII digit. */
492                                 case 10:        /* Invalid decimal digit. */
493                                         sig = SIGFPE;
494                                         ucode = FPE_FLTINV;
495                                         break;
496                                 case 4:         /* Null pointer dereference. */
497                                 case 5:         /* Misaligned data. */
498                                 case 11:        /* Paragraph stack overflow. */
499                                         sig = SIGSEGV;
500                                         break;
501                                 default:
502                                         sig = SIGILL;
503                                         break;
504                                 }
505                         } else if (ucode < 0x100000) {
506                                 /* Debugger breakpoint. */
507                                 tf->tf_special.psr &= ~IA64_PSR_SS;
508                                 sig = SIGTRAP;
509                         } else if (ucode == 0x100000) {
510                                 break_syscall(tf);
511                                 return;         /* do_ast() already called. */
512                         } else if (ucode == 0x180000) {
513                                 mcontext_t mc;
514
515                                 error = copyin((void*)tf->tf_scratch.gr8,
516                                     &mc, sizeof(mc));
517                                 if (!error) {
518                                         set_mcontext(td, &mc);
519                                         return; /* Don't call do_ast()!!! */
520                                 }
521                                 sig = SIGSEGV;
522                                 ucode = tf->tf_scratch.gr8;
523                         } else
524                                 sig = SIGILL;
525                 } else {
526 #ifdef KDB
527                         if (kdb_trap(vector, 0, tf))
528                                 return;
529                         panic("trap");
530 #else
531                         trap_panic(vector, tf);
532 #endif
533                 }
534                 break;
535
536         case IA64_VEC_PAGE_NOT_PRESENT:
537         case IA64_VEC_INST_ACCESS_RIGHTS:
538         case IA64_VEC_DATA_ACCESS_RIGHTS: {
539                 vm_offset_t va;
540                 struct vmspace *vm;
541                 vm_map_t map;
542                 vm_prot_t ftype;
543                 int rv;
544
545                 rv = 0;
546                 va = trunc_page(tf->tf_special.ifa);
547
548                 if (va >= VM_MAX_ADDRESS) {
549                         /*
550                          * Don't allow user-mode faults for kernel virtual
551                          * addresses, including the gateway page.
552                          */
553                         if (user)
554                                 goto no_fault_in;
555                         map = kernel_map;
556                 } else {
557                         vm = (p != NULL) ? p->p_vmspace : NULL;
558                         if (vm == NULL)
559                                 goto no_fault_in;
560                         map = &vm->vm_map;
561                 }
562
563                 if (tf->tf_special.isr & IA64_ISR_X)
564                         ftype = VM_PROT_EXECUTE;
565                 else if (tf->tf_special.isr & IA64_ISR_W)
566                         ftype = VM_PROT_WRITE;
567                 else
568                         ftype = VM_PROT_READ;
569
570                 if (map != kernel_map) {
571                         /*
572                          * Keep swapout from messing with us during this
573                          * critical time.
574                          */
575                         PROC_LOCK(p);
576                         ++p->p_lock;
577                         PROC_UNLOCK(p);
578
579                         /* Fault in the user page: */
580                         rv = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE)
581                             ? VM_FAULT_DIRTY : VM_FAULT_NORMAL);
582
583                         PROC_LOCK(p);
584                         --p->p_lock;
585                         PROC_UNLOCK(p);
586                 } else {
587                         /*
588                          * Don't have to worry about process locking or
589                          * stacks in the kernel.
590                          */
591                         rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
592                 }
593
594                 if (rv == KERN_SUCCESS)
595                         goto out;
596
597         no_fault_in:
598                 if (!user) {
599                         /* Check for copyin/copyout fault. */
600                         if (td != NULL && td->td_pcb->pcb_onfault != 0) {
601                                 tf->tf_special.iip =
602                                     td->td_pcb->pcb_onfault;
603                                 tf->tf_special.psr &= ~IA64_PSR_RI;
604                                 td->td_pcb->pcb_onfault = 0;
605                                 goto out;
606                         }
607                         trap_panic(vector, tf);
608                 }
609                 ucode = va;
610                 sig = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;
611                 break;
612         }
613
614         case IA64_VEC_GENERAL_EXCEPTION:
615         case IA64_VEC_NAT_CONSUMPTION:
616         case IA64_VEC_SPECULATION:
617         case IA64_VEC_UNSUPP_DATA_REFERENCE:
618                 if (user) {
619                         ucode = vector;
620                         sig = SIGILL;
621                 } else
622                         trap_panic(vector, tf);
623                 break;
624
625         case IA64_VEC_DISABLED_FP: {
626                 struct pcpu *pcpu;
627                 struct pcb *pcb;
628                 struct thread *thr;
629
630                 /* Always fatal in kernel. Should never happen. */
631                 if (!user)
632                         trap_panic(vector, tf);
633
634                 sched_pin();
635                 thr = PCPU_GET(fpcurthread);
636                 if (thr == td) {
637                         /*
638                          * Short-circuit handling the trap when this CPU
639                          * already holds the high FP registers for this
640                          * thread.  We really shouldn't get the trap in the
641                          * first place, but since it's only a performance
642                          * issue and not a correctness issue, we emit a
643                          * message for now, enable the high FP registers and
644                          * return.
645                          */
646                         printf("XXX: bogusly disabled high FP regs\n");
647                         tf->tf_special.psr &= ~IA64_PSR_DFH;
648                         sched_unpin();
649                         goto out;
650                 } else if (thr != NULL) {
651                         mtx_lock_spin(&thr->td_md.md_highfp_mtx);
652                         pcb = thr->td_pcb;
653                         save_high_fp(&pcb->pcb_high_fp);
654                         pcb->pcb_fpcpu = NULL;
655                         PCPU_SET(fpcurthread, NULL);
656                         mtx_unlock_spin(&thr->td_md.md_highfp_mtx);
657                         thr = NULL;
658                 }
659
660                 mtx_lock_spin(&td->td_md.md_highfp_mtx);
661                 pcb = td->td_pcb;
662                 pcpu = pcb->pcb_fpcpu;
663
664 #ifdef SMP
665                 if (pcpu != NULL) {
666                         mtx_unlock_spin(&td->td_md.md_highfp_mtx);
667                         ipi_send(pcpu, IPI_HIGH_FP);
668                         while (pcb->pcb_fpcpu == pcpu)
669                                 DELAY(100);
670                         mtx_lock_spin(&td->td_md.md_highfp_mtx);
671                         pcpu = pcb->pcb_fpcpu;
672                         thr = PCPU_GET(fpcurthread);
673                 }
674 #endif
675
676                 if (thr == NULL && pcpu == NULL) {
677                         restore_high_fp(&pcb->pcb_high_fp);
678                         PCPU_SET(fpcurthread, td);
679                         pcb->pcb_fpcpu = pcpup;
680                         tf->tf_special.psr &= ~IA64_PSR_MFH;
681                         tf->tf_special.psr &= ~IA64_PSR_DFH;
682                 }
683
684                 mtx_unlock_spin(&td->td_md.md_highfp_mtx);
685                 sched_unpin();
686                 goto out;
687         }
688
689         case IA64_VEC_DEBUG:
690         case IA64_VEC_SINGLE_STEP_TRAP:
691                 tf->tf_special.psr &= ~IA64_PSR_SS;
692                 if (!user) {
693 #ifdef KDB
694                         if (kdb_trap(vector, 0, tf))
695                                 return;
696                         panic("trap");
697 #else
698                         trap_panic(vector, tf);
699 #endif
700                 }
701                 sig = SIGTRAP;
702                 break;
703
704         case IA64_VEC_UNALIGNED_REFERENCE:
705                 /*
706                  * If user-land, do whatever fixups, printing, and
707                  * signalling is appropriate (based on system-wide
708                  * and per-process unaligned-access-handling flags).
709                  */
710                 if (user) {
711                         sig = unaligned_fixup(tf, td);
712                         if (sig == 0)
713                                 goto out;
714                         ucode = tf->tf_special.ifa;     /* VA */
715                 } else {
716                         /* Check for copyin/copyout fault. */
717                         if (td != NULL && td->td_pcb->pcb_onfault != 0) {
718                                 tf->tf_special.iip =
719                                     td->td_pcb->pcb_onfault;
720                                 tf->tf_special.psr &= ~IA64_PSR_RI;
721                                 td->td_pcb->pcb_onfault = 0;
722                                 goto out;
723                         }
724                         trap_panic(vector, tf);
725                 }
726                 break;
727
728         case IA64_VEC_FLOATING_POINT_FAULT:
729         case IA64_VEC_FLOATING_POINT_TRAP: {
730                 struct fpswa_bundle bundle;
731                 struct fpswa_fpctx fpctx;
732                 struct fpswa_ret ret;
733                 char *ip;
734                 u_long fault;
735
736                 /* Always fatal in kernel. Should never happen. */
737                 if (!user)
738                         trap_panic(vector, tf);
739
740                 if (fpswa_iface == NULL) {
741                         sig = SIGFPE;
742                         ucode = 0;
743                         break;
744                 }
745
746                 ip = (char *)tf->tf_special.iip;
747                 if (vector == IA64_VEC_FLOATING_POINT_TRAP &&
748                     (tf->tf_special.psr & IA64_PSR_RI) == 0)
749                         ip -= 16;
750                 error = copyin(ip, &bundle, sizeof(bundle));
751                 if (error) {
752                         sig = SIGBUS;   /* EFAULT, basically */
753                         ucode = 0;      /* exception summary */
754                         break;
755                 }
756
757                 /* f6-f15 are saved in exception_save */
758                 fpctx.mask_low = 0xffc0;                /* bits 6 - 15 */
759                 fpctx.mask_high = 0;
760                 fpctx.fp_low_preserved = NULL;
761                 fpctx.fp_low_volatile = &tf->tf_scratch_fp.fr6;
762                 fpctx.fp_high_preserved = NULL;
763                 fpctx.fp_high_volatile = NULL;
764
765                 fault = (vector == IA64_VEC_FLOATING_POINT_FAULT) ? 1 : 0;
766
767                 /*
768                  * We have the high FP registers disabled while in the
769                  * kernel. Enable them for the FPSWA handler only.
770                  */
771                 ia64_enable_highfp();
772
773                 /* The docs are unclear.  Is Fpswa reentrant? */
774                 ret = fpswa_iface->if_fpswa(fault, &bundle,
775                     &tf->tf_special.psr, &tf->tf_special.fpsr,
776                     &tf->tf_special.isr, &tf->tf_special.pr,
777                     &tf->tf_special.cfm, &fpctx);
778
779                 ia64_disable_highfp();
780
781                 /*
782                  * Update ipsr and iip to next instruction. We only
783                  * have to do that for faults.
784                  */
785                 if (fault && (ret.status == 0 || (ret.status & 2))) {
786                         int ei;
787
788                         ei = (tf->tf_special.isr >> 41) & 0x03;
789                         if (ei == 0) {          /* no template for this case */
790                                 tf->tf_special.psr &= ~IA64_ISR_EI;
791                                 tf->tf_special.psr |= IA64_ISR_EI_1;
792                         } else if (ei == 1) {   /* MFI or MFB */
793                                 tf->tf_special.psr &= ~IA64_ISR_EI;
794                                 tf->tf_special.psr |= IA64_ISR_EI_2;
795                         } else if (ei == 2) {   /* MMF */
796                                 tf->tf_special.psr &= ~IA64_ISR_EI;
797                                 tf->tf_special.iip += 0x10;
798                         }
799                 }
800
801                 if (ret.status == 0) {
802                         goto out;
803                 } else if (ret.status == -1) {
804                         printf("FATAL: FPSWA err1 %lx, err2 %lx, err3 %lx\n",
805                             ret.err1, ret.err2, ret.err3);
806                         panic("fpswa fatal error on fp fault");
807                 } else {
808                         sig = SIGFPE;
809                         ucode = 0;              /* XXX exception summary */
810                         break;
811                 }
812         }
813
814         case IA64_VEC_LOWER_PRIVILEGE_TRANSFER:
815                 /*
816                  * The lower-privilege transfer trap is used by the EPC
817                  * syscall code to trigger re-entry into the kernel when the
818                  * process should be single stepped. The problem is that
819                  * there's no way to set single stepping directly without
820                  * using the rfi instruction. So instead we enable the
821                  * lower-privilege transfer trap and when we get here we
822                  * know that the process is about to enter userland (and
823                  * has already lowered its privilege).
824                  * However, there's another gotcha. When the process has
825                  * lowered it's privilege it's still running in the gateway
826                  * page. If we enable single stepping, we'll be stepping
827                  * the code in the gateway page. In and by itself this is
828                  * not a problem, but it's an address debuggers won't know
829                  * anything about. Hence, it can only cause confusion.
830                  * We know that we need to branch to get out of the gateway
831                  * page, so what we do here is enable the taken branch
832                  * trap and just let the process continue. When we branch
833                  * out of the gateway page we'll get back into the kernel
834                  * and then we enable single stepping.
835                  * Since this a rather round-about way of enabling single
836                  * stepping, don't make things complicated even more by
837                  * calling userret() and do_ast(). We do that later...
838                  */
839                 tf->tf_special.psr &= ~IA64_PSR_LP;
840                 tf->tf_special.psr |= IA64_PSR_TB;
841                 return;
842
843         case IA64_VEC_TAKEN_BRANCH_TRAP:
844                 /*
845                  * Don't assume there aren't any branches other than the
846                  * branch that takes us out of the gateway page. Check the
847                  * iip and raise SIGTRAP only when it's an user address.
848                  */
849                 if (tf->tf_special.iip >= VM_MAX_ADDRESS)
850                         return;
851                 tf->tf_special.psr &= ~IA64_PSR_TB;
852                 sig = SIGTRAP;
853                 break;
854
855         case IA64_VEC_IA32_EXCEPTION:
856         case IA64_VEC_IA32_INTERCEPT:
857         case IA64_VEC_IA32_INTERRUPT:
858                 sig = SIGEMT;
859                 ucode = tf->tf_special.iip;
860                 break;
861
862         default:
863                 /* Reserved vectors get here. Should never happen of course. */
864                 trap_panic(vector, tf);
865                 break;
866         }
867
868         KASSERT(sig != 0, ("foo"));
869
870         if (print_usertrap)
871                 printtrap(vector, tf, 1, user);
872
873         ksiginfo_init(&ksi);
874         ksi.ksi_signo = sig;
875         ksi.ksi_code = ucode;
876         trapsignal(td, &ksi);
877
878 out:
879         if (user) {
880                 userret(td, tf, sticks);
881                 mtx_assert(&Giant, MA_NOTOWNED);
882                 do_ast(tf);
883         }
884         return;
885 }
886
/*
 * Handle break instruction based system calls.
 *
 * Entered from the low-level trap code when a user process executed a
 * break instruction requesting a system call. The syscall arguments
 * live on the RSE backing store (register stack); they are copied into
 * the trapframe before dispatch, and an ERESTART result rewinds the
 * instruction pointer so the break is re-executed after signal handling.
 */
void
break_syscall(struct trapframe *tf)
{
        uint64_t *bsp, *tfp;
        uint64_t iip, psr;
        int error, nargs;

        /* Save address of break instruction. */
        iip = tf->tf_special.iip;
        psr = tf->tf_special.psr;

        /* Advance to the next instruction. */
        tf->tf_special.psr += IA64_PSR_RI_1;
        if ((tf->tf_special.psr & IA64_PSR_RI) > IA64_PSR_RI_2) {
                /*
                 * psr.ri wrapped past the last slot of the bundle:
                 * step iip to the next 16-byte bundle and reset the
                 * slot index to 0.
                 */
                tf->tf_special.iip += 16;
                tf->tf_special.psr &= ~IA64_PSR_RI;
        }

        /*
         * Copy the arguments on the register stack into the trapframe
         * to avoid having interleaved NaT collections.
         */
        tfp = &tf->tf_scratch.gr16;
        /* Argument count = sof field (bits 0-6) of the frame marker. */
        nargs = tf->tf_special.cfm & 0x7f;
        /*
         * Locate the end of the dirty partition of the backing store on
         * the kernel stack. The low 9 bits of the user bspstore are kept
         * so the kernel-side address has the same RSE NaT-collection
         * alignment as the user-side one.
         */
        bsp = (uint64_t*)(curthread->td_kstack + tf->tf_special.ndirty +
            (tf->tf_special.bspstore & 0x1ffUL));
        /*
         * Step back over the arguments; reserve one extra slot when a
         * NaT collection falls inside the span being copied.
         */
        bsp -= (((uintptr_t)bsp & 0x1ff) < (nargs << 3)) ? (nargs + 1): nargs;
        while (nargs--) {
                *tfp++ = *bsp++;
                /*
                 * Skip the NaT collection slot (the last 8-byte slot
                 * before each 0x200-byte boundary of the backing store).
                 */
                if (((uintptr_t)bsp & 0x1ff) == 0x1f8)
                        bsp++;
        }
        /*
         * Dispatch the syscall. On ERESTART, restore the saved iip/psr
         * so the break instruction executes again on return to userland.
         */
        error = syscall(tf);
        if (error == ERESTART) {
                tf->tf_special.iip = iip;
                tf->tf_special.psr = psr;
        }

        /* Handle any pending ASTs before returning to userland. */
        do_ast(tf);
}
930
931 /*
932  * Process a system call.
933  *
934  * See syscall.s for details as to how we get here. In order to support
935  * the ERESTART case, we return the error to our caller. They deal with
936  * the hairy details.
937  */
938 int
939 syscall(struct trapframe *tf)
940 {
941         struct sysent *callp;
942         struct proc *p;
943         struct thread *td;
944         uint64_t *args;
945         int code, error;
946         u_int sticks;
947
948         ia64_set_fpsr(IA64_FPSR_DEFAULT);
949
950         code = tf->tf_scratch.gr15;
951         args = &tf->tf_scratch.gr16;
952
953         PCPU_LAZY_INC(cnt.v_syscall);
954
955         td = curthread;
956         td->td_frame = tf;
957         p = td->td_proc;
958
959         sticks = td->td_sticks;
960         if (td->td_ucred != p->p_ucred)
961                 cred_update_thread(td);
962         if (p->p_flag & P_SA)
963                 thread_user_enter(td);
964
965         if (p->p_sysent->sv_prepsyscall) {
966                 /* (*p->p_sysent->sv_prepsyscall)(tf, args, &code, &params); */
967                 panic("prepsyscall");
968         } else {
969                 /*
970                  * syscall() and __syscall() are handled the same on
971                  * the ia64, as everything is 64-bit aligned, anyway.
972                  */
973                 if (code == SYS_syscall || code == SYS___syscall) {
974                         /*
975                          * Code is first argument, followed by actual args.
976                          */
977                         code = args[0];
978                         args++;
979                 }
980         }
981
982         if (p->p_sysent->sv_mask)
983                 code &= p->p_sysent->sv_mask;
984
985         if (code >= p->p_sysent->sv_size)
986                 callp = &p->p_sysent->sv_table[0];
987         else
988                 callp = &p->p_sysent->sv_table[code];
989
990 #ifdef KTRACE
991         if (KTRPOINT(td, KTR_SYSCALL))
992                 ktrsyscall(code, (callp->sy_narg & SYF_ARGMASK), args);
993 #endif
994
995         td->td_retval[0] = 0;
996         td->td_retval[1] = 0;
997         tf->tf_scratch.gr10 = EJUSTRETURN;
998
999         STOPEVENT(p, S_SCE, (callp->sy_narg & SYF_ARGMASK));
1000
1001         PTRACESTOP_SC(p, td, S_PT_SCE);
1002
1003         /*
1004          * Grab Giant if the syscall is not flagged as MP safe.
1005          */
1006         if ((callp->sy_narg & SYF_MPSAFE) == 0) {
1007                 mtx_lock(&Giant);
1008                 error = (*callp->sy_call)(td, args);
1009                 mtx_unlock(&Giant);
1010         } else
1011                 error = (*callp->sy_call)(td, args);
1012
1013         if (error != EJUSTRETURN) {
1014                 /*
1015                  * Save the "raw" error code in r10. We use this to handle
1016                  * syscall restarts (see do_ast()).
1017                  */
1018                 tf->tf_scratch.gr10 = error;
1019                 if (error == 0) {
1020                         tf->tf_scratch.gr8 = td->td_retval[0];
1021                         tf->tf_scratch.gr9 = td->td_retval[1];
1022                 } else if (error != ERESTART) {
1023                         if (error < p->p_sysent->sv_errsize)
1024                                 error = p->p_sysent->sv_errtbl[error];
1025                         /*
1026                          * Translated error codes are returned in r8. User
1027                          * processes use the translated error code.
1028                          */
1029                         tf->tf_scratch.gr8 = error;
1030                 }
1031         }
1032
1033         userret(td, tf, sticks);
1034
1035 #ifdef KTRACE
1036         if (KTRPOINT(td, KTR_SYSRET))
1037                 ktrsysret(code, error, td->td_retval[0]);
1038 #endif
1039
1040         /*
1041          * This works because errno is findable through the
1042          * register set.  If we ever support an emulation where this
1043          * is not the case, this code will need to be revisited.
1044          */
1045         STOPEVENT(p, S_SCX, code);
1046
1047         PTRACESTOP_SC(p, td, S_PT_SCX);
1048
1049         WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
1050             (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
1051         mtx_assert(&sched_lock, MA_NOTOWNED);
1052         mtx_assert(&Giant, MA_NOTOWNED);
1053
1054         return (error);
1055 }