CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/i386/i386/trap.c
Update compiler-rt to trunk r228651. This enables using Address
[FreeBSD/FreeBSD.git] / sys / i386 / i386 / trap.c
1 /*-
2  * Copyright (C) 1994, David Greenman
3  * Copyright (c) 1990, 1993
4  *      The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the University of Utah, and William Jolitz.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed by the University of
20  *      California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *      from: @(#)trap.c        7.4 (Berkeley) 5/13/91
38  */
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42
43 /*
44  * 386 Trap and System call handling
45  */
46
47 #include "opt_clock.h"
48 #include "opt_cpu.h"
49 #include "opt_hwpmc_hooks.h"
50 #include "opt_isa.h"
51 #include "opt_kdb.h"
52 #include "opt_npx.h"
53 #include "opt_trap.h"
54
55 #include <sys/param.h>
56 #include <sys/bus.h>
57 #include <sys/systm.h>
58 #include <sys/proc.h>
59 #include <sys/pioctl.h>
60 #include <sys/ptrace.h>
61 #include <sys/kdb.h>
62 #include <sys/kernel.h>
63 #include <sys/ktr.h>
64 #include <sys/lock.h>
65 #include <sys/mutex.h>
66 #include <sys/resourcevar.h>
67 #include <sys/signalvar.h>
68 #include <sys/syscall.h>
69 #include <sys/sysctl.h>
70 #include <sys/sysent.h>
71 #include <sys/uio.h>
72 #include <sys/vmmeter.h>
73 #ifdef HWPMC_HOOKS
74 #include <sys/pmckern.h>
75 PMC_SOFT_DEFINE( , , page_fault, all);
76 PMC_SOFT_DEFINE( , , page_fault, read);
77 PMC_SOFT_DEFINE( , , page_fault, write);
78 #endif
79 #include <security/audit/audit.h>
80
81 #include <vm/vm.h>
82 #include <vm/vm_param.h>
83 #include <vm/pmap.h>
84 #include <vm/vm_kern.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_extern.h>
88
89 #include <machine/cpu.h>
90 #include <machine/intr_machdep.h>
91 #include <x86/mca.h>
92 #include <machine/md_var.h>
93 #include <machine/pcb.h>
94 #ifdef SMP
95 #include <machine/smp.h>
96 #endif
97 #include <machine/tss.h>
98 #include <machine/vm86.h>
99
100 #ifdef POWERFAIL_NMI
101 #include <sys/syslog.h>
102 #include <machine/clock.h>
103 #endif
104
105 #ifdef KDTRACE_HOOKS
106 #include <sys/dtrace_bsd.h>
107 #endif
108
/*
 * Forward declarations for this file.
 *
 * trap() is the C-level exception handler defined below; it takes the
 * trapframe built by the low-level entry code.
 * NOTE(review): syscall() and dblfault_handler() are only declared in the
 * part of the file visible here; their definitions should be confirmed
 * further down.
 *
 * trap_pfault() is called by trap() with the frame, a flag that is TRUE for
 * a user-mode fault and FALSE for a kernel-mode fault, and the faulting
 * address.  trap() treats its return value as 0, -1, -2 (f00f workaround
 * builds only) or a signal number.
 *
 * trap_fatal() is called by trap() for traps it cannot recover from; it is
 * passed the frame and a fault address (0 when no address applies).
 *
 * IDTVEC(lcall_syscall) is an entry point whose address trap() compares
 * against the saved %eip when handling kernel-mode trace traps.
 */
109 extern void trap(struct trapframe *frame);
110 extern void syscall(struct trapframe *frame);
111
112 static int trap_pfault(struct trapframe *, int, vm_offset_t);
113 static void trap_fatal(struct trapframe *, vm_offset_t);
114 void dblfault_handler(void);
115
116 extern inthand_t IDTVEC(lcall_syscall);
117
/*
 * Human-readable descriptions of trap types.
 *
 * The table is indexed by trap number; the comment beside each entry gives
 * the index and the T_* constant it is meant to correspond to.  Trap numbers
 * with no assigned meaning map to the empty string.
 *
 * MAX_TRAP_MSG is the index of the last entry (32, T_DTRACE_RET), so the
 * array holds MAX_TRAP_MSG + 1 strings.  Keep the two in sync when adding
 * entries.
 * NOTE(review): the consumer of this table is outside the visible part of
 * the file; presumably it bounds-checks the trap number against
 * MAX_TRAP_MSG before indexing -- confirm.
 */
118 #define MAX_TRAP_MSG            32
119 static char *trap_msg[] = {
120         "",                                     /*  0 unused */
121         "privileged instruction fault",         /*  1 T_PRIVINFLT */
122         "",                                     /*  2 unused */
123         "breakpoint instruction fault",         /*  3 T_BPTFLT */
124         "",                                     /*  4 unused */
125         "",                                     /*  5 unused */
126         "arithmetic trap",                      /*  6 T_ARITHTRAP */
127         "",                                     /*  7 unused */
128         "",                                     /*  8 unused */
129         "general protection fault",             /*  9 T_PROTFLT */
130         "trace trap",                           /* 10 T_TRCTRAP */
131         "",                                     /* 11 unused */
132         "page fault",                           /* 12 T_PAGEFLT */
133         "",                                     /* 13 unused */
134         "alignment fault",                      /* 14 T_ALIGNFLT */
135         "",                                     /* 15 unused */
136         "",                                     /* 16 unused */
137         "",                                     /* 17 unused */
138         "integer divide fault",                 /* 18 T_DIVIDE */
139         "non-maskable interrupt trap",          /* 19 T_NMI */
140         "overflow trap",                        /* 20 T_OFLOW */
141         "FPU bounds check fault",               /* 21 T_BOUND */
142         "FPU device not available",             /* 22 T_DNA */
143         "double fault",                         /* 23 T_DOUBLEFLT */
144         "FPU operand fetch fault",              /* 24 T_FPOPFLT */
145         "invalid TSS fault",                    /* 25 T_TSSFLT */
146         "segment not present fault",            /* 26 T_SEGNPFLT */
147         "stack fault",                          /* 27 T_STKFLT */
148         "machine check trap",                   /* 28 T_MCHK */
149         "SIMD floating-point exception",        /* 29 T_XMMFLT */
150         "reserved (unknown) fault",             /* 30 T_RESERVED */
151         "",                                     /* 31 unused (reserved) */
152         "DTrace pid return trap",               /* 32 T_DTRACE_RET */
153 };
154
/*
 * Flag associated with the "f00f hack" workaround; it exists only when
 * I586_CPU is configured and NO_F00F_HACK is not defined.
 * NOTE(review): where this flag is set is not visible here -- confirm.
 */
155 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
156 int has_f00f_bug = 0;           /* Initialized so that it can be patched. */
157 #endif
158
/*
 * machdep.kdb_on_nmi: when nonzero (the default), trap() calls kdb_trap()
 * for an NMI that isa_nmi() reports as 0.  Only present in KDB kernels.
 */
159 #ifdef KDB
160 static int kdb_on_nmi = 1;
161 SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RWTUN,
162         &kdb_on_nmi, 0, "Go to KDB on NMI");
163 #endif
/*
 * machdep.panic_on_nmi: when nonzero (the default), an NMI for which
 * isa_nmi() returns nonzero is fatal: trap() panics for one taken in user
 * mode and falls through to trap_fatal() for one taken in kernel mode.
 * When zero, the kernel-mode case simply returns.
 */
164 static int panic_on_nmi = 1;
165 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RWTUN,
166         &panic_on_nmi, 0, "Panic on NMI");
/*
 * machdep.prot_fault_translation: selects the signal trap() delivers for a
 * user page fault when trap_pfault() returns a signal other than SIGSEGV:
 *   0  autodetect: SIGSEGV/SEGV_ACCERR for FreeBSD-ABI processes whose
 *      p_osrel is at least P_OSREL_SIGSEGV, otherwise SIGBUS/BUS_PAGE_FAULT;
 *   1  always SIGBUS/BUS_PAGE_FAULT (compat mode);
 *   any other value: always SIGSEGV/SEGV_ACCERR.
 */
167 static int prot_fault_translation = 0;
168 SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
169         &prot_fault_translation, 0, "Select signal to deliver on protection fault");
/*
 * machdep.uprintf_signal: when nonzero, trap() uses uprintf() to report the
 * details of each trap-generated signal (pid, command, signal, error code,
 * addresses and the instruction bytes at the faulting %eip) before it is
 * delivered.  Defaults to zero.
 */
170 static int uprintf_signal;
171 SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW,
172     &uprintf_signal, 0,
173     "Print debugging information on trap signal to ctty");
174
175 /*
176  * Exception, fault, and trap interface to the FreeBSD kernel.
177  * This common code is called from assembly language IDT gate entry
178  * routines that prepare a suitable stack frame, and restore this
179  * frame after the exception has been processed.
180  */
181
182 void
183 trap(struct trapframe *frame)
184 {
185 #ifdef KDTRACE_HOOKS
186         struct reg regs;
187 #endif
188         struct thread *td = curthread;
189         struct proc *p = td->td_proc;
190         int i = 0, ucode = 0, code;
191         u_int type;
192         register_t addr = 0;
193         vm_offset_t eva;
194         ksiginfo_t ksi;
195 #ifdef POWERFAIL_NMI
196         static int lastalert = 0;
197 #endif
198
199         PCPU_INC(cnt.v_trap);
200         type = frame->tf_trapno;
201
202 #ifdef SMP
203         /* Handler for NMI IPIs used for stopping CPUs. */
204         if (type == T_NMI) {
205                  if (ipi_nmi_handler() == 0)
206                            goto out;
207         }
208 #endif /* SMP */
209
210 #ifdef KDB
211         if (kdb_active) {
212                 kdb_reenter();
213                 goto out;
214         }
215 #endif
216
217         if (type == T_RESERVED) {
218                 trap_fatal(frame, 0);
219                 goto out;
220         }
221
222 #ifdef  HWPMC_HOOKS
223         /*
224          * CPU PMCs interrupt using an NMI so we check for that first.
225          * If the HWPMC module is active, 'pmc_hook' will point to
226          * the function to be called.  A return value of '1' from the
227          * hook means that the NMI was handled by it and that we can
228          * return immediately.
229          */
230         if (type == T_NMI && pmc_intr &&
231             (*pmc_intr)(PCPU_GET(cpuid), frame))
232             goto out;
233 #endif
234
235         if (type == T_MCHK) {
236                 mca_intr();
237                 goto out;
238         }
239
240 #ifdef KDTRACE_HOOKS
241         /*
242          * A trap can occur while DTrace executes a probe. Before
243          * executing the probe, DTrace blocks re-scheduling and sets
244          * a flag in its per-cpu flags to indicate that it doesn't
245          * want to fault. On returning from the probe, the no-fault
246          * flag is cleared and finally re-scheduling is enabled.
247          */
248         if ((type == T_PROTFLT || type == T_PAGEFLT) &&
249             dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
250                 goto out;
251 #endif
252
253         if ((frame->tf_eflags & PSL_I) == 0) {
254                 /*
255                  * Buggy application or kernel code has disabled
256                  * interrupts and then trapped.  Enabling interrupts
257                  * now is wrong, but it is better than running with
258                  * interrupts disabled until they are accidentally
259                  * enabled later.
260                  */
261                 if (ISPL(frame->tf_cs) == SEL_UPL || (frame->tf_eflags & PSL_VM))
262                         uprintf(
263                             "pid %ld (%s): trap %d with interrupts disabled\n",
264                             (long)curproc->p_pid, curthread->td_name, type);
265                 else if (type != T_NMI && type != T_BPTFLT &&
266                     type != T_TRCTRAP &&
267                     frame->tf_eip != (int)cpu_switch_load_gs) {
268                         /*
269                          * XXX not quite right, since this may be for a
270                          * multiple fault in user mode.
271                          */
272                         printf("kernel trap %d with interrupts disabled\n",
273                             type);
274                         /*
275                          * Page faults need interrupts disabled until later,
276                          * and we shouldn't enable interrupts while holding
277                          * a spin lock.
278                          */
279                         if (type != T_PAGEFLT &&
280                             td->td_md.md_spinlock_count == 0)
281                                 enable_intr();
282                 }
283         }
284         eva = 0;
285         code = frame->tf_err;
286         if (type == T_PAGEFLT) {
287                 /*
288                  * For some Cyrix CPUs, %cr2 is clobbered by
289                  * interrupts.  This problem is worked around by using
290                  * an interrupt gate for the pagefault handler.  We
291                  * are finally ready to read %cr2 and conditionally
292                  * reenable interrupts.  If we hold a spin lock, then
293                  * we must not reenable interrupts.  This might be a
294                  * spurious page fault.
295                  */
296                 eva = rcr2();
297                 if (td->td_md.md_spinlock_count == 0)
298                         enable_intr();
299         }
300
301         if ((ISPL(frame->tf_cs) == SEL_UPL) ||
302             ((frame->tf_eflags & PSL_VM) && 
303                 !(curpcb->pcb_flags & PCB_VM86CALL))) {
304                 /* user trap */
305
306                 td->td_pticks = 0;
307                 td->td_frame = frame;
308                 addr = frame->tf_eip;
309                 if (td->td_ucred != p->p_ucred) 
310                         cred_update_thread(td);
311
312                 switch (type) {
313                 case T_PRIVINFLT:       /* privileged instruction fault */
314                         i = SIGILL;
315                         ucode = ILL_PRVOPC;
316                         break;
317
318                 case T_BPTFLT:          /* bpt instruction fault */
319                 case T_TRCTRAP:         /* trace trap */
320                         enable_intr();
321 #ifdef KDTRACE_HOOKS
322                         if (type == T_BPTFLT) {
323                                 fill_frame_regs(frame, &regs);
324                                 if (dtrace_pid_probe_ptr != NULL &&
325                                     dtrace_pid_probe_ptr(&regs) == 0)
326                                         goto out;
327                         }
328 #endif
329                         frame->tf_eflags &= ~PSL_T;
330                         i = SIGTRAP;
331                         ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
332                         break;
333
334                 case T_ARITHTRAP:       /* arithmetic trap */
335 #ifdef DEV_NPX
336                         ucode = npxtrap_x87();
337                         if (ucode == -1)
338                                 goto userout;
339 #else
340                         ucode = 0;
341 #endif
342                         i = SIGFPE;
343                         break;
344
345                         /*
346                          * The following two traps can happen in
347                          * vm86 mode, and, if so, we want to handle
348                          * them specially.
349                          */
350                 case T_PROTFLT:         /* general protection fault */
351                 case T_STKFLT:          /* stack fault */
352                         if (frame->tf_eflags & PSL_VM) {
353                                 i = vm86_emulate((struct vm86frame *)frame);
354                                 if (i == 0)
355                                         goto user;
356                                 break;
357                         }
358                         i = SIGBUS;
359                         ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR;
360                         break;
361                 case T_SEGNPFLT:        /* segment not present fault */
362                         i = SIGBUS;
363                         ucode = BUS_ADRERR;
364                         break;
365                 case T_TSSFLT:          /* invalid TSS fault */
366                         i = SIGBUS;
367                         ucode = BUS_OBJERR;
368                         break;
369                 case T_ALIGNFLT:
370                         i = SIGBUS;
371                         ucode = BUS_ADRALN;
372                         break;
373                 case T_DOUBLEFLT:       /* double fault */
374                 default:
375                         i = SIGBUS;
376                         ucode = BUS_OBJERR;
377                         break;
378
379                 case T_PAGEFLT:         /* page fault */
380
381                         i = trap_pfault(frame, TRUE, eva);
382 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
383                         if (i == -2) {
384                                 /*
385                                  * The f00f hack workaround has triggered, so
386                                  * treat the fault as an illegal instruction 
387                                  * (T_PRIVINFLT) instead of a page fault.
388                                  */
389                                 type = frame->tf_trapno = T_PRIVINFLT;
390
391                                 /* Proceed as in that case. */
392                                 ucode = ILL_PRVOPC;
393                                 i = SIGILL;
394                                 break;
395                         }
396 #endif
397                         if (i == -1)
398                                 goto userout;
399                         if (i == 0)
400                                 goto user;
401
402                         if (i == SIGSEGV)
403                                 ucode = SEGV_MAPERR;
404                         else {
405                                 if (prot_fault_translation == 0) {
406                                         /*
407                                          * Autodetect.
408                                          * This check also covers the images
409                                          * without the ABI-tag ELF note.
410                                          */
411                                         if (SV_CURPROC_ABI() == SV_ABI_FREEBSD
412                                             && p->p_osrel >= P_OSREL_SIGSEGV) {
413                                                 i = SIGSEGV;
414                                                 ucode = SEGV_ACCERR;
415                                         } else {
416                                                 i = SIGBUS;
417                                                 ucode = BUS_PAGE_FAULT;
418                                         }
419                                 } else if (prot_fault_translation == 1) {
420                                         /*
421                                          * Always compat mode.
422                                          */
423                                         i = SIGBUS;
424                                         ucode = BUS_PAGE_FAULT;
425                                 } else {
426                                         /*
427                                          * Always SIGSEGV mode.
428                                          */
429                                         i = SIGSEGV;
430                                         ucode = SEGV_ACCERR;
431                                 }
432                         }
433                         addr = eva;
434                         break;
435
436                 case T_DIVIDE:          /* integer divide fault */
437                         ucode = FPE_INTDIV;
438                         i = SIGFPE;
439                         break;
440
441 #ifdef DEV_ISA
442                 case T_NMI:
443 #ifdef POWERFAIL_NMI
444 #ifndef TIMER_FREQ
445 #  define TIMER_FREQ 1193182
446 #endif
447                         if (time_second - lastalert > 10) {
448                                 log(LOG_WARNING, "NMI: power fail\n");
449                                 sysbeep(880, hz);
450                                 lastalert = time_second;
451                         }
452                         goto userout;
453 #else /* !POWERFAIL_NMI */
454                         /* machine/parity/power fail/"kitchen sink" faults */
455                         if (isa_nmi(code) == 0) {
456 #ifdef KDB
457                                 /*
458                                  * NMI can be hooked up to a pushbutton
459                                  * for debugging.
460                                  */
461                                 if (kdb_on_nmi) {
462                                         printf ("NMI ... going to debugger\n");
463                                         kdb_trap(type, 0, frame);
464                                 }
465 #endif /* KDB */
466                                 goto userout;
467                         } else if (panic_on_nmi)
468                                 panic("NMI indicates hardware failure");
469                         break;
470 #endif /* POWERFAIL_NMI */
471 #endif /* DEV_ISA */
472
473                 case T_OFLOW:           /* integer overflow fault */
474                         ucode = FPE_INTOVF;
475                         i = SIGFPE;
476                         break;
477
478                 case T_BOUND:           /* bounds check fault */
479                         ucode = FPE_FLTSUB;
480                         i = SIGFPE;
481                         break;
482
483                 case T_DNA:
484 #ifdef DEV_NPX
485                         KASSERT(PCB_USER_FPU(td->td_pcb),
486                             ("kernel FPU ctx has leaked"));
487                         /* transparent fault (due to context switch "late") */
488                         if (npxdna())
489                                 goto userout;
490 #endif
491                         uprintf("pid %d killed due to lack of floating point\n",
492                                 p->p_pid);
493                         i = SIGKILL;
494                         ucode = 0;
495                         break;
496
497                 case T_FPOPFLT:         /* FPU operand fetch fault */
498                         ucode = ILL_COPROC;
499                         i = SIGILL;
500                         break;
501
502                 case T_XMMFLT:          /* SIMD floating-point exception */
503 #if defined(DEV_NPX) && !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
504                         ucode = npxtrap_sse();
505                         if (ucode == -1)
506                                 goto userout;
507 #else
508                         ucode = 0;
509 #endif
510                         i = SIGFPE;
511                         break;
512 #ifdef KDTRACE_HOOKS
513                 case T_DTRACE_RET:
514                         enable_intr();
515                         fill_frame_regs(frame, &regs);
516                         if (dtrace_return_probe_ptr != NULL &&
517                             dtrace_return_probe_ptr(&regs) == 0)
518                                 goto out;
519                         break;
520 #endif
521                 }
522         } else {
523                 /* kernel trap */
524
525                 KASSERT(cold || td->td_ucred != NULL,
526                     ("kernel trap doesn't have ucred"));
527                 switch (type) {
528                 case T_PAGEFLT:                 /* page fault */
529                         (void) trap_pfault(frame, FALSE, eva);
530                         goto out;
531
532                 case T_DNA:
533 #ifdef DEV_NPX
534                         KASSERT(!PCB_USER_FPU(td->td_pcb),
535                             ("Unregistered use of FPU in kernel"));
536                         if (npxdna())
537                                 goto out;
538 #endif
539                         break;
540
541                 case T_ARITHTRAP:       /* arithmetic trap */
542                 case T_XMMFLT:          /* SIMD floating-point exception */
543                 case T_FPOPFLT:         /* FPU operand fetch fault */
544                         /*
545                          * XXXKIB for now disable any FPU traps in kernel
546                          * handler registration seems to be overkill
547                          */
548                         trap_fatal(frame, 0);
549                         goto out;
550
551                         /*
552                          * The following two traps can happen in
553                          * vm86 mode, and, if so, we want to handle
554                          * them specially.
555                          */
556                 case T_PROTFLT:         /* general protection fault */
557                 case T_STKFLT:          /* stack fault */
558                         if (frame->tf_eflags & PSL_VM) {
559                                 i = vm86_emulate((struct vm86frame *)frame);
560                                 if (i != 0)
561                                         /*
562                                          * returns to original process
563                                          */
564                                         vm86_trap((struct vm86frame *)frame);
565                                 goto out;
566                         }
567                         if (type == T_STKFLT)
568                                 break;
569
570                         /* FALL THROUGH */
571
572                 case T_SEGNPFLT:        /* segment not present fault */
573                         if (curpcb->pcb_flags & PCB_VM86CALL)
574                                 break;
575
576                         /*
577                          * Invalid %fs's and %gs's can be created using
578                          * procfs or PT_SETREGS or by invalidating the
579                          * underlying LDT entry.  This causes a fault
580                          * in kernel mode when the kernel attempts to
581                          * switch contexts.  Lose the bad context
582                          * (XXX) so that we can continue, and generate
583                          * a signal.
584                          */
585                         if (frame->tf_eip == (int)cpu_switch_load_gs) {
586                                 curpcb->pcb_gs = 0;
587 #if 0                           
588                                 PROC_LOCK(p);
589                                 kern_psignal(p, SIGBUS);
590                                 PROC_UNLOCK(p);
591 #endif                          
592                                 goto out;
593                         }
594
595                         if (td->td_intr_nesting_level != 0)
596                                 break;
597
598                         /*
599                          * Invalid segment selectors and out of bounds
600                          * %eip's and %esp's can be set up in user mode.
601                          * This causes a fault in kernel mode when the
602                          * kernel tries to return to user mode.  We want
603                          * to get this fault so that we can fix the
604                          * problem here and not have to check all the
605                          * selectors and pointers when the user changes
606                          * them.
607                          */
608                         if (frame->tf_eip == (int)doreti_iret) {
609                                 frame->tf_eip = (int)doreti_iret_fault;
610                                 goto out;
611                         }
612                         if (frame->tf_eip == (int)doreti_popl_ds) {
613                                 frame->tf_eip = (int)doreti_popl_ds_fault;
614                                 goto out;
615                         }
616                         if (frame->tf_eip == (int)doreti_popl_es) {
617                                 frame->tf_eip = (int)doreti_popl_es_fault;
618                                 goto out;
619                         }
620                         if (frame->tf_eip == (int)doreti_popl_fs) {
621                                 frame->tf_eip = (int)doreti_popl_fs_fault;
622                                 goto out;
623                         }
624                         if (curpcb->pcb_onfault != NULL) {
625                                 frame->tf_eip =
626                                     (int)curpcb->pcb_onfault;
627                                 goto out;
628                         }
629                         break;
630
631                 case T_TSSFLT:
632                         /*
633                          * PSL_NT can be set in user mode and isn't cleared
634                          * automatically when the kernel is entered.  This
635                          * causes a TSS fault when the kernel attempts to
636                          * `iret' because the TSS link is uninitialized.  We
637                          * want to get this fault so that we can fix the
638                          * problem here and not every time the kernel is
639                          * entered.
640                          */
641                         if (frame->tf_eflags & PSL_NT) {
642                                 frame->tf_eflags &= ~PSL_NT;
643                                 goto out;
644                         }
645                         break;
646
647                 case T_TRCTRAP:  /* trace trap */
648                         if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) {
649                                 /*
650                                  * We've just entered system mode via the
651                                  * syscall lcall.  Continue single stepping
652                                  * silently until the syscall handler has
653                                  * saved the flags.
654                                  */
655                                 goto out;
656                         }
657                         if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
658                                 /*
659                                  * The syscall handler has now saved the
660                                  * flags.  Stop single stepping it.
661                                  */
662                                 frame->tf_eflags &= ~PSL_T;
663                                 goto out;
664                         }
665                         /*
666                          * Ignore debug register trace traps due to
667                          * accesses in the user's address space, which
668                          * can happen under several conditions such as
669                          * if a user sets a watchpoint on a buffer and
670                          * then passes that buffer to a system call.
671                          * We still want to get TRCTRAPS for addresses
672                          * in kernel space because that is useful when
673                          * debugging the kernel.
674                          */
675                         if (user_dbreg_trap() && 
676                            !(curpcb->pcb_flags & PCB_VM86CALL)) {
677                                 /*
678                                  * Reset breakpoint bits because the
679                                  * processor doesn't
680                                  */
681                                 load_dr6(rdr6() & 0xfffffff0);
682                                 goto out;
683                         }
684                         /*
685                          * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
686                          */
687                 case T_BPTFLT:
688                         /*
689                          * If KDB is enabled, let it handle the debugger trap.
690                          * Otherwise, debugger traps "can't happen".
691                          */
692 #ifdef KDB
693                         if (kdb_trap(type, 0, frame))
694                                 goto out;
695 #endif
696                         break;
697
698 #ifdef DEV_ISA
699                 case T_NMI:
700 #ifdef POWERFAIL_NMI
701                         if (time_second - lastalert > 10) {
702                                 log(LOG_WARNING, "NMI: power fail\n");
703                                 sysbeep(880, hz);
704                                 lastalert = time_second;
705                         }
706                         goto out;
707 #else /* !POWERFAIL_NMI */
708                         /* machine/parity/power fail/"kitchen sink" faults */
709                         if (isa_nmi(code) == 0) {
710 #ifdef KDB
711                                 /*
712                                  * NMI can be hooked up to a pushbutton
713                                  * for debugging.
714                                  */
715                                 if (kdb_on_nmi) {
716                                         printf ("NMI ... going to debugger\n");
717                                         kdb_trap(type, 0, frame);
718                                 }
719 #endif /* KDB */
720                                 goto out;
721                         } else if (panic_on_nmi == 0)
722                                 goto out;
723                         /* FALLTHROUGH */
724 #endif /* POWERFAIL_NMI */
725 #endif /* DEV_ISA */
726                 }
727
728                 trap_fatal(frame, eva);
729                 goto out;
730         }
731
732         /* Translate fault for emulators (e.g. Linux) */
733         if (*p->p_sysent->sv_transtrap)
734                 i = (*p->p_sysent->sv_transtrap)(i, type);
735
736         ksiginfo_init_trap(&ksi);
737         ksi.ksi_signo = i;
738         ksi.ksi_code = ucode;
739         ksi.ksi_addr = (void *)addr;
740         ksi.ksi_trapno = type;
741         if (uprintf_signal) {
742                 uprintf("pid %d comm %s: signal %d err %x code %d type %d "
743                     "addr 0x%x esp 0x%08x eip 0x%08x "
744                     "<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
745                     p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr,
746                     frame->tf_esp, frame->tf_eip,
747                     fubyte((void *)(frame->tf_eip + 0)),
748                     fubyte((void *)(frame->tf_eip + 1)),
749                     fubyte((void *)(frame->tf_eip + 2)),
750                     fubyte((void *)(frame->tf_eip + 3)),
751                     fubyte((void *)(frame->tf_eip + 4)),
752                     fubyte((void *)(frame->tf_eip + 5)),
753                     fubyte((void *)(frame->tf_eip + 6)),
754                     fubyte((void *)(frame->tf_eip + 7)));
755         }
756         KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled"));
757         trapsignal(td, &ksi);
758
759 #ifdef DEBUG
760         if (type <= MAX_TRAP_MSG) {
761                 uprintf("fatal process exception: %s",
762                         trap_msg[type]);
763                 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
764                         uprintf(", fault VA = 0x%lx", (u_long)eva);
765                 uprintf("\n");
766         }
767 #endif
768
769 user:
770         userret(td, frame);
771         KASSERT(PCB_USER_FPU(td->td_pcb),
772             ("Return from trap with kernel FPU ctx leaked"));
773 userout:
774 out:
775         return;
776 }
777
778 static int
779 trap_pfault(frame, usermode, eva)
780         struct trapframe *frame;
781         int usermode;
782         vm_offset_t eva;
783 {
784         vm_offset_t va;
785         struct vmspace *vm;
786         vm_map_t map;
787         int rv = 0;
788         vm_prot_t ftype;
789         struct thread *td = curthread;
790         struct proc *p = td->td_proc;
791
792         if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
793                 /*
794                  * Due to both processor errata and lazy TLB invalidation when
795                  * access restrictions are removed from virtual pages, memory
796                  * accesses that are allowed by the physical mapping layer may
797                  * nonetheless cause one spurious page fault per virtual page. 
798                  * When the thread is executing a "no faulting" section that
799                  * is bracketed by vm_fault_{disable,enable}_pagefaults(),
800                  * every page fault is treated as a spurious page fault,
801                  * unless it accesses the same virtual address as the most
802                  * recent page fault within the same "no faulting" section.
803                  */
804                 if (td->td_md.md_spurflt_addr != eva ||
805                     (td->td_pflags & TDP_RESETSPUR) != 0) {
806                         /*
807                          * Do nothing to the TLB.  A stale TLB entry is
808                          * flushed automatically by a page fault.
809                          */
810                         td->td_md.md_spurflt_addr = eva;
811                         td->td_pflags &= ~TDP_RESETSPUR;
812                         return (0);
813                 }
814         } else {
815                 /*
816                  * If we get a page fault while in a critical section, then
817                  * it is most likely a fatal kernel page fault.  The kernel
818                  * is already going to panic trying to get a sleep lock to
819                  * do the VM lookup, so just consider it a fatal trap so the
820                  * kernel can print out a useful trap message and even get
821                  * to the debugger.
822                  *
823                  * If we get a page fault while holding a non-sleepable
824                  * lock, then it is most likely a fatal kernel page fault.
825                  * If WITNESS is enabled, then it's going to whine about
826                  * bogus LORs with various VM locks, so just skip to the
827                  * fatal trap handling directly.
828                  */
829                 if (td->td_critnest != 0 ||
830                     WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
831                     "Kernel page fault") != 0) {
832                         trap_fatal(frame, eva);
833                         return (-1);
834                 }
835         }
836         va = trunc_page(eva);
837         if (va >= KERNBASE) {
838                 /*
839                  * Don't allow user-mode faults in kernel address space.
840                  * An exception:  if the faulting address is the invalid
841                  * instruction entry in the IDT, then the Intel Pentium
842                  * F00F bug workaround was triggered, and we need to
843                  * treat it is as an illegal instruction, and not a page
844                  * fault.
845                  */
846 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
847                 if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
848                         return (-2);
849 #endif
850                 if (usermode)
851                         goto nogo;
852
853                 map = kernel_map;
854         } else {
855                 /*
856                  * This is a fault on non-kernel virtual memory.  If either
857                  * p or p->p_vmspace is NULL, then the fault is fatal.
858                  */
859                 if (p == NULL || (vm = p->p_vmspace) == NULL)
860                         goto nogo;
861
862                 map = &vm->vm_map;
863
864                 /*
865                  * When accessing a user-space address, kernel must be
866                  * ready to accept the page fault, and provide a
867                  * handling routine.  Since accessing the address
868                  * without the handler is a bug, do not try to handle
869                  * it normally, and panic immediately.
870                  */
871                 if (!usermode && (td->td_intr_nesting_level != 0 ||
872                     curpcb->pcb_onfault == NULL)) {
873                         trap_fatal(frame, eva);
874                         return (-1);
875                 }
876         }
877
878         /*
879          * PGEX_I is defined only if the execute disable bit capability is
880          * supported and enabled.
881          */
882         if (frame->tf_err & PGEX_W)
883                 ftype = VM_PROT_WRITE;
884 #ifdef PAE
885         else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
886                 ftype = VM_PROT_EXECUTE;
887 #endif
888         else
889                 ftype = VM_PROT_READ;
890
891         if (map != kernel_map) {
892                 /*
893                  * Keep swapout from messing with us during this
894                  *      critical time.
895                  */
896                 PROC_LOCK(p);
897                 ++p->p_lock;
898                 PROC_UNLOCK(p);
899
900                 /* Fault in the user page: */
901                 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
902
903                 PROC_LOCK(p);
904                 --p->p_lock;
905                 PROC_UNLOCK(p);
906         } else {
907                 /*
908                  * Don't have to worry about process locking or stacks in the
909                  * kernel.
910                  */
911                 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
912         }
913         if (rv == KERN_SUCCESS) {
914 #ifdef HWPMC_HOOKS
915                 if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
916                         PMC_SOFT_CALL_TF( , , page_fault, all, frame);
917                         if (ftype == VM_PROT_READ)
918                                 PMC_SOFT_CALL_TF( , , page_fault, read,
919                                     frame);
920                         else
921                                 PMC_SOFT_CALL_TF( , , page_fault, write,
922                                     frame);
923                 }
924 #endif
925                 return (0);
926         }
927 nogo:
928         if (!usermode) {
929                 if (td->td_intr_nesting_level == 0 &&
930                     curpcb->pcb_onfault != NULL) {
931                         frame->tf_eip = (int)curpcb->pcb_onfault;
932                         return (0);
933                 }
934                 trap_fatal(frame, eva);
935                 return (-1);
936         }
937         return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
938 }
939
940 static void
941 trap_fatal(frame, eva)
942         struct trapframe *frame;
943         vm_offset_t eva;
944 {
945         int code, ss, esp;
946         u_int type;
947         struct soft_segment_descriptor softseg;
948         char *msg;
949
950         code = frame->tf_err;
951         type = frame->tf_trapno;
952         sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
953
954         if (type <= MAX_TRAP_MSG)
955                 msg = trap_msg[type];
956         else
957                 msg = "UNKNOWN";
958         printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
959             frame->tf_eflags & PSL_VM ? "vm86" :
960             ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
961 #ifdef SMP
962         /* two separate prints in case of a trap on an unmapped page */
963         printf("cpuid = %d; ", PCPU_GET(cpuid));
964         printf("apic id = %02x\n", PCPU_GET(apic_id));
965 #endif
966         if (type == T_PAGEFLT) {
967                 printf("fault virtual address   = 0x%x\n", eva);
968                 printf("fault code              = %s %s, %s\n",
969                         code & PGEX_U ? "user" : "supervisor",
970                         code & PGEX_W ? "write" : "read",
971                         code & PGEX_P ? "protection violation" : "page not present");
972         }
973         printf("instruction pointer     = 0x%x:0x%x\n",
974                frame->tf_cs & 0xffff, frame->tf_eip);
975         if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
976                 ss = frame->tf_ss & 0xffff;
977                 esp = frame->tf_esp;
978         } else {
979                 ss = GSEL(GDATA_SEL, SEL_KPL);
980                 esp = (int)&frame->tf_esp;
981         }
982         printf("stack pointer           = 0x%x:0x%x\n", ss, esp);
983         printf("frame pointer           = 0x%x:0x%x\n", ss, frame->tf_ebp);
984         printf("code segment            = base 0x%x, limit 0x%x, type 0x%x\n",
985                softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
986         printf("                        = DPL %d, pres %d, def32 %d, gran %d\n",
987                softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
988                softseg.ssd_gran);
989         printf("processor eflags        = ");
990         if (frame->tf_eflags & PSL_T)
991                 printf("trace trap, ");
992         if (frame->tf_eflags & PSL_I)
993                 printf("interrupt enabled, ");
994         if (frame->tf_eflags & PSL_NT)
995                 printf("nested task, ");
996         if (frame->tf_eflags & PSL_RF)
997                 printf("resume, ");
998         if (frame->tf_eflags & PSL_VM)
999                 printf("vm86, ");
1000         printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
1001         printf("current process         = ");
1002         if (curproc) {
1003                 printf("%lu (%s)\n", (u_long)curproc->p_pid, curthread->td_name);
1004         } else {
1005                 printf("Idle\n");
1006         }
1007
1008 #ifdef KDB
1009         if (debugger_on_panic || kdb_active) {
1010                 frame->tf_err = eva;    /* smuggle fault address to ddb */
1011                 if (kdb_trap(type, 0, frame)) {
1012                         frame->tf_err = code;   /* restore error code */
1013                         return;
1014                 }
1015                 frame->tf_err = code;           /* restore error code */
1016         }
1017 #endif
1018         printf("trap number             = %d\n", type);
1019         if (type <= MAX_TRAP_MSG)
1020                 panic("%s", trap_msg[type]);
1021         else
1022                 panic("unknown/reserved trap");
1023 }
1024
1025 /*
1026  * Double fault handler. Called when a fault occurs while writing
1027  * a frame for a trap/exception onto the stack. This usually occurs
1028  * when the stack overflows (such is the case with infinite recursion,
1029  * for example).
1030  *
1031  * XXX Note that the current PTD gets replaced by IdlePTD when the
1032  * task switch occurs. This means that the stack that was active at
1033  * the time of the double fault is not available at <kstack> unless
1034  * the machine was idle when the double fault occurred. The downside
1035  * of this is that "trace <ebp>" in ddb won't work.
1036  */
1037 void
1038 dblfault_handler()
1039 {
1040 #ifdef KDTRACE_HOOKS
1041         if (dtrace_doubletrap_func != NULL)
1042                 (*dtrace_doubletrap_func)();
1043 #endif
1044         printf("\nFatal double fault:\n");
1045         printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip));
1046         printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp));
1047         printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp));
1048 #ifdef SMP
1049         /* two separate prints in case of a trap on an unmapped page */
1050         printf("cpuid = %d; ", PCPU_GET(cpuid));
1051         printf("apic id = %02x\n", PCPU_GET(apic_id));
1052 #endif
1053         panic("double fault");
1054 }
1055
1056 int
1057 cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
1058 {
1059         struct proc *p;
1060         struct trapframe *frame;
1061         caddr_t params;
1062         long tmp;
1063         int error;
1064
1065         p = td->td_proc;
1066         frame = td->td_frame;
1067
1068         params = (caddr_t)frame->tf_esp + sizeof(int);
1069         sa->code = frame->tf_eax;
1070
1071         /*
1072          * Need to check if this is a 32 bit or 64 bit syscall.
1073          */
1074         if (sa->code == SYS_syscall) {
1075                 /*
1076                  * Code is first argument, followed by actual args.
1077                  */
1078                 error = fueword(params, &tmp);
1079                 if (error == -1)
1080                         return (EFAULT);
1081                 sa->code = tmp;
1082                 params += sizeof(int);
1083         } else if (sa->code == SYS___syscall) {
1084                 /*
1085                  * Like syscall, but code is a quad, so as to maintain
1086                  * quad alignment for the rest of the arguments.
1087                  */
1088                 error = fueword(params, &tmp);
1089                 if (error == -1)
1090                         return (EFAULT);
1091                 sa->code = tmp;
1092                 params += sizeof(quad_t);
1093         }
1094
1095         if (p->p_sysent->sv_mask)
1096                 sa->code &= p->p_sysent->sv_mask;
1097         if (sa->code >= p->p_sysent->sv_size)
1098                 sa->callp = &p->p_sysent->sv_table[0];
1099         else
1100                 sa->callp = &p->p_sysent->sv_table[sa->code];
1101         sa->narg = sa->callp->sy_narg;
1102
1103         if (params != NULL && sa->narg != 0)
1104                 error = copyin(params, (caddr_t)sa->args,
1105                     (u_int)(sa->narg * sizeof(int)));
1106         else
1107                 error = 0;
1108
1109         if (error == 0) {
1110                 td->td_retval[0] = 0;
1111                 td->td_retval[1] = frame->tf_edx;
1112         }
1113                 
1114         return (error);
1115 }
1116
1117 #include "../../kern/subr_syscall.c"
1118
1119 /*
1120  * syscall - system call request C handler.  A system call is
1121  * essentially treated as a trap by reusing the frame layout.
1122  */
1123 void
1124 syscall(struct trapframe *frame)
1125 {
1126         struct thread *td;
1127         struct syscall_args sa;
1128         register_t orig_tf_eflags;
1129         int error;
1130         ksiginfo_t ksi;
1131
1132 #ifdef DIAGNOSTIC
1133         if (ISPL(frame->tf_cs) != SEL_UPL) {
1134                 panic("syscall");
1135                 /* NOT REACHED */
1136         }
1137 #endif
1138         orig_tf_eflags = frame->tf_eflags;
1139
1140         td = curthread;
1141         td->td_frame = frame;
1142
1143         error = syscallenter(td, &sa);
1144
1145         /*
1146          * Traced syscall.
1147          */
1148         if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) {
1149                 frame->tf_eflags &= ~PSL_T;
1150                 ksiginfo_init_trap(&ksi);
1151                 ksi.ksi_signo = SIGTRAP;
1152                 ksi.ksi_code = TRAP_TRACE;
1153                 ksi.ksi_addr = (void *)frame->tf_eip;
1154                 trapsignal(td, &ksi);
1155         }
1156
1157         KASSERT(PCB_USER_FPU(td->td_pcb),
1158             ("System call %s returning with kernel FPU ctx leaked",
1159              syscallname(td->td_proc, sa.code)));
1160         KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
1161             ("System call %s returning with mangled pcb_save",
1162              syscallname(td->td_proc, sa.code)));
1163
1164         syscallret(td, error, &sa);
1165 }