2 * Copyright (c) 1990 The Regents of the University of California.
5 * This code is derived from software contributed to Berkeley by
6 * the University of Utah, and William Jolitz.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
41 * 386 Trap and System call handling
45 #include "machine/cpu.h"
46 #include "machine/psl.h"
47 #include "machine/reg.h"
59 #include "vm/vm_param.h"
61 #include "vm/vm_map.h"
62 #include "sys/vmmeter.h"
64 #include "machine/trap.h"
69 * The "r" constraint could be "rm" except for fatal bugs in gas. As usual,
70 * we omit the size from the mov instruction to avoid nonfatal bugs in gas.
72 #define read_gs() ({ u_short gs; __asm("mov %%gs,%0" : "=r" (gs)); gs; })
73 #define write_gs(gs) __asm("mov %0,%%gs" : : "r" ((u_short) gs))
75 #else /* not __GNUC__ */
77 u_short read_gs __P((void));
78 void write_gs __P((/* promoted u_short */ int gs));
82 struct sysent sysent[];
91 * Exception, fault, and trap interface to BSD kernel. This
92 * common code is called from assembly language IDT gate entry
93 * routines that prepare a suitable stack frame, and restore this
94 * frame after the exception has been processed. Note that the
95 * effect is as if the arguments were passed call by reference.
100 struct trapframe frame;
103 register struct proc *p = curproc;
105 int ucode, type, code, eva;
107 frame.tf_eflags &= ~PSL_NT; /* clear nested trap XXX */
108 type = frame.tf_trapno;
111 if (curpcb && curpcb->pcb_onfault) {
112 if (frame.tf_trapno == T_BPTFLT
113 || frame.tf_trapno == T_TRCTRAP)
114 if (kdb_trap (type, 0, &frame))
119 /*pg("trap type %d code = %x eip = %x cs = %x eva = %x esp %x",
120 frame.tf_trapno, frame.tf_err, frame.tf_eip,
121 frame.tf_cs, rcr2(), frame.tf_esp);*/
122 if(curpcb == 0 || curproc == 0) goto we_re_toast;
123 if (curpcb->pcb_onfault && frame.tf_trapno != T_PAGEFLT) {
124 extern int _udatasel;
126 if (read_gs() != (u_short) _udatasel)
128 * Some user has corrupted %gs but we depend on it in
129 * copyout() etc. Fix it up and retry.
131 * (We don't preserve %fs or %gs, so users can change
132 * them to either _ucodesel, _udatasel or a not-present
133 * selector, possibly ORed with 0 to 3, making them
134 * volatile for other users. Not preserving them saves
135 * time and doesn't lose functionality or open security
141 frame.tf_eip = (int)curpcb->pcb_onfault;
146 if (ISPL(frame.tf_cs) == SEL_UPL) {
148 p->p_regs = (int *)&frame;
149 curpcb->pcb_flags |= FM_TRAP; /* used by sendsig */
164 if (kdb_trap (type, 0, &frame))
168 printf("trap type %d code = %x eip = %x cs = %x eflags = %x ",
169 frame.tf_trapno, frame.tf_err, frame.tf_eip,
170 frame.tf_cs, frame.tf_eflags);
172 printf("cr2 %x cpl %x\n", eva, cpl);
173 /* type &= ~T_USER; */ /* XXX what the hell is this */
177 case T_SEGNPFLT|T_USER:
178 case T_STKFLT|T_USER:
179 case T_PROTFLT|T_USER: /* protection fault */
180 ucode = code + BUS_SEGM_FAULT ;
184 case T_PRIVINFLT|T_USER: /* privileged instruction fault */
185 case T_RESADFLT|T_USER: /* reserved addressing fault */
186 case T_RESOPFLT|T_USER: /* reserved operand fault */
187 case T_FPOPFLT|T_USER: /* coprocessor operand fault */
188 ucode = type &~ T_USER;
192 case T_ASTFLT|T_USER: /* Allow process switch */
195 if ((p->p_flag & SOWEUPC) && p->p_stats->p_prof.pr_scale) {
196 addupc(frame.tf_eip, &p->p_stats->p_prof, 1);
197 p->p_flag &= ~SOWEUPC;
203 /* if a transparent fault (due to context switch "late") */
204 if (npxdna()) return;
205 #endif /* NNPX > 0 */
207 i = math_emulate(&frame);
209 #else /* MATH_EMULTATE */
210 panic("trap: math emulation necessary!");
211 #endif /* MATH_EMULTATE */
212 ucode = FPE_FPU_NP_TRAP;
216 ucode = FPE_SUBRNG_TRAP;
221 ucode = FPE_INTOVF_TRAP;
225 case T_DIVIDE|T_USER:
226 ucode = FPE_INTDIV_TRAP;
230 case T_ARITHTRAP|T_USER:
235 case T_PAGEFLT: /* allow page faults in kernel mode */
237 /* XXX - check only applies to 386's and 486's with WP off */
238 if (code & PGEX_P) goto we_re_toast;
242 case T_PAGEFLT|T_USER: /* page fault */
244 register vm_offset_t va;
245 register struct vmspace *vm = p->p_vmspace;
246 register vm_map_t map;
249 extern vm_map_t kernel_map;
252 va = trunc_page((vm_offset_t)eva);
254 * It is only a kernel address space fault iff:
255 * 1. (type & T_USER) == 0 and
256 * 2. pcb_onfault not set or
257 * 3. pcb_onfault set but supervisor space fault
258 * The last can occur during an exec() copyin where the
259 * argument space is lazy-allocated.
261 if (type == T_PAGEFLT && va >= KERNBASE)
266 ftype = VM_PROT_READ | VM_PROT_WRITE;
268 ftype = VM_PROT_READ;
271 if (map == kernel_map && va == 0) {
272 printf("trap: bad kernel access at %x\n", va);
278 * XXX: rude hack to make stack limits "work"
281 if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map
283 nss = clrnd(btoc((unsigned)vm->vm_maxsaddr
284 + MAXSSIZ - (unsigned)va));
285 if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
286 /*pg("trap rlimit %d, maxsaddr %x va %x ", nss, vm->vm_maxsaddr, va);*/
292 /* check if page table is mapped, if not, fault it first */
293 #define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v)
295 v = trunc_page(vtopte(va));
296 rv = vm_fault(map, v, ftype, FALSE);
297 if (rv != KERN_SUCCESS) goto nogo;
298 /* check if page table fault, increment wiring */
299 vm_map_pageable(map, v, round_page(v+1), FALSE);
301 rv = vm_fault(map, va, ftype, FALSE);
302 if (rv == KERN_SUCCESS) {
304 * XXX: continuation of rude stack hack
306 if (nss > vm->vm_ssize)
308 va = trunc_page(vtopte(va));
309 /* for page table, increment wiring
310 as long as not a page table fault as well */
311 if (!v && type != T_PAGEFLT)
312 vm_map_pageable(map, va, round_page(va+1), FALSE);
313 if (type == T_PAGEFLT)
318 if (type == T_PAGEFLT) {
319 if (curpcb->pcb_onfault)
321 printf("vm_fault(%x, %x, %x, 0) -> %x\n",
323 printf(" type %x, code %x\n",
327 i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;
332 case T_TRCTRAP: /* trace trap -- someone single stepping lcall's */
333 frame.tf_eflags &= ~PSL_T;
335 /* Q: how do we turn it on again? */
339 case T_BPTFLT|T_USER: /* bpt instruction fault */
340 case T_TRCTRAP|T_USER: /* trace trap */
341 frame.tf_eflags &= ~PSL_T;
350 /* NMI can be hooked up to a pushbutton for debugging */
351 printf ("NMI ... going to debugger\n");
352 if (kdb_trap (type, 0, &frame))
355 /* machine/parity/power fail/"kitchen sink" faults */
356 if(isa_nmi(code) == 0) return;
357 else goto we_re_toast;
361 trapsignal(p, i, ucode);
362 if ((type & T_USER) == 0)
365 while (i = CURSIG(p))
367 p->p_pri = p->p_usrpri;
370 * Since we are curproc, clock will normally just change
371 * our priority without moving us from one queue to another
372 * (since the running process is not on a queue.)
373 * If that happened after we setrq ourselves but before we
374 * swtch()'ed, we might not be on the queue indicated by
379 p->p_stats->p_ru.ru_nivcsw++;
382 while (i = CURSIG(p))
385 if (p->p_stats->p_prof.pr_scale) {
387 struct timeval *tv = &p->p_stime;
389 ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
390 (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
393 extern int profscale;
394 addupc(frame.tf_eip, &p->p_stats->p_prof,
397 addupc(frame.tf_eip, &p->p_stats->p_prof, ticks);
402 curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */
406 * Compensate for 386 brain damage (missing URKR).
407 * This is a little simpler than the pagefault handler in trap() because
408 * the page tables have already been faulted in and high addresses
409 * are thrown out early for other reasons.
419 va = trunc_page((vm_offset_t)addr);
421 * XXX - MAX is END. Changed > to >= for temp. fix.
423 if (va >= VM_MAXUSER_ADDRESS)
426 * XXX: rude stack hack adapted from trap().
431 if ((caddr_t)va >= vm->vm_maxsaddr && dostacklimits) {
432 nss = clrnd(btoc((unsigned)vm->vm_maxsaddr + MAXSSIZ
434 if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur))
438 if (vm_fault(&vm->vm_map, va, VM_PROT_READ | VM_PROT_WRITE, FALSE)
443 * XXX: continuation of rude stack hack
445 if (nss > vm->vm_ssize)
453 * System call request from POSIX system call gate interface to kernel.
454 * Like trap(), argument is call by reference.
458 volatile struct syscframe frame;
460 register int *locr0 = ((int *)&frame);
461 register caddr_t params;
463 register struct sysent *callp;
464 register struct proc *p = curproc;
467 int args[8], rval[2];
471 r0 = 0; r0 = r0; r1 = 0; r1 = r1;
474 if (ISPL(frame.sf_cs) != SEL_UPL)
478 curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */
479 p->p_regs = (int *)&frame;
480 params = (caddr_t)frame.sf_esp + sizeof (int) ;
483 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always.
485 opc = frame.sf_eip - 7;
486 callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
487 if (callp == sysent) {
489 params += sizeof (int);
490 callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
493 if ((i = callp->sy_narg * sizeof (int)) &&
494 (error = copyin(params, (caddr_t)args, (u_int)i))) {
495 frame.sf_eax = error;
496 frame.sf_eflags |= PSL_C; /* carry bit */
498 if (KTRPOINT(p, KTR_SYSCALL))
499 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args);
504 if (KTRPOINT(p, KTR_SYSCALL))
505 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args);
508 rval[1] = frame.sf_edx;
509 /*pg("%d. s %d\n", p->p_pid, code);*/
510 error = (*callp->sy_call)(p, args, rval);
511 if (error == ERESTART)
513 else if (error != EJUSTRETURN) {
515 /*pg("error %d", error);*/
516 frame.sf_eax = error;
517 frame.sf_eflags |= PSL_C; /* carry bit */
519 frame.sf_eax = rval[0];
520 frame.sf_edx = rval[1];
521 frame.sf_eflags &= ~PSL_C; /* carry bit */
524 /* else if (error == EJUSTRETURN) */
528 * Reinitialize proc pointer `p' as it may be different
529 * if this is a child returning from fork syscall.
532 while (i = CURSIG(p))
534 p->p_pri = p->p_usrpri;
537 * Since we are curproc, clock will normally just change
538 * our priority without moving us from one queue to another
539 * (since the running process is not on a queue.)
540 * If that happened after we setrq ourselves but before we
541 * swtch()'ed, we might not be on the queue indicated by
546 p->p_stats->p_ru.ru_nivcsw++;
549 while (i = CURSIG(p))
552 if (p->p_stats->p_prof.pr_scale) {
554 struct timeval *tv = &p->p_stime;
556 ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
557 (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
560 extern int profscale;
561 addupc(frame.sf_eip, &p->p_stats->p_prof,
564 addupc(frame.sf_eip, &p->p_stats->p_prof, ticks);
570 if (KTRPOINT(p, KTR_SYSRET))
571 ktrsysret(p->p_tracep, code, error, rval[0]);
574 { extern int _udatasel, _ucodesel;
575 if (frame.sf_ss != _udatasel)
576 printf("ss %x call %d\n", frame.sf_ss, code);
577 if ((frame.sf_cs&0xffff) != _ucodesel)
578 printf("cs %x call %d\n", frame.sf_cs, code);
579 if (frame.sf_eip > VM_MAXUSER_ADDRESS) {
580 printf("eip %x call %d\n", frame.sf_eip, code);