2 * Copyright 2014 Olivier Houchard <cognet@FreeBSD.org>
3 * Copyright 2014 Svatopluk Kraus <onwahe@gmail.com>
4 * Copyright 2014 Michal Meloun <meloun@miracle.cz>
5 * Copyright 2014 Andrew Turner <andrew@FreeBSD.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include "opt_ktrace.h"
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
37 #include <sys/systm.h>
39 #include <sys/kernel.h>
41 #include <sys/mutex.h>
42 #include <sys/signalvar.h>
44 #include <sys/vmmeter.h>
47 #include <sys/ktrace.h>
52 #include <vm/vm_kern.h>
53 #include <vm/vm_map.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_param.h>
57 #include <machine/cpu.h>
58 #include <machine/frame.h>
59 #include <machine/machdep.h>
60 #include <machine/pcb.h>
64 #include <machine/db_machdep.h>
68 #include <sys/dtrace_bsd.h>
71 extern char cachebailout[];
79 typedef int abort_func_t(struct trapframe *, u_int, u_int, u_int, u_int,
80 struct thread *, struct ksig *);
82 static abort_func_t abort_fatal;
83 static abort_func_t abort_align;
84 static abort_func_t abort_icache;
92 * How are the aborts handled?
95 * - Always fatal as we do not know what it means.
96 * Imprecise External Abort:
97 * - Always fatal, but can be handled somehow in the future.
98 * Now, due to PCIe buggy hardware, ignored.
99 * Precise External Abort:
100 * - Always fatal, but who knows in the future???
102 * - Special handling.
103 * External Translation Abort (L1 & L2)
104 * - Always fatal as something is screwed up in page tables or hardware.
105 * Domain Fault (L1 & L2):
106 * - Always fatal as we do not play game with domains.
108 * - Everything should be aligned in kernel with exception of user to kernel
109 * and vice versa data copying, so if pcb_onfault is not set, it's fatal.
110 * We generate signal in case of abort from user mode.
111 * Instruction cache maintenance:
112 * - According to manual, this is translation fault during cache maintenance
113 * operation. So, it could be really complex in SMP case and fuzzy too
114 * for cache operations working on virtual addresses. For now, we will
115 * consider this abort as fatal. In fact, no cache maintenance on
116 * not mapped virtual addresses should be called. As cache maintenance
117 * operation (except DMB, DSB, and Flush Prefetch Buffer) are privileged,
118 * the abort is fatal for user mode as well for now. (This is a good place to
119 * note that cache maintenance on a virtual address fills the TLB.)
120 * Access Bit (L1 & L2):
121 * - Fast hardware emulation for kernel and user mode.
122 * Translation Fault (L1 & L2):
123 * - Standard fault mechanism is held including vm_fault().
124 * Permission Fault (L1 & L2):
125 * - Fast hardware emulation of modify bits and in other cases, standard
126 * fault mechanism is held including vm_fault().
129 static const struct abort aborts[] = {
130 {abort_fatal, "Undefined Code (0x000)"},
131 {abort_align, "Alignment Fault"},
132 {abort_fatal, "Debug Event"},
133 {NULL, "Access Bit (L1)"},
134 {NULL, "Instruction cache maintenance"},
135 {NULL, "Translation Fault (L1)"},
136 {NULL, "Access Bit (L2)"},
137 {NULL, "Translation Fault (L2)"},
139 {abort_fatal, "External Abort"},
140 {abort_fatal, "Domain Fault (L1)"},
141 {abort_fatal, "Undefined Code (0x00A)"},
142 {abort_fatal, "Domain Fault (L2)"},
143 {abort_fatal, "External Translation Abort (L1)"},
144 {NULL, "Permission Fault (L1)"},
145 {abort_fatal, "External Translation Abort (L2)"},
146 {NULL, "Permission Fault (L2)"},
148 {abort_fatal, "TLB Conflict Abort"},
149 {abort_fatal, "Undefined Code (0x401)"},
150 {abort_fatal, "Undefined Code (0x402)"},
151 {abort_fatal, "Undefined Code (0x403)"},
152 {abort_fatal, "Undefined Code (0x404)"},
153 {abort_fatal, "Undefined Code (0x405)"},
154 {abort_fatal, "Asynchronous External Abort"},
155 {abort_fatal, "Undefined Code (0x407)"},
157 {abort_fatal, "Asynchronous Parity Error on Memory Access"},
158 {abort_fatal, "Parity Error on Memory Access"},
159 {abort_fatal, "Undefined Code (0x40A)"},
160 {abort_fatal, "Undefined Code (0x40B)"},
161 {abort_fatal, "Parity Error on Translation (L1)"},
162 {abort_fatal, "Undefined Code (0x40D)"},
163 {abort_fatal, "Parity Error on Translation (L2)"},
164 {abort_fatal, "Undefined Code (0x40F)"}
168 call_trapsignal(struct thread *td, int sig, int code, vm_offset_t addr,
173 CTR4(KTR_TRAP, "%s: addr: %#x, sig: %d, code: %d",
174 __func__, addr, sig, code);
177 * TODO: some info would be nice to know
178 * if we are serving data or prefetch abort.
181 ksiginfo_init_trap(&ksi);
184 ksi.ksi_addr = (void *)addr;
185 ksi.ksi_trapno = trapno;
186 trapsignal(td, &ksi);
190 * abort_imprecise() handles the following abort:
192 * FAULT_EA_IMPREC - Imprecise External Abort
194 * The imprecise means that we don't know where the abort happened,
195 * thus FAR is undefined. The abort should never fire, but hot
196 * plugging or accidental hardware failure can be the cause of it.
197 * If the abort happens, it can even be on different (thread) context.
198 * Without any additional support, the abort is fatal, as we do not
199 * know what really happened.
201 * QQQ: Some additional functionality, like pcb_onfault but global,
202 * can be implemented. Imprecise handlers could be registered
203 * which tell us if the abort is caused by something they know
204 * about. They should return one of three codes like:
208 * The handlers should be called until some of them returns
209 * FAULT_IS_MINE value or all was called. If all handlers return
210 * FAULT_IS_NOT_MINE value, then the abort is fatal.
213 abort_imprecise(struct trapframe *tf, u_int fsr, u_int prefetch, bool usermode)
217 * XXX - We can get an imprecise abort as a result of an access
218 * to a not-present PCI/PCIe configuration space.
223 abort_fatal(tf, FAULT_EA_IMPREC, fsr, 0, prefetch, curthread, NULL);
226 * Returning from this function means that we ignore
227 * the abort for good reason. Note that imprecise abort
228 * could fire any time even in user mode.
234 userret(curthread, tf);
239 * abort_debug() handles the following abort:
241 * FAULT_DEBUG - Debug Event
245 abort_debug(struct trapframe *tf, u_int fsr, u_int prefetch, bool usermode,
253 call_trapsignal(td, SIGTRAP, TRAP_BRKPT, far, FAULT_DEBUG);
257 kdb_trap((prefetch) ? T_BREAKPOINT : T_WATCHPOINT, 0, tf);
259 printf("No debugger in kernel.\n");
267 * FAR, FSR, and everything that can be lost after enabling
268 * interrupts must be grabbed before the interrupts will be
269 * enabled. Note that when interrupts will be enabled, we
270 * could even migrate to another CPU ...
272 * TODO: move quick cases to ASM
275 abort_handler(struct trapframe *tf, int prefetch)
288 int bp_harden, ucode;
296 fsr = (prefetch) ? cp15_ifsr_get(): cp15_dfsr_get();
298 far = (prefetch) ? cp15_ifar_get() : cp15_dfar_get();
300 far = (prefetch) ? TRAPF_PC(tf) : cp15_dfar_get();
303 idx = FSR_TO_FAULT(fsr);
304 usermode = TRAPF_USERMODE(tf); /* Abort came from user mode? */
307 * Apply BP hardening by flushing the branch prediction cache
308 * for prefaults on kernel addresses.
310 if (__predict_false(prefetch && far > VM_MAXUSER_ADDRESS &&
311 (idx == FAULT_TRAN_L2 || idx == FAULT_PERM_L2))) {
312 bp_harden = PCPU_GET(bp_harden_kind);
313 if (bp_harden == PCPU_BP_HARDEN_KIND_BPIALL)
315 else if (bp_harden == PCPU_BP_HARDEN_KIND_ICIALLU)
322 CTR6(KTR_TRAP, "%s: fsr %#x (idx %u) far %#x prefetch %u usermode %d",
323 __func__, fsr, idx, far, prefetch, usermode);
326 * Firstly, handle aborts that are not directly related to mapping.
328 if (__predict_false(idx == FAULT_EA_IMPREC)) {
329 abort_imprecise(tf, fsr, prefetch, usermode);
333 if (__predict_false(idx == FAULT_DEBUG)) {
334 abort_debug(tf, fsr, prefetch, usermode, far);
339 * ARM has a set of unprivileged load and store instructions
340 * (LDRT/LDRBT/STRT/STRBT ...) which are supposed to be used in other
341 * than user mode and OS should recognize their aborts and behave
342 * appropriately. However, there is no way how to do that reasonably
343 * in general unless we restrict the handling somehow.
345 * For now, these instructions are used only in copyin()/copyout()
346 * like functions where usermode buffers are checked in advance that
347 * they are not from KVA space. Thus, no action is needed here.
351 * (1) Handle access and R/W hardware emulation aborts.
352 * (2) Check that abort is not on pmap essential address ranges.
353 * There is no way how to fix it, so we don't even try.
355 rv = pmap_fault(PCPU_GET(curpmap), far, fsr, idx, usermode);
356 if (rv == KERN_SUCCESS)
364 if (rv == KERN_INVALID_ADDRESS)
367 if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
369 * Due to both processor errata and lazy TLB invalidation when
370 * access restrictions are removed from virtual pages, memory
371 * accesses that are allowed by the physical mapping layer may
372 * nonetheless cause one spurious page fault per virtual page.
373 * When the thread is executing a "no faulting" section that
374 * is bracketed by vm_fault_{disable,enable}_pagefaults(),
375 * every page fault is treated as a spurious page fault,
376 * unless it accesses the same virtual address as the most
377 * recent page fault within the same "no faulting" section.
379 if (td->td_md.md_spurflt_addr != far ||
380 (td->td_pflags & TDP_RESETSPUR) != 0) {
381 td->td_md.md_spurflt_addr = far;
382 td->td_pflags &= ~TDP_RESETSPUR;
384 tlb_flush_local(far & ~PAGE_MASK);
389 * If we get a page fault while in a critical section, then
390 * it is most likely a fatal kernel page fault. The kernel
391 * is already going to panic trying to get a sleep lock to
392 * do the VM lookup, so just consider it a fatal trap so the
393 * kernel can print out a useful trap message and even get
396 * If we get a page fault while holding a non-sleepable
397 * lock, then it is most likely a fatal kernel page fault.
398 * If WITNESS is enabled, then it's going to whine about
399 * bogus LORs with various VM locks, so just skip to the
400 * fatal trap handling directly.
402 if (td->td_critnest != 0 ||
403 WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
404 "Kernel page fault") != 0) {
405 abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig);
410 /* Re-enable interrupts if they were enabled previously. */
411 if (td->td_md.md_spinlock_count == 0) {
412 if (__predict_true(tf->tf_spsr & PSR_I) == 0)
413 enable_interrupts(PSR_I);
414 if (__predict_true(tf->tf_spsr & PSR_F) == 0)
415 enable_interrupts(PSR_F);
421 if (td->td_cowgen != atomic_load_int(&p->p_cowgen))
422 thread_cow_update(td);
425 /* Invoke the appropriate handler, if necessary. */
426 if (__predict_false(aborts[idx].func != NULL)) {
427 if ((aborts[idx].func)(tf, idx, fsr, far, prefetch, td, &ksig))
433 * At this point, we're dealing with one of the following aborts:
435 * FAULT_ICACHE - I-cache maintenance
436 * FAULT_TRAN_xx - Translation
437 * FAULT_PERM_xx - Permission
441 * Don't pass faulting cache operation to vm_fault(). We don't want
442 * to handle all vm stuff at this moment.
445 if (__predict_false(pcb->pcb_onfault == cachebailout)) {
446 tf->tf_r0 = far; /* return failing address */
447 tf->tf_pc = (register_t)pcb->pcb_onfault;
451 /* Handle remaining I-cache aborts. */
452 if (idx == FAULT_ICACHE) {
453 if (abort_icache(tf, idx, fsr, far, prefetch, td, &ksig))
458 va = trunc_page(far);
459 if (va >= KERNBASE) {
461 * Don't allow user-mode faults in kernel address space.
465 ksig.code = SEGV_ACCERR;
472 * This is a fault on non-kernel virtual memory. If curproc
473 * is NULL or curproc->p_vmspace is NULL the fault is fatal.
475 vm = (p != NULL) ? p->p_vmspace : NULL;
483 if (!usermode && (td->td_intr_nesting_level != 0 ||
484 pcb->pcb_onfault == NULL)) {
485 abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig);
490 ftype = (fsr & FSR_WNR) ? VM_PROT_WRITE : VM_PROT_READ;
492 ftype |= VM_PROT_EXECUTE;
495 onfault = pcb->pcb_onfault;
496 pcb->pcb_onfault = NULL;
499 /* Fault in the page. */
500 rv = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &ksig.sig,
505 pcb->pcb_onfault = onfault;
508 if (__predict_true(rv == KERN_SUCCESS))
512 if (td->td_intr_nesting_level == 0 &&
513 pcb->pcb_onfault != NULL) {
515 tf->tf_pc = (int)pcb->pcb_onfault;
518 CTR2(KTR_TRAP, "%s: vm_fault() failed with %d", __func__, rv);
519 abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig);
526 call_trapsignal(td, ksig.sig, ksig.code, ksig.addr, idx);
533 * abort_fatal() handles the following data aborts:
535 * FAULT_DEBUG - Debug Event
536 * FAULT_ACCESS_xx - Access Bit
537 * FAULT_EA_PREC - Precise External Abort
538 * FAULT_DOMAIN_xx - Domain Fault
539 * FAULT_EA_TRAN_xx - External Translation Abort
540 * FAULT_EA_IMPREC - Imprecise External Abort
541 * + all undefined codes for ABORT
543 * We should never see these on a properly functioning system.
545 * This function is also called by the other handlers if they
546 * detect a fatal problem.
548 * Note: If 'l' is NULL, we assume we're dealing with a prefetch abort.
551 abort_fatal(struct trapframe *tf, u_int idx, u_int fsr, u_int far,
552 u_int prefetch, struct thread *td, struct ksig *ksig)
561 usermode = TRAPF_USERMODE(tf);
564 if (dtrace_trap_func != NULL && (*dtrace_trap_func)(tf, far))
569 mode = usermode ? "user" : "kernel";
570 rw_mode = fsr & FSR_WNR ? "write" : "read";
571 disable_interrupts(PSR_I|PSR_F);
574 printf("Fatal %s mode data abort: '%s' on %s\n", mode,
575 aborts[idx].desc, rw_mode);
576 printf("trapframe: %p\nFSR=%08x, FAR=", tf, fsr);
577 if (idx != FAULT_EA_IMPREC)
578 printf("%08x, ", far);
581 printf("spsr=%08x\n", tf->tf_spsr);
583 printf("Fatal %s mode prefetch abort at 0x%08x\n",
585 printf("trapframe: %p, spsr=%08x\n", tf, tf->tf_spsr);
588 printf("r0 =%08x, r1 =%08x, r2 =%08x, r3 =%08x\n",
589 tf->tf_r0, tf->tf_r1, tf->tf_r2, tf->tf_r3);
590 printf("r4 =%08x, r5 =%08x, r6 =%08x, r7 =%08x\n",
591 tf->tf_r4, tf->tf_r5, tf->tf_r6, tf->tf_r7);
592 printf("r8 =%08x, r9 =%08x, r10=%08x, r11=%08x\n",
593 tf->tf_r8, tf->tf_r9, tf->tf_r10, tf->tf_r11);
594 printf("r12=%08x, ", tf->tf_r12);
597 printf("usp=%08x, ulr=%08x",
598 tf->tf_usr_sp, tf->tf_usr_lr);
600 printf("ssp=%08x, slr=%08x",
601 tf->tf_svc_sp, tf->tf_svc_lr);
602 printf(", pc =%08x\n\n", tf->tf_pc);
605 if (debugger_on_trap) {
606 kdb_why = KDB_WHY_TRAP;
607 handled = kdb_trap(fsr, 0, tf);
608 kdb_why = KDB_WHY_UNSET;
613 panic("Fatal abort");
618 * abort_align() handles the following data abort:
620 * FAULT_ALIGN - Alignment fault
622 * Everything should be aligned in kernel with exception of user to kernel
623 * and vice versa data copying, so if pcb_onfault is not set, it's fatal.
624 * We generate signal in case of abort from user mode.
627 abort_align(struct trapframe *tf, u_int idx, u_int fsr, u_int far,
628 u_int prefetch, struct thread *td, struct ksig *ksig)
632 usermode = TRAPF_USERMODE(tf);
/* td must be tested for NULL before any of its fields is read. */
634 if (td != NULL && td->td_intr_nesting_level == 0 &&
635 td->td_pcb->pcb_onfault != NULL) {
637 tf->tf_pc = (int)td->td_pcb->pcb_onfault;
640 abort_fatal(tf, idx, fsr, far, prefetch, td, ksig);
642 /* Deliver a bus error signal to the process */
643 ksig->code = BUS_ADRALN;
650 * abort_icache() handles the following data abort:
652 * FAULT_ICACHE - Instruction cache maintenance
654 * According to manual, FAULT_ICACHE is translation fault during cache
655 * maintenance operation. In fact, no cache maintenance operation on
656 * not mapped virtual addresses should be called. As cache maintenance
657 * operation (except DMB, DSB, and Flush Prefetch Buffer) are privileged,
658 * the abort is considered fatal for now. However, all the matter with
659 * cache maintenance operation on virtual addresses could be really complex
660 * and fuzzy in SMP case, so maybe in future standard fault mechanism
661 * should be held here including vm_fault() calling.
664 abort_icache(struct trapframe *tf, u_int idx, u_int fsr, u_int far,
665 u_int prefetch, struct thread *td, struct ksig *ksig)
668 abort_fatal(tf, idx, fsr, far, prefetch, td, ksig);