2 * Copyright (c) 1990 William Jolitz.
3 * Copyright (c) 1991 The Regents of the University of California.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * from: @(#)npx.c 7.2 (Berkeley) 5/12/91
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
40 #include <sys/param.h>
41 #include <sys/systm.h>
43 #include <sys/kernel.h>
45 #include <sys/malloc.h>
46 #include <sys/module.h>
47 #include <sys/mutex.h>
48 #include <sys/mutex.h>
51 #include <sys/sysctl.h>
52 #include <machine/bus.h>
55 #include <sys/syslog.h>
57 #include <sys/signalvar.h>
59 #include <machine/asmacros.h>
60 #include <machine/cputypes.h>
61 #include <machine/frame.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
64 #include <machine/psl.h>
65 #include <machine/resource.h>
66 #include <machine/specialreg.h>
67 #include <machine/segments.h>
68 #include <machine/ucontext.h>
70 #include <machine/intr_machdep.h>
72 #include <machine/xen/xen-os.h>
73 #include <xen/hypervisor.h>
77 #include <isa/isavar.h>
80 #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
81 #define CPU_ENABLE_SSE
85 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
88 #if defined(__GNUCLIKE_ASM) && !defined(lint)
90 #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw))
91 #define fnclex() __asm __volatile("fnclex")
92 #define fninit() __asm __volatile("fninit")
93 #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr)))
94 #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr)))
95 #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr)))
96 #define fp_divide_by_0() __asm __volatile( \
97 "fldz; fld1; fdiv %st,%st(1); fnop")
98 #define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr)))
100 #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr)))
101 #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
/*
 * Store the MXCSR register into *addr.
 *
 * STMXCSR writes its memory operand, so the operand must be declared as an
 * asm output ("=m").  Declaring it as an input would let the compiler assume
 * that *addr is unchanged by the statement and reuse a stale value.
 */
#define	stmxcsr(addr)		__asm __volatile("stmxcsr %0" : "=m" (*(addr)))
104 #else /* !(__GNUCLIKE_ASM && !lint) */
106 void fldcw(u_short cw);
109 void fnsave(caddr_t addr);
110 void fnstcw(caddr_t addr);
111 void fnstsw(caddr_t addr);
112 void fp_divide_by_0(void);
113 void frstor(caddr_t addr);
114 #ifdef CPU_ENABLE_SSE
115 void fxsave(caddr_t addr);
116 void fxrstor(caddr_t addr);
117 void stmxcsr(u_int *csr);
120 #endif /* __GNUCLIKE_ASM && !lint */
123 #define start_emulating() (HYPERVISOR_fpu_taskswitch(1))
124 #define stop_emulating() (HYPERVISOR_fpu_taskswitch(0))
126 #define start_emulating() load_cr0(rcr0() | CR0_TS)
127 #define stop_emulating() clts()
130 #ifdef CPU_ENABLE_SSE
131 #define GET_FPU_CW(thread) \
133 (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \
134 (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw)
135 #define GET_FPU_SW(thread) \
137 (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \
138 (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw)
139 #define SET_FPU_CW(savefpu, value) do { \
141 (savefpu)->sv_xmm.sv_env.en_cw = (value); \
143 (savefpu)->sv_87.sv_env.en_cw = (value); \
145 #else /* CPU_ENABLE_SSE */
/*
 * Fetch the x87 control word stored in the save area that the thread's PCB
 * currently points at (variant used when CPU_ENABLE_SSE is not defined, so
 * only the sv_87 layout is consulted).
 *
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. "&td" or "tds + 1") can be passed safely.
 */
#define	GET_FPU_CW(thread) \
	((thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw)
/*
 * Fetch the x87 status word stored in the save area that the thread's PCB
 * currently points at (variant used when CPU_ENABLE_SSE is not defined, so
 * only the sv_87 layout is consulted).
 *
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. "&td" or "tds + 1") can be passed safely.
 */
#define	GET_FPU_SW(thread) \
	((thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw)
/*
 * Store "value" as the x87 control word of the given save area (variant used
 * when CPU_ENABLE_SSE is not defined, so only the sv_87 layout is written).
 *
 * The whole expansion is parenthesized so the assignment cannot re-associate
 * with operators surrounding the macro invocation; it remains usable as a
 * plain statement.
 */
#define	SET_FPU_CW(savefpu, value) \
	((savefpu)->sv_87.sv_env.en_cw = (value))
152 #endif /* CPU_ENABLE_SSE */
154 typedef u_char bool_t;
156 #ifdef CPU_ENABLE_SSE
157 static void fpu_clean_state(void);
160 static void fpusave(union savefpu *);
161 static void fpurstor(union savefpu *);
162 static int npx_attach(device_t dev);
163 static void npx_identify(driver_t *driver, device_t parent);
164 static int npx_probe(device_t dev);
168 SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
169 &hw_float, 0, "Floating point instructions executed in hardware");
171 static volatile u_int npx_traps_while_probing;
172 static union savefpu npx_initialstate;
174 alias_for_inthand_t probetrap;
178 .type " __XSTRING(CNAME(probetrap)) ",@function \n\
179 " __XSTRING(CNAME(probetrap)) ": \n\
181 incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\
187 * Identify routine. Create a connection point on our parent for probing.
190 npx_identify(driver, parent)
196 child = BUS_ADD_CHILD(parent, 0, "npx", 0);
198 panic("npx_identify");
202 * Probe routine. Set flags to tell npx_attach() what to do. Set up an
203 * interrupt handler if npx needs to use interrupts.
206 npx_probe(device_t dev)
208 struct gate_descriptor save_idt_npxtrap;
209 u_short control, status;
211 device_set_desc(dev, "math processor");
214 * Modern CPUs all have an FPU that uses the INT16 interface
215 * and provide a simple way to verify that, so handle the
216 * common case right away.
218 if (cpu_feature & CPUID_FPU) {
224 save_idt_npxtrap = idt[IDT_MF];
225 setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL,
226 GSEL(GCODE_SEL, SEL_KPL));
229 * Don't trap while we're probing.
234 * Finish resetting the coprocessor, if any. If there is an error
235 * pending, then we may get a bogus IRQ13, but npx_intr() will handle
236 * it OK. Bogus halts have never been observed, but we enabled
237 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
242 * Don't use fwait here because it might hang.
243 * Don't use fnop here because it usually hangs if there is no FPU.
245 DELAY(1000); /* wait for any IRQ13 */
247 if (npx_traps_while_probing != 0)
248 printf("fninit caused %u bogus npx trap(s)\n",
249 npx_traps_while_probing);
252 * Check for a status of mostly zero.
256 if ((status & 0xb8ff) == 0) {
258 * Good, now check for a proper control word.
262 if ((control & 0x1f3f) == 0x033f) {
264 * We have an npx, now divide by 0 to see if exception
267 control &= ~(1 << 2); /* enable divide by 0 trap */
269 #ifdef FPU_ERROR_BROKEN
271 * FPU error signal doesn't work on some CPU
277 npx_traps_while_probing = 0;
279 if (npx_traps_while_probing != 0) {
281 * Good, exception 16 works.
287 "FPU does not use exception 16 for error reporting\n");
293 * Probe failed. Floating point simply won't work.
294 * Notify user and disable FPU/MMX/SSE instruction execution.
296 device_printf(dev, "WARNING: no FPU!\n");
297 __asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : :
298 "n" (CR0_EM | CR0_MP) : "ax");
301 idt[IDT_MF] = save_idt_npxtrap;
302 return (hw_float ? 0 : ENXIO);
306 * Attach routine - announce which it is, and wire into system
309 npx_attach(device_t dev)
315 fpusave(&npx_initialstate);
317 #ifdef CPU_ENABLE_SSE
319 if (npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask)
321 npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask;
323 cpu_mxcsr_mask = 0xFFBF;
324 bzero(npx_initialstate.sv_xmm.sv_fp,
325 sizeof(npx_initialstate.sv_xmm.sv_fp));
326 bzero(npx_initialstate.sv_xmm.sv_xmm,
327 sizeof(npx_initialstate.sv_xmm.sv_xmm));
328 /* XXX might need even more zeroing. */
331 bzero(npx_initialstate.sv_87.sv_ac,
332 sizeof(npx_initialstate.sv_87.sv_ac));
339 * Initialize floating point unit.
344 static union savefpu dummy;
351 * fninit has the same h/w bugs as fnsave. Use the detoxified
352 * fnsave to throw away any junk in the fpu. npxsave() initializes
353 * the fpu and sets fpcurthread = NULL as important side effects.
355 * It is too early for critical_enter() to work on AP.
357 saveintr = intr_disable();
360 #ifdef CPU_ENABLE_SSE
361 /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */
365 control = __INITIAL_NPXCW__;
368 intr_restore(saveintr);
372 * Free coprocessor (if we have it).
380 if (curthread == PCPU_GET(fpcurthread))
381 npxsave(curpcb->pcb_save);
385 u_int masked_exceptions;
387 masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f;
389 * Log exceptions that would have trapped with the old
390 * control word (overflow, divide by 0, and invalid operand).
392 if (masked_exceptions & 0x0d)
394 "pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
395 td->td_proc->p_pid, td->td_proc->p_comm,
406 return (_MC_FPFMT_NODEV);
407 #ifdef CPU_ENABLE_SSE
409 return (_MC_FPFMT_XMM);
411 return (_MC_FPFMT_387);
415 * The following mechanism is used to ensure that the FPE_... value
416 * that is passed as a trapcode to the signal handler of the user
417 * process does not have more than one bit set.
419 * Multiple bits may be set if the user process modifies the control
420 * word while a status word bit is already set. While this is a sign
421 * of bad coding, we have no choice but to narrow them down to one
422 * bit, since we must not send a trapcode that is not exactly one of
425 * The mechanism has a static table with 128 entries. Each combination
426 * of the 7 FPU status word exception bits directly translates to a
427 * position in this table, where a single FPE_... value is stored.
428 * This FPE_... value stored there is considered the "most important"
429 * of the exception bits and will be sent as the signal code. The
430 * precedence of the bits is based upon Intel Document "Numerical
431 * Applications", Chapter "Special Computational Situations".
433 * The macro to choose one of these values does these steps: 1) Throw
434 * away status word bits that cannot be masked. 2) Throw away the bits
435 * currently masked in the control word, assuming the user isn't
436 * interested in them anymore. 3) Reinsert status word bit 7 (stack
437 * fault) if it is set, which cannot be masked but must be preserved.
438 * 4) Use the remaining bits to point into the trapcode table.
440 * The 6 maskable bits in order of their preference, as stated in the
441 * above referenced Intel manual:
442 * 1 Invalid operation (FP_X_INV)
445 * 1c Operand of unsupported format
447 * 2 QNaN operand (not an exception, irrelevant here)
448 * 3 Any other invalid-operation not mentioned above or zero divide
449 * (FP_X_INV, FP_X_DZ)
450 * 4 Denormal operand (FP_X_DNML)
451 * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL)
452 * 6 Inexact result (FP_X_IMP)
454 static char fpetable[128] = {
456 FPE_FLTINV, /* 1 - INV */
457 FPE_FLTUND, /* 2 - DNML */
458 FPE_FLTINV, /* 3 - INV | DNML */
459 FPE_FLTDIV, /* 4 - DZ */
460 FPE_FLTINV, /* 5 - INV | DZ */
461 FPE_FLTDIV, /* 6 - DNML | DZ */
462 FPE_FLTINV, /* 7 - INV | DNML | DZ */
463 FPE_FLTOVF, /* 8 - OFL */
464 FPE_FLTINV, /* 9 - INV | OFL */
465 FPE_FLTUND, /* A - DNML | OFL */
466 FPE_FLTINV, /* B - INV | DNML | OFL */
467 FPE_FLTDIV, /* C - DZ | OFL */
468 FPE_FLTINV, /* D - INV | DZ | OFL */
469 FPE_FLTDIV, /* E - DNML | DZ | OFL */
470 FPE_FLTINV, /* F - INV | DNML | DZ | OFL */
471 FPE_FLTUND, /* 10 - UFL */
472 FPE_FLTINV, /* 11 - INV | UFL */
473 FPE_FLTUND, /* 12 - DNML | UFL */
474 FPE_FLTINV, /* 13 - INV | DNML | UFL */
475 FPE_FLTDIV, /* 14 - DZ | UFL */
476 FPE_FLTINV, /* 15 - INV | DZ | UFL */
477 FPE_FLTDIV, /* 16 - DNML | DZ | UFL */
478 FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */
479 FPE_FLTOVF, /* 18 - OFL | UFL */
480 FPE_FLTINV, /* 19 - INV | OFL | UFL */
481 FPE_FLTUND, /* 1A - DNML | OFL | UFL */
482 FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */
483 FPE_FLTDIV, /* 1C - DZ | OFL | UFL */
484 FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */
485 FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */
486 FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */
487 FPE_FLTRES, /* 20 - IMP */
488 FPE_FLTINV, /* 21 - INV | IMP */
489 FPE_FLTUND, /* 22 - DNML | IMP */
490 FPE_FLTINV, /* 23 - INV | DNML | IMP */
491 FPE_FLTDIV, /* 24 - DZ | IMP */
492 FPE_FLTINV, /* 25 - INV | DZ | IMP */
493 FPE_FLTDIV, /* 26 - DNML | DZ | IMP */
494 FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */
495 FPE_FLTOVF, /* 28 - OFL | IMP */
496 FPE_FLTINV, /* 29 - INV | OFL | IMP */
497 FPE_FLTUND, /* 2A - DNML | OFL | IMP */
498 FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */
499 FPE_FLTDIV, /* 2C - DZ | OFL | IMP */
500 FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */
501 FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */
502 FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */
503 FPE_FLTUND, /* 30 - UFL | IMP */
504 FPE_FLTINV, /* 31 - INV | UFL | IMP */
505 FPE_FLTUND, /* 32 - DNML | UFL | IMP */
506 FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */
507 FPE_FLTDIV, /* 34 - DZ | UFL | IMP */
508 FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */
509 FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */
510 FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */
511 FPE_FLTOVF, /* 38 - OFL | UFL | IMP */
512 FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */
513 FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */
514 FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */
515 FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */
516 FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */
517 FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */
518 FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */
519 FPE_FLTSUB, /* 40 - STK */
520 FPE_FLTSUB, /* 41 - INV | STK */
521 FPE_FLTUND, /* 42 - DNML | STK */
522 FPE_FLTSUB, /* 43 - INV | DNML | STK */
523 FPE_FLTDIV, /* 44 - DZ | STK */
524 FPE_FLTSUB, /* 45 - INV | DZ | STK */
525 FPE_FLTDIV, /* 46 - DNML | DZ | STK */
526 FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */
527 FPE_FLTOVF, /* 48 - OFL | STK */
528 FPE_FLTSUB, /* 49 - INV | OFL | STK */
529 FPE_FLTUND, /* 4A - DNML | OFL | STK */
530 FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */
531 FPE_FLTDIV, /* 4C - DZ | OFL | STK */
532 FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */
533 FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */
534 FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */
535 FPE_FLTUND, /* 50 - UFL | STK */
536 FPE_FLTSUB, /* 51 - INV | UFL | STK */
537 FPE_FLTUND, /* 52 - DNML | UFL | STK */
538 FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */
539 FPE_FLTDIV, /* 54 - DZ | UFL | STK */
540 FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */
541 FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */
542 FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */
543 FPE_FLTOVF, /* 58 - OFL | UFL | STK */
544 FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */
545 FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */
546 FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */
547 FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */
548 FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */
549 FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */
550 FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */
551 FPE_FLTRES, /* 60 - IMP | STK */
552 FPE_FLTSUB, /* 61 - INV | IMP | STK */
553 FPE_FLTUND, /* 62 - DNML | IMP | STK */
554 FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */
555 FPE_FLTDIV, /* 64 - DZ | IMP | STK */
556 FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */
557 FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */
558 FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */
559 FPE_FLTOVF, /* 68 - OFL | IMP | STK */
560 FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */
561 FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */
562 FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */
563 FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */
564 FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */
565 FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */
566 FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */
567 FPE_FLTUND, /* 70 - UFL | IMP | STK */
568 FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */
569 FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */
570 FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */
571 FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */
572 FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */
573 FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */
574 FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */
575 FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */
576 FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */
577 FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */
578 FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */
579 FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */
580 FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */
581 FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */
582 FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
586 * Read the FP status and control words, then generate si_code value
587 * for SIGFPE. The error code chosen will be one of the
588 * FPE_... macros. It will be sent as the second argument to old
589 * BSD-style signal handlers and as "siginfo_t->si_code" (second
590 * argument) to SA_SIGINFO signal handlers.
592 * Some time ago, we cleared the x87 exceptions with FNCLEX there.
593 * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The
594 * usermode code which understands the FPU hardware enough to enable
595 * the exceptions, can also handle clearing the exception state in the
596 * handler. The only consequence of not clearing the exception is the
597 * rethrow of the SIGFPE on return from the signal handler and
598 * reexecution of the corresponding instruction.
600 * For XMM traps, the exceptions were never cleared.
605 u_short control, status;
609 "npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n",
610 PCPU_GET(fpcurthread), curthread, hw_float);
611 panic("npxtrap from nowhere");
616 * Interrupt handling (for another interrupt) may have pushed the
617 * state to memory. Fetch the relevant parts of the state from
620 if (PCPU_GET(fpcurthread) != curthread) {
621 control = GET_FPU_CW(curthread);
622 status = GET_FPU_SW(curthread);
628 return (fpetable[status & ((~control & 0x3f) | 0x40)]);
631 #ifdef CPU_ENABLE_SSE
639 "npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n",
640 PCPU_GET(fpcurthread), curthread, hw_float);
641 panic("npxtrap from nowhere");
644 if (PCPU_GET(fpcurthread) != curthread)
645 mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr;
649 return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
654 * Implement device not available (DNA) exception
656 * It would be better to switch FP context here (if curthread != fpcurthread)
657 * and not necessarily for every context switch, but it is too hard to
658 * access foreign pcb's.
661 static int err_count = 0;
670 if (PCPU_GET(fpcurthread) == curthread) {
671 printf("npxdna: fpcurthread == curthread %d times\n",
677 if (PCPU_GET(fpcurthread) != NULL) {
678 printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
679 PCPU_GET(fpcurthread),
680 PCPU_GET(fpcurthread)->td_proc->p_pid,
681 curthread, curthread->td_proc->p_pid);
686 * Record new context early in case frstor causes an IRQ13.
688 PCPU_SET(fpcurthread, curthread);
690 #ifdef CPU_ENABLE_SSE
695 if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) {
697 * This is the first time this thread has used the FPU or
698 * the PCB doesn't contain a clean FPU state. Explicitly
699 * load an initial state.
701 fpurstor(&npx_initialstate);
702 if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
703 fldcw(curpcb->pcb_initial_npxcw);
704 curpcb->pcb_flags |= PCB_NPXINITDONE;
705 if (PCB_USER_FPU(curpcb))
706 curpcb->pcb_flags |= PCB_NPXUSERINITDONE;
709 * The following fpurstor() may cause an IRQ13 when the
710 * state being restored has a pending error. The error will
711 * appear to have been triggered by the current (npx) user
712 * instruction even when that instruction is a no-wait
713 * instruction that should not trigger an error (e.g.,
714 * fnclex). On at least one 486 system all of the no-wait
715 * instructions are broken the same as frstor, so our
716 * treatment does not amplify the breakage. On at least
717 * one 386/Cyrix 387 system, fnclex works correctly while
718 * frstor and fnsave are broken, so our treatment breaks
719 * fnclex if it is the first FPU instruction after a context
722 fpurstor(curpcb->pcb_save);
730 * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx
731 * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by
732 * no-wait npx instructions. See the Intel application note AP-578 for
733 * details. This doesn't cause any additional complications here. IRQ13's
734 * are inherently asynchronous unless the CPU is frozen to deliver them --
735 * one that started in userland may be delivered many instructions later,
736 * after the process has entered the kernel. It may even be delivered after
737 * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in
738 * the same way as a very-late-arriving non-spurious IRQ13 from user mode:
739 * it is normally ignored at first because we set fpcurthread to NULL; it is
740 * normally retriggered in npxdna() after return to user mode.
742 * npxsave() must be called with interrupts disabled, so that it clears
743 * fpcurthread atomically with saving the state. We require callers to do the
744 * disabling, since most callers need to disable interrupts anyway to call
745 * npxsave() atomically with checking fpcurthread.
747 * A previous version of npxsave() went to great lengths to execute fnsave
748 * with interrupts enabled in case executing it froze the CPU. This case
749 * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply
761 PCPU_SET(fpcurthread, NULL);
770 * Discard pending exceptions in the !cpu_fxsr case so that unmasked
771 * ones don't cause a panic on the next frstor.
773 #ifdef CPU_ENABLE_SSE
778 td = PCPU_GET(fpcurthread);
779 KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread"));
781 PCPU_SET(fpcurthread, NULL);
782 td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
787 * Get the user state of the FPU into pcb->pcb_user_save without
788 * dropping ownership (if possible). It returns the FPU ownership
792 npxgetregs(struct thread *td)
797 return (_MC_FPOWNED_NONE);
800 if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
801 bcopy(&npx_initialstate, &pcb->pcb_user_save,
802 sizeof(npx_initialstate));
803 SET_FPU_CW(&pcb->pcb_user_save, pcb->pcb_initial_npxcw);
805 return (_MC_FPOWNED_PCB);
808 if (td == PCPU_GET(fpcurthread)) {
809 fpusave(&pcb->pcb_user_save);
810 #ifdef CPU_ENABLE_SSE
814 * fnsave initializes the FPU and destroys whatever
815 * context it contains. Make sure the FPU owner
816 * starts with a clean state next time.
820 return (_MC_FPOWNED_FPU);
823 return (_MC_FPOWNED_PCB);
828 npxuserinited(struct thread *td)
833 if (PCB_USER_FPU(pcb))
834 pcb->pcb_flags |= PCB_NPXINITDONE;
835 pcb->pcb_flags |= PCB_NPXUSERINITDONE;
840 npxsetregs(struct thread *td, union savefpu *addr)
849 if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
850 #ifdef CPU_ENABLE_SSE
853 fnclex(); /* As in npxdrop(). */
854 if (((uintptr_t)addr & 0xf) != 0) {
855 bcopy(addr, &pcb->pcb_user_save, sizeof(*addr));
856 fpurstor(&pcb->pcb_user_save);
860 pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE;
863 bcopy(addr, &pcb->pcb_user_save, sizeof(*addr));
873 #ifdef CPU_ENABLE_SSE
881 #ifdef CPU_ENABLE_SSE
883 * On AuthenticAMD processors, the fxrstor instruction does not restore
884 * the x87's stored last instruction pointer, last data pointer, and last
885 * opcode values, except in the rare case in which the exception summary
886 * (ES) bit in the x87 status word is set to 1.
888 * In order to avoid leaking this information across processes, we clean
889 * these values by performing a dummy load before executing fxrstor().
892 fpu_clean_state(void)
894 static float dummy_variable = 0.0;
898 * Clear the ES bit in the x87 status word if it is currently
899 * set, in order to avoid causing a fault in the upcoming load.
906 * Load the dummy variable into the x87 stack. This mangles
907 * the x87 stack, but we don't care since we're about to call
910 __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable));
912 #endif /* CPU_ENABLE_SSE */
919 #ifdef CPU_ENABLE_SSE
927 static device_method_t npx_methods[] = {
928 /* Device interface */
929 DEVMETHOD(device_identify, npx_identify),
930 DEVMETHOD(device_probe, npx_probe),
931 DEVMETHOD(device_attach, npx_attach),
932 DEVMETHOD(device_detach, bus_generic_detach),
933 DEVMETHOD(device_shutdown, bus_generic_shutdown),
934 DEVMETHOD(device_suspend, bus_generic_suspend),
935 DEVMETHOD(device_resume, bus_generic_resume),
940 static driver_t npx_driver = {
946 static devclass_t npx_devclass;
949 * We prefer to attach to the root nexus so that the usual case (exception 16)
950 * doesn't describe the processor as being `on isa'.
952 DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0);
956 * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
958 static struct isa_pnp_id npxisa_ids[] = {
959 { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */
964 npxisa_probe(device_t dev)
967 if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) {
974 npxisa_attach(device_t dev)
979 static device_method_t npxisa_methods[] = {
980 /* Device interface */
981 DEVMETHOD(device_probe, npxisa_probe),
982 DEVMETHOD(device_attach, npxisa_attach),
983 DEVMETHOD(device_detach, bus_generic_detach),
984 DEVMETHOD(device_shutdown, bus_generic_shutdown),
985 DEVMETHOD(device_suspend, bus_generic_suspend),
986 DEVMETHOD(device_resume, bus_generic_resume),
991 static driver_t npxisa_driver = {
997 static devclass_t npxisa_devclass;
999 DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0);
1001 DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0);
1003 #endif /* DEV_ISA */
1005 static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
1006 "Kernel contexts for FPU state");
1008 #define XSAVE_AREA_ALIGN 64
1010 #define FPU_KERN_CTX_NPXINITDONE 0x01
1012 struct fpu_kern_ctx {
1013 union savefpu *prev;
1018 struct fpu_kern_ctx *
1019 fpu_kern_alloc_ctx(u_int flags)
1021 struct fpu_kern_ctx *res;
1024 sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN +
1025 sizeof(union savefpu);
1026 res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
1027 M_NOWAIT : M_WAITOK) | M_ZERO);
1032 fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
1035 /* XXXKIB clear the memory ? */
1036 free(ctx, M_FPUKERN_CTX);
1039 static union savefpu *
1040 fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
1044 p = (vm_offset_t)&ctx->hwstate1;
1045 p = roundup2(p, XSAVE_AREA_ALIGN);
1046 return ((union savefpu *)p);
1050 fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
1055 KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save,
1056 ("mangled pcb_save"));
1058 if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0)
1059 ctx->flags |= FPU_KERN_CTX_NPXINITDONE;
1061 ctx->prev = pcb->pcb_save;
1062 pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
1063 pcb->pcb_flags |= PCB_KERNNPX;
1064 pcb->pcb_flags &= ~PCB_NPXINITDONE;
1069 fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
1075 if (curthread == PCPU_GET(fpcurthread))
1078 pcb->pcb_save = ctx->prev;
1079 if (pcb->pcb_save == &pcb->pcb_user_save) {
1080 if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0)
1081 pcb->pcb_flags |= PCB_NPXINITDONE;
1083 pcb->pcb_flags &= ~PCB_NPXINITDONE;
1084 pcb->pcb_flags &= ~PCB_KERNNPX;
1086 if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0)
1087 pcb->pcb_flags |= PCB_NPXINITDONE;
1089 pcb->pcb_flags &= ~PCB_NPXINITDONE;
1090 KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave"));
1096 fpu_kern_thread(u_int flags)
1101 KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
1102 ("Only kthread may use fpu_kern_thread"));
1103 KASSERT(curpcb->pcb_save == &curpcb->pcb_user_save,
1104 ("mangled pcb_save"));
1105 KASSERT(PCB_USER_FPU(curpcb), ("recursive call"));
1107 curpcb->pcb_flags |= PCB_KERNNPX;
1112 is_fpu_kern_thread(u_int flags)
1115 if ((curthread->td_pflags & TDP_KTHREAD) == 0)
1117 return ((curpcb->pcb_flags & PCB_KERNNPX) != 0);