2 * Copyright (C) 1996 Wolfgang Solfrank.
3 * Copyright (C) 1996 TooLs GmbH.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by TooLs GmbH.
17 * 4. The name of TooLs GmbH may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 * $NetBSD: fpu.c,v 1.5 2001/07/22 11:29:46 wiz Exp $
34 #include <sys/cdefs.h>
35 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/limits.h>
40 #include <machine/altivec.h>
41 #include <machine/fpu.h>
42 #include <machine/ieeefp.h>
43 #include <machine/pcb.h>
44 #include <machine/psl.h>
46 #include <powerpc/fpu/fpu_arith.h>
47 #include <powerpc/fpu/fpu_emu.h>
48 #include <powerpc/fpu/fpu_extern.h>
50 void spe_handle_fpdata(struct trapframe *);
51 void spe_handle_fpround(struct trapframe *);
52 static int spe_emu_instr(uint32_t, struct fpemu *, struct fpn **, uint32_t *);
55 save_vec_int(struct thread *td)
63 * Temporarily re-enable the vector unit during the save
69 * Save the vector registers and SPEFSCR to the PCB
71 #define EVSTDW(n) __asm ("evstdw %1,0(%0)" \
72 :: "b"(pcb->pcb_vec.vr[n]), "n"(n));
73 EVSTDW(0); EVSTDW(1); EVSTDW(2); EVSTDW(3);
74 EVSTDW(4); EVSTDW(5); EVSTDW(6); EVSTDW(7);
75 EVSTDW(8); EVSTDW(9); EVSTDW(10); EVSTDW(11);
76 EVSTDW(12); EVSTDW(13); EVSTDW(14); EVSTDW(15);
77 EVSTDW(16); EVSTDW(17); EVSTDW(18); EVSTDW(19);
78 EVSTDW(20); EVSTDW(21); EVSTDW(22); EVSTDW(23);
79 EVSTDW(24); EVSTDW(25); EVSTDW(26); EVSTDW(27);
80 EVSTDW(28); EVSTDW(29); EVSTDW(30); EVSTDW(31);
83 __asm ( "evxor 0,0,0\n"
85 "evstdd 0,0(%0)" :: "b"(&pcb->pcb_vec.spare[0]));
86 pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR);
89 * Disable vector unit again
97 enable_vec(struct thread *td)
101 struct trapframe *tf;
107 * Save the thread's SPE CPU number, and set the CPU's current
110 td->td_pcb->pcb_veccpu = PCPU_GET(cpuid);
111 PCPU_SET(vecthread, td);
114 * Enable the vector unit for when the thread returns from the
115 * exception. If this is the first time the unit has been used by
116 * the thread, initialise the vector registers and VSCR to 0, and
117 * set the flag to indicate that the vector unit is in use.
120 if (!(pcb->pcb_flags & PCB_VEC)) {
121 memset(&pcb->pcb_vec, 0, sizeof pcb->pcb_vec);
122 pcb->pcb_flags |= PCB_VEC;
123 pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR);
127 * Temporarily enable the vector unit so the registers
131 mtmsr(msr | PSL_VEC);
133 /* Restore SPEFSCR and ACC. Use %r0 as the scratch for ACC. */
134 mtspr(SPR_SPEFSCR, pcb->pcb_vec.vscr);
135 __asm __volatile("isync;evldd 0, 0(%0); evmra 0,0\n"
136 :: "b"(&pcb->pcb_vec.spare[0]));
139 * The lower half of each register will be restored on trap return. Use
140 * %r0 as a scratch register, and restore it last.
142 #define EVLDW(n) __asm __volatile("evldw 0, 0(%0); evmergehilo "#n",0,"#n \
143 :: "b"(&pcb->pcb_vec.vr[n]));
144 EVLDW(1); EVLDW(2); EVLDW(3); EVLDW(4);
145 EVLDW(5); EVLDW(6); EVLDW(7); EVLDW(8);
146 EVLDW(9); EVLDW(10); EVLDW(11); EVLDW(12);
147 EVLDW(13); EVLDW(14); EVLDW(15); EVLDW(16);
148 EVLDW(17); EVLDW(18); EVLDW(19); EVLDW(20);
149 EVLDW(21); EVLDW(22); EVLDW(23); EVLDW(24);
150 EVLDW(25); EVLDW(26); EVLDW(27); EVLDW(28);
151 EVLDW(29); EVLDW(30); EVLDW(31); EVLDW(0);
159 save_vec(struct thread *td)
168 * Clear the current vec thread and pcb's CPU id
169 * XXX should this be left clear to allow lazy save/restore ?
171 pcb->pcb_veccpu = INT_MAX;
172 PCPU_SET(vecthread, NULL);
176 * Save SPE state without dropping ownership. This will only save state if
177 * the current vector-thread is `td'. This is used for taking core dumps, so
178 * don't leak kernel information; overwrite the low words of each vector with
179 * their real value, taken from the thread's trap frame, unconditionally.
182 save_vec_nodrop(struct thread *td)
187 if (td == PCPU_GET(vecthread))
192 for (i = 0; i < 32; i++) {
193 pcb->pcb_vec.vr[i][1] =
194 td->td_frame ? td->td_frame->fixreg[i] : 0;
198 #define SPE_INST_MASK 0x31f
225 #define EVFSADD 0x280
226 #define EVFSSUB 0x281
227 #define EVFSABS 0x284
228 #define EVFSNABS 0x285
229 #define EVFSNEG 0x286
230 #define EVFSMUL 0x288
231 #define EVFSDIV 0x289
232 #define EVFSCMPGT 0x28c
233 #define EVFSCMPLT 0x28d
234 #define EVFSCMPEQ 0x28e
235 #define EVFSCFUI 0x290
236 #define EVFSCFSI 0x291
237 #define EVFSCTUI 0x294
238 #define EVFSCTSI 0x295
239 #define EVFSCTUF 0x296
240 #define EVFSCTSF 0x297
241 #define EVFSCTUIZ 0x298
242 #define EVFSCTSIZ 0x29a
247 #define EFSNABS 0x2c5
251 #define EFSCMPGT 0x2cc
252 #define EFSCMPLT 0x2cd
253 #define EFSCMPEQ 0x2ce
255 #define EFSCFUI 0x2d0
256 #define EFSCFSI 0x2d1
257 #define EFSCTUI 0x2d4
258 #define EFSCTSI 0x2d5
259 #define EFSCTUF 0x2d6
260 #define EFSCTSF 0x2d7
261 #define EFSCTUIZ 0x2d8
262 #define EFSCTSIZ 0x2da
267 #define EFDNABS 0x2e5
271 #define EFDCMPGT 0x2ec
272 #define EFDCMPLT 0x2ed
273 #define EFDCMPEQ 0x2ee
275 #define EFDCFUI 0x2f0
276 #define EFDCFSI 0x2f1
277 #define EFDCTUI 0x2f4
278 #define EFDCTSI 0x2f5
279 #define EFDCTUF 0x2f6
280 #define EFDCTSF 0x2f7
281 #define EFDCTUIZ 0x2f8
282 #define EFDCTSIZ 0x2fa
291 static uint32_t fpscr_to_spefscr(uint32_t fpscr)
297 if (fpscr & FPSCR_VX)
298 spefscr |= SPEFSCR_FINV;
299 if (fpscr & FPSCR_OX)
300 spefscr |= SPEFSCR_FOVF;
301 if (fpscr & FPSCR_UX)
302 spefscr |= SPEFSCR_FUNF;
303 if (fpscr & FPSCR_ZX)
304 spefscr |= SPEFSCR_FDBZ;
305 if (fpscr & FPSCR_XX)
306 spefscr |= SPEFSCR_FX;
311 /* Sign is 0 for unsigned, 1 for signed. */
313 spe_to_int(struct fpemu *fpemu, struct fpn *fpn, uint32_t *val, int sign)
317 res[0] = fpu_ftox(fpemu, fpn, res);
318 if (res[0] != UINT_MAX && res[0] != 0)
319 fpemu->fe_cx |= FPSCR_OX;
320 else if (sign == 0 && res[0] != 0)
321 fpemu->fe_cx |= FPSCR_UX;
328 /* Masked instruction */
330 * For compare instructions, returns 1 on success, 0 if not. For all others,
331 * returns -1, or -2 if no result needs to be recorded.
334 spe_emu_instr(uint32_t instr, struct fpemu *fpemu,
335 struct fpn **result, uint32_t *iresult)
337 switch (instr & SPE_INST_MASK) {
341 /* Taken care of elsewhere. */
344 fpemu->fe_cx &= ~FPSCR_RN;
345 fpemu->fe_cx |= FP_RZ;
347 spe_to_int(fpemu, &fpemu->fe_f2, iresult, 0);
350 fpemu->fe_cx &= ~FPSCR_RN;
351 fpemu->fe_cx |= FP_RZ;
353 spe_to_int(fpemu, &fpemu->fe_f2, iresult, 1);
356 *result = fpu_add(fpemu);
359 *result = fpu_sub(fpemu);
362 *result = fpu_mul(fpemu);
365 *result = fpu_div(fpemu);
368 fpu_compare(fpemu, 0);
369 if (fpemu->fe_cx & FPSCR_FG)
373 fpu_compare(fpemu, 0);
374 if (fpemu->fe_cx & FPSCR_FL)
378 fpu_compare(fpemu, 0);
379 if (fpemu->fe_cx & FPSCR_FE)
383 printf("Unknown instruction %x\n", instr);
390 spe_explode(struct fpemu *fe, struct fpn *fp, uint32_t type,
391 uint32_t hi, uint32_t lo)
395 fp->fp_sign = hi >> 31;
399 s = fpu_stof(fp, hi);
403 s = fpu_dtof(fp, hi, lo);
407 if (s == FPC_QNAN && (fp->fp_mant[0] & FP_QUIETBIT) == 0) {
409 * Input is a signalling NaN. All operations that return
410 * an input NaN operand put it through a ``NaN conversion'',
411 * which basically just means ``turn on the quiet bit''.
412 * We do this here so that all NaNs internally look quiet
413 * (we can tell signalling ones by their class).
415 fp->fp_mant[0] |= FP_QUIETBIT;
416 fe->fe_cx = FPSCR_VXSNAN; /* assert invalid operand */
425 * Save the high word of a 64-bit GPR for manipulation in the exception handler.
428 spe_save_reg_high(int reg)
431 #define EVSTDW(n) case n: __asm __volatile ("evstdw %1,0(%0)" \
432 :: "b"(vec), "n"(n) : "memory"); break;
434 EVSTDW(0); EVSTDW(1); EVSTDW(2); EVSTDW(3);
435 EVSTDW(4); EVSTDW(5); EVSTDW(6); EVSTDW(7);
436 EVSTDW(8); EVSTDW(9); EVSTDW(10); EVSTDW(11);
437 EVSTDW(12); EVSTDW(13); EVSTDW(14); EVSTDW(15);
438 EVSTDW(16); EVSTDW(17); EVSTDW(18); EVSTDW(19);
439 EVSTDW(20); EVSTDW(21); EVSTDW(22); EVSTDW(23);
440 EVSTDW(24); EVSTDW(25); EVSTDW(26); EVSTDW(27);
441 EVSTDW(28); EVSTDW(29); EVSTDW(30); EVSTDW(31);
449 * Load the given value into the high word of the requested register.
452 spe_load_reg_high(int reg, uint32_t val)
454 #define EVLDW(n) case n: __asm __volatile("evmergelo "#n",%0,"#n \
457 EVLDW(1); EVLDW(2); EVLDW(3); EVLDW(4);
458 EVLDW(5); EVLDW(6); EVLDW(7); EVLDW(8);
459 EVLDW(9); EVLDW(10); EVLDW(11); EVLDW(12);
460 EVLDW(13); EVLDW(14); EVLDW(15); EVLDW(16);
461 EVLDW(17); EVLDW(18); EVLDW(19); EVLDW(20);
462 EVLDW(21); EVLDW(22); EVLDW(23); EVLDW(24);
463 EVLDW(25); EVLDW(26); EVLDW(27); EVLDW(28);
464 EVLDW(29); EVLDW(30); EVLDW(31); EVLDW(0);
471 spe_handle_fpdata(struct trapframe *frame)
475 uint32_t instr, instr_sec_op;
476 uint32_t cr_shift, ra, rb, rd, src;
477 uint32_t high, low, res, tmp; /* For vector operations. */
478 uint32_t spefscr = 0;
479 uint32_t ftod_res[2];
480 int width; /* Single, Double, Vector, Integer */
484 err = fueword32((void *)frame->srr0, &instr);
490 if ((instr >> OPC_SHIFT) != SPE_OPC)
495 * 'cr' field is the upper 3 bits of rd. Magically, since a) rd is 5
496 * bits, b) each 'cr' field is 4 bits, and c) only the 'GT' bit is
497 * modified for most compare operations, the full value of rd can be
498 * used as a shift value.
500 rd = (instr >> 21) & 0x1f;
501 ra = (instr >> 16) & 0x1f;
502 rb = (instr >> 11) & 0x1f;
503 src = (instr >> 5) & 0x7;
504 cr_shift = 28 - (rd & 0x1f);
506 instr_sec_op = (instr & 0x7ff);
508 memset(&fpemu, 0, sizeof(fpemu));
513 mtmsr(msr | PSL_VEC);
514 switch (instr_sec_op) {
516 high = spe_save_reg_high(ra) & ~(1U << 31);
517 frame->fixreg[rd] = frame->fixreg[ra] & ~(1U << 31);
518 spe_load_reg_high(rd, high);
521 high = spe_save_reg_high(ra) | (1U << 31);
522 frame->fixreg[rd] = frame->fixreg[ra] | (1U << 31);
523 spe_load_reg_high(rd, high);
526 high = spe_save_reg_high(ra) ^ (1U << 31);
527 frame->fixreg[rd] = frame->fixreg[ra] ^ (1U << 31);
528 spe_load_reg_high(rd, high);
532 spe_explode(&fpemu, &fpemu.fe_f1, SINGLE,
533 spe_save_reg_high(ra), 0);
534 spe_explode(&fpemu, &fpemu.fe_f2, SINGLE,
535 spe_save_reg_high(rb), 0);
536 high = spe_emu_instr(instr_sec_op, &fpemu, &result,
540 spe_load_reg_high(rd, tmp);
542 spefscr = fpscr_to_spefscr(fpemu.fe_cx) << 16;
543 /* Clear the fpemu to start over on the lower bits. */
544 memset(&fpemu, 0, sizeof(fpemu));
547 spe_explode(&fpemu, &fpemu.fe_f1, SINGLE,
548 frame->fixreg[ra], 0);
549 spe_explode(&fpemu, &fpemu.fe_f2, SINGLE,
550 frame->fixreg[rb], 0);
551 spefscr |= fpscr_to_spefscr(fpemu.fe_cx);
552 low = spe_emu_instr(instr_sec_op, &fpemu, &result,
554 if (instr_sec_op == EVFSCMPEQ ||
555 instr_sec_op == EVFSCMPGT ||
556 instr_sec_op == EVFSCMPLT) {
557 res = (high << 3) | (low << 2) |
558 ((high | low) << 1) | (high & low);
567 switch (instr_sec_op) {
569 frame->fixreg[rd] = frame->fixreg[ra] & ~(1U << 31);
572 frame->fixreg[rd] = frame->fixreg[ra] | (1U << 31);
575 frame->fixreg[rd] = frame->fixreg[ra] ^ (1U << 31);
578 mtmsr(msr | PSL_VEC);
579 spe_explode(&fpemu, &fpemu.fe_f3, DOUBLE,
580 spe_save_reg_high(rb), frame->fixreg[rb]);
581 result = &fpemu.fe_f3;
585 spe_explode(&fpemu, &fpemu.fe_f1, SINGLE,
586 frame->fixreg[ra], 0);
587 spe_explode(&fpemu, &fpemu.fe_f2, SINGLE,
588 frame->fixreg[rb], 0);
593 mtmsr(msr | PSL_VEC);
594 switch (instr_sec_op) {
596 high = spe_save_reg_high(ra) & ~(1U << 31);
597 frame->fixreg[rd] = frame->fixreg[ra];
598 spe_load_reg_high(rd, high);
601 high = spe_save_reg_high(ra) | (1U << 31);
602 frame->fixreg[rd] = frame->fixreg[ra];
603 spe_load_reg_high(rd, high);
606 high = spe_save_reg_high(ra) ^ (1U << 31);
607 frame->fixreg[rd] = frame->fixreg[ra];
608 spe_load_reg_high(rd, high);
611 spe_explode(&fpemu, &fpemu.fe_f3, SINGLE,
612 frame->fixreg[rb], 0);
613 result = &fpemu.fe_f3;
617 spe_explode(&fpemu, &fpemu.fe_f1, DOUBLE,
618 spe_save_reg_high(ra), frame->fixreg[ra]);
619 spe_explode(&fpemu, &fpemu.fe_f2, DOUBLE,
620 spe_save_reg_high(rb), frame->fixreg[rb]);
625 switch (instr_sec_op) {
628 /* Already handled. */
631 res = spe_emu_instr(instr_sec_op, &fpemu, &result,
638 switch (instr_sec_op & SPE_INST_MASK) {
642 frame->cr &= ~(0xf << cr_shift);
643 frame->cr |= (res << cr_shift);
656 frame->fixreg[rd] = fpu_ftos(&fpemu, result);
659 spe_load_reg_high(rd, fpu_ftod(&fpemu, result, ftod_res));
660 frame->fixreg[rd] = ftod_res[1];
663 panic("Unknown storage width %d", width);
669 spefscr |= (mfspr(SPR_SPEFSCR) & ~SPEFSCR_FINVS);
670 mtspr(SPR_SPEFSCR, spefscr);
678 spe_handle_fpround(struct trapframe *frame)
682 * Punt fpround exceptions for now. This leaves the truncated result in
683 * the register. We'll deal with overflow/underflow later.