lib/libc/sparc64/fpu/fpu.c

   1 /*
   2  * Copyright (c) 1992, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * This software was developed by the Computer Systems Engineering group
   6  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
   7  * contributed to Berkeley.
   8  *
   9  * All advertising materials mentioning features or use of this software
  10  * must display the following acknowledgement:
  11  *      This product includes software developed by the University of
  12  *      California, Lawrence Berkeley Laboratory.
  13  *
  14  * Redistribution and use in source and binary forms, with or without
  15  * modification, are permitted provided that the following conditions
  16  * are met:
  17  * 1. Redistributions of source code must retain the above copyright
  18  *    notice, this list of conditions and the following disclaimer.
  19  * 2. Redistributions in binary form must reproduce the above copyright
  20  *    notice, this list of conditions and the following disclaimer in the
  21  *    documentation and/or other materials provided with the distribution.
  22  * 4. Neither the name of the University nor the names of its contributors
  23  *    may be used to endorse or promote products derived from this software
  24  *    without specific prior written permission.
  25  *
  26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36  * SUCH DAMAGE.
  37  */
  38 /*-
  39  * Copyright 2001 by Thomas Moestl <tmm@FreeBSD.org>.  All rights reserved.
  40  *
  41  * Redistribution and use in source and binary forms, with or without
  42  * modification, are permitted provided that the following conditions
  43  * are met:
  44  * 1. Redistributions of source code must retain the above copyright
  45  *    notice, this list of conditions and the following disclaimer.
  46  * 2. Redistributions in binary form must reproduce the above copyright
  47  *    notice, this list of conditions and the following disclaimer in the
  48  *    documentation and/or other materials provided with the distribution.
  49  *
  50  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  51  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  52  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  53  * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  54  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  55  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  56  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  57  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  58  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  59  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  60  *
  61  *      @(#)fpu.c       8.1 (Berkeley) 6/11/93
  62  *      $NetBSD: fpu.c,v 1.11 2000/12/06 01:47:50 mrg Exp $
  63  */
  64
  65 #include <sys/cdefs.h>
  66 __FBSDID("$FreeBSD$");
  67
  68 #include <sys/param.h>
  69
  70 #include "namespace.h"
  71 #include <errno.h>
  72 #include <unistd.h>
  73 #include <signal.h>
  74 #include <stdlib.h>
  75 #include "un-namespace.h"
  76 #include "libc_private.h"
  77
  78 #include <machine/fp.h>
  79 #include <machine/frame.h>
  80 #include <machine/fsr.h>
  81 #include <machine/instr.h>
  82 #include <machine/pcb.h>
  83 #include <machine/tstate.h>
  84
  85 #include "__sparc_utrap_private.h"
  86 #include "fpu_emu.h"
  87 #include "fpu_extern.h"
  88
  89 /*
  90  * Translate current exceptions into `first' exception.  The
  91  * bits go the wrong way for ffs() (0x10 is most important, etc).
  92  * There are only 5, so do it the obvious way.
  93  */
  94 #define X1(x) x
  95 #define X2(x) x,x
  96 #define X4(x) x,x,x,x
  97 #define X8(x) X4(x),X4(x)
  98 #define X16(x) X8(x),X8(x)
  99
 100 static char cx_to_trapx[] = {
 101         X1(FSR_NX),
 102         X2(FSR_DZ),
 103         X4(FSR_UF),
 104         X8(FSR_OF),
 105         X16(FSR_NV)
 106 };
 107
 108 #ifdef FPU_DEBUG
 109 #ifdef FPU_DEBUG_MASK
 110 int __fpe_debug = FPU_DEBUG_MASK;
 111 #else
 112 int __fpe_debug = 0;
 113 #endif
 114 #endif  /* FPU_DEBUG */
 115
 116 static int __fpu_execute(struct utrapframe *, struct fpemu *, u_int32_t, u_long);
 117
 118 /*
 119  * Need to use an fpstate on the stack; we could switch, so we cannot safely
 120  * modify the pcb one, it might get overwritten.
 121  */
 122 int
 123 __fpu_exception(struct utrapframe *uf)
 124 {
 125         struct fpemu fe;
 126         u_long fsr, tstate;
 127         u_int insn;
 128         int sig;
 129
 130         fsr = uf->uf_fsr;
 131
 132         switch (FSR_GET_FTT(fsr)) {
 133         case FSR_FTT_NONE:
 134                 __utrap_write("lost FPU trap type\n");
 135                 return (0);
 136         case FSR_FTT_IEEE:
 137                 return (SIGFPE);
 138         case FSR_FTT_SEQERR:
 139                 __utrap_write("FPU sequence error\n");
 140                 return (SIGFPE);
 141         case FSR_FTT_HWERR:
 142                 __utrap_write("FPU hardware error\n");
 143                 return (SIGFPE);
 144         case FSR_FTT_UNFIN:
 145         case FSR_FTT_UNIMP:
 146                 break;
 147         default:
 148                 __utrap_write("unknown FPU error\n");
 149                 return (SIGFPE);
 150         }
 151
 152         fe.fe_fsr = fsr & ~FSR_FTT_MASK;
 153         insn = *(u_int32_t *)uf->uf_pc;
 154         if (IF_OP(insn) != IOP_MISC || (IF_F3_OP3(insn) != INS2_FPop1 &&
 155             IF_F3_OP3(insn) != INS2_FPop2))
 156                 __utrap_panic("bogus FP fault");
 157         tstate = uf->uf_state;
 158         sig = __fpu_execute(uf, &fe, insn, tstate);
 159         if (sig != 0)
 160                 return (sig);
 161         __asm __volatile("ldx %0, %%fsr" : : "m" (fe.fe_fsr));
 162         return (0);
 163 }
 164
 165 #ifdef FPU_DEBUG
 166 /*
 167  * Dump a `fpn' structure.
 168  */
 169 void
 170 __fpu_dumpfpn(struct fpn *fp)
 171 {
 172         static char *class[] = {
 173                 "SNAN", "QNAN", "ZERO", "NUM", "INF"
 174         };
 175
 176         printf("%s %c.%x %x %x %xE%d", class[fp->fp_class + 2],
 177                 fp->fp_sign ? '-' : ' ',
 178                 fp->fp_mant[0], fp->fp_mant[1],
 179                 fp->fp_mant[2], fp->fp_mant[3],
 180                 fp->fp_exp);
 181 }
 182 #endif
 183
 184 static int opmask[] = {0, 0, 1, 3};
 185
 186 /* Decode 5 bit register field depending on the type. */
 187 #define RN_DECODE(tp, rn) \
 188         ((tp == FTYPE_DBL || tp == FTYPE_EXT ? INSFPdq_RN((rn)) : (rn)) & \
 189             ~opmask[tp])
 190
 191 /* Operand size in 32-bit registers. */
 192 #define OPSZ(tp)        ((tp) == FTYPE_LNG ? 2 : (1 << (tp)))
 193
 194 /*
 195  * Helper for forming the below case statements. Build only the op3 and opf
 196  * field of the instruction, these are the only ones that need to match.
 197  */
 198 #define FOP(op3, opf) \
 199         ((op3) << IF_F3_OP3_SHIFT | (opf) << IF_F3_OPF_SHIFT)
 200
 201 /*
 202  * Implement a move operation for all supported operand types. The additional
 203  * nand and xor parameters will be applied to the upper 32 bit word of the
 204  * source operand. This allows to implement fabs and fneg (for fp operands
 205  * only!) using this functions, too, by passing (1 << 31) for one of the
 206  * parameters, and 0 for the other.
 207  */
 208 static void
 209 __fpu_mov(struct fpemu *fe, int type, int rd, int rs2, u_int32_t nand,
 210     u_int32_t xor)
 211 {
 212         u_int64_t tmp64;
 213         int i;
 214
 215         if (type == FTYPE_INT || type == FTYPE_SNG)
 216                 __fpu_setreg(rd, (__fpu_getreg(rs2) & ~nand) ^ xor);
 217         else {
 218                 /*
 219                  * Need to use the double versions to be able to access
 220                  * the upper 32 fp registers.
 221                  */
 222                 for (i = 0; i < OPSZ(type); i += 2, rd += 2, rs2 += 2) {
 223                         tmp64 = __fpu_getreg64(rs2);
 224                         if (i == 0)
 225                                 tmp64 = (tmp64 & ~((u_int64_t)nand << 32)) ^
 226                                     ((u_int64_t)xor << 32);
 227                         __fpu_setreg64(rd, tmp64);
 228                 }
 229         }
 230 }
 231
 232 static __inline void
 233 __fpu_ccmov(struct fpemu *fe, int type, int rd, int rs2,
 234     u_int32_t insn, int fcc)
 235 {
 236
 237         if (IF_F4_COND(insn) == fcc)
 238                 __fpu_mov(fe, type, rd, rs2, 0, 0);
 239 }
 240
 241 static int
 242 __fpu_cmpck(struct fpemu *fe)
 243 {
 244         u_long fsr;
 245         int cx;
 246
 247         /*
 248          * The only possible exception here is NV; catch it
 249          * early and get out, as there is no result register.
 250          */
 251         cx = fe->fe_cx;
 252         fsr = fe->fe_fsr | (cx << FSR_CEXC_SHIFT);
 253         if (cx != 0) {
 254                 if (fsr & (FSR_NV << FSR_TEM_SHIFT)) {
 255                         fe->fe_fsr = (fsr & ~FSR_FTT_MASK) |
 256                             FSR_FTT(FSR_FTT_IEEE);
 257                         return (SIGFPE);
 258                 }
 259                 fsr |= FSR_NV << FSR_AEXC_SHIFT;
 260         }
 261         fe->fe_fsr = fsr;
 262         return (0);
 263 }
 264
 265 /*
 266  * Execute an FPU instruction (one that runs entirely in the FPU; not
 267  * FBfcc or STF, for instance).  On return, fe->fe_fs->fs_fsr will be
 268  * modified to reflect the setting the hardware would have left.
 269  *
 270  * Note that we do not catch all illegal opcodes, so you can, for instance,
 271  * multiply two integers this way.
 272  */
 273 static int
 274 __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long tstate)
 275 {
 276         struct fpn *fp;
 277         int opf, rs1, rs2, rd, type, mask, cx, cond;
 278         u_long reg, fsr;
 279         u_int space[4];
 280         int i;
 281
 282         /*
 283          * `Decode' and execute instruction.  Start with no exceptions.
 284          * The type of any opf opcode is in the bottom two bits, so we
 285          * squish them out here.
 286          */
 287         opf = insn & (IF_MASK(IF_F3_OP3_SHIFT, IF_F3_OP3_BITS) |
 288             IF_MASK(IF_F3_OPF_SHIFT + 2, IF_F3_OPF_BITS - 2));
 289         type = IF_F3_OPF(insn) & 3;
 290         rs1 = RN_DECODE(type, IF_F3_RS1(insn));
 291         rs2 = RN_DECODE(type, IF_F3_RS2(insn));
 292         rd = RN_DECODE(type, IF_F3_RD(insn));
 293         cond = 0;
 294 #ifdef notdef
 295         if ((rs1 | rs2 | rd) & opmask[type])
 296                 return (SIGILL);
 297 #endif
 298         fsr = fe->fe_fsr;
 299         fe->fe_fsr &= ~FSR_CEXC_MASK;
 300         fe->fe_cx = 0;
 301         switch (opf) {
 302         case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(0))):
 303                 __fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC0(fsr));
 304                 return (0);
 305         case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(1))):
 306                 __fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC1(fsr));
 307                 return (0);
 308         case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(2))):
 309                 __fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC2(fsr));
 310                 return (0);
 311         case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(3))):
 312                 __fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC3(fsr));
 313                 return (0);
 314         case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_ICC)):
 315                 __fpu_ccmov(fe, type, rd, rs2, insn,
 316                     (tstate & TSTATE_ICC_MASK) >> TSTATE_ICC_SHIFT);
 317                 return (0);
 318         case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_XCC)):
 319                 __fpu_ccmov(fe, type, rd, rs2, insn,
 320                     (tstate & TSTATE_XCC_MASK) >> (TSTATE_XCC_SHIFT));
 321                 return (0);
 322         case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_Z)):
 323                 reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 324                 if (reg == 0)
 325                         __fpu_mov(fe, type, rd, rs2, 0, 0);
 326                 return (0);
 327         case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LEZ)):
 328                 reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 329                 if (reg <= 0)
 330                         __fpu_mov(fe, type, rd, rs2, 0, 0);
 331                 return (0);
 332         case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LZ)):
 333                 reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 334                 if (reg < 0)
 335                         __fpu_mov(fe, type, rd, rs2, 0, 0);
 336                 return (0);
 337         case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_NZ)):
 338                 reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 339                 if (reg != 0)
 340                         __fpu_mov(fe, type, rd, rs2, 0, 0);
 341                 return (0);
 342         case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GZ)):
 343                 reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 344                 if (reg > 0)
 345                         __fpu_mov(fe, type, rd, rs2, 0, 0);
 346                 return (0);
 347         case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GEZ)):
 348                 reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 349                 if (reg >= 0)
 350                         __fpu_mov(fe, type, rd, rs2, 0, 0);
 351                 return (0);
 352         case FOP(INS2_FPop2, INSFP2_FCMP):
 353                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 354                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 355                 __fpu_compare(fe, 0, IF_F3_CC(insn));
 356                 return (__fpu_cmpck(fe));
 357         case FOP(INS2_FPop2, INSFP2_FCMPE):
 358                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 359                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 360                 __fpu_compare(fe, 1, IF_F3_CC(insn));
 361                 return (__fpu_cmpck(fe));
 362         case FOP(INS2_FPop1, INSFP1_FMOV):      /* these should all be pretty obvious */
 363                 __fpu_mov(fe, type, rd, rs2, 0, 0);
 364                 return (0);
 365         case FOP(INS2_FPop1, INSFP1_FNEG):
 366                 __fpu_mov(fe, type, rd, rs2, 0, (1 << 31));
 367                 return (0);
 368         case FOP(INS2_FPop1, INSFP1_FABS):
 369                 __fpu_mov(fe, type, rd, rs2, (1 << 31), 0);
 370                 return (0);
 371         case FOP(INS2_FPop1, INSFP1_FSQRT):
 372                 __fpu_explode(fe, &fe->fe_f1, type, rs2);
 373                 fp = __fpu_sqrt(fe);
 374                 break;
 375         case FOP(INS2_FPop1, INSFP1_FADD):
 376                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 377                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 378                 fp = __fpu_add(fe);
 379                 break;
 380         case FOP(INS2_FPop1, INSFP1_FSUB):
 381                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 382                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 383                 fp = __fpu_sub(fe);
 384                 break;
 385         case FOP(INS2_FPop1, INSFP1_FMUL):
 386                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 387                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 388                 fp = __fpu_mul(fe);
 389                 break;
 390         case FOP(INS2_FPop1, INSFP1_FDIV):
 391                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 392                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 393                 fp = __fpu_div(fe);
 394                 break;
 395         case FOP(INS2_FPop1, INSFP1_FsMULd):
 396         case FOP(INS2_FPop1, INSFP1_FdMULq):
 397                 if (type == FTYPE_EXT)
 398                         return (SIGILL);
 399                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 400                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 401                 type++; /* single to double, or double to quad */
 402                 /*
 403                  * Recalculate rd (the old type applied for the source regs
 404                  * only, the target one has a different size).
 405                  */
 406                 rd = RN_DECODE(type, IF_F3_RD(insn));
 407                 fp = __fpu_mul(fe);
 408                 break;
 409         case FOP(INS2_FPop1, INSFP1_FxTOs):
 410         case FOP(INS2_FPop1, INSFP1_FxTOd):
 411         case FOP(INS2_FPop1, INSFP1_FxTOq):
 412                 type = FTYPE_LNG;
 413                 __fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
 414                 /* sneaky; depends on instruction encoding */
 415                 type = (IF_F3_OPF(insn) >> 2) & 3;
 416                 rd = RN_DECODE(type, IF_F3_RD(insn));
 417                 break;
 418         case FOP(INS2_FPop1, INSFP1_FTOx):
 419                 __fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
 420                 type = FTYPE_LNG;
 421                 mask = 1;       /* needs 2 registers */
 422                 rd = IF_F3_RD(insn) & ~mask;
 423                 break;
 424         case FOP(INS2_FPop1, INSFP1_FTOs):
 425         case FOP(INS2_FPop1, INSFP1_FTOd):
 426         case FOP(INS2_FPop1, INSFP1_FTOq):
 427         case FOP(INS2_FPop1, INSFP1_FTOi):
 428                 __fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
 429                 /* sneaky; depends on instruction encoding */
 430                 type = (IF_F3_OPF(insn) >> 2) & 3;
 431                 rd = RN_DECODE(type, IF_F3_RD(insn));
 432                 break;
 433         default:
 434                 return (SIGILL);
 435         }
 436
 437         /*
 438          * ALU operation is complete.  Collapse the result and then check
 439          * for exceptions.  If we got any, and they are enabled, do not
 440          * alter the destination register, just stop with an exception.
 441          * Otherwise set new current exceptions and accrue.
 442          */
 443         __fpu_implode(fe, fp, type, space);
 444         cx = fe->fe_cx;
 445         if (cx != 0) {
 446                 mask = (fsr >> FSR_TEM_SHIFT) & FSR_TEM_MASK;
 447                 if (cx & mask) {
 448                         /* not accrued??? */
 449                         fsr = (fsr & ~FSR_FTT_MASK) |
 450                             FSR_FTT(FSR_FTT_IEEE) |
 451                             FSR_CEXC(cx_to_trapx[(cx & mask) - 1]);
 452                         return (SIGFPE);
 453                 }
 454                 fsr |= (cx << FSR_CEXC_SHIFT) | (cx << FSR_AEXC_SHIFT);
 455         }
 456         fe->fe_fsr = fsr;
 457         if (type == FTYPE_INT || type == FTYPE_SNG)
 458                 __fpu_setreg(rd, space[0]);
 459         else {
 460                 for (i = 0; i < OPSZ(type); i += 2) {
 461                         __fpu_setreg64(rd + i, ((u_int64_t)space[i] << 32) |
 462                             space[i + 1]);
 463                 }
 464         }
 465         return (0);     /* success */
 466 }