lib/libc/sparc64/fpu/fpu.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-3-Clause
   3  *
   4  * Copyright (c) 1992, 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * This software was developed by the Computer Systems Engineering group
   8  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
   9  * contributed to Berkeley.
  10  *
  11  * All advertising materials mentioning features or use of this software
  12  * must display the following acknowledgement:
  13  *      This product includes software developed by the University of
  14  *      California, Lawrence Berkeley Laboratory.
  15  *
  16  * Redistribution and use in source and binary forms, with or without
  17  * modification, are permitted provided that the following conditions
  18  * are met:
  19  * 1. Redistributions of source code must retain the above copyright
  20  *    notice, this list of conditions and the following disclaimer.
  21  * 2. Redistributions in binary form must reproduce the above copyright
  22  *    notice, this list of conditions and the following disclaimer in the
  23  *    documentation and/or other materials provided with the distribution.
  24  * 3. Neither the name of the University nor the names of its contributors
  25  *    may be used to endorse or promote products derived from this software
  26  *    without specific prior written permission.
  27  *
  28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  38  * SUCH DAMAGE.
  39  */
  40 /*-
  41  * Copyright 2001 by Thomas Moestl <tmm@FreeBSD.org>.  All rights reserved.
  42  *
  43  * Redistribution and use in source and binary forms, with or without
  44  * modification, are permitted provided that the following conditions
  45  * are met:
  46  * 1. Redistributions of source code must retain the above copyright
  47  *    notice, this list of conditions and the following disclaimer.
  48  * 2. Redistributions in binary form must reproduce the above copyright
  49  *    notice, this list of conditions and the following disclaimer in the
  50  *    documentation and/or other materials provided with the distribution.
  51  *
  52  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  53  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  54  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  55  * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  56  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  57  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  58  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  59  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  60  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  61  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  62  *
  63  *      @(#)fpu.c       8.1 (Berkeley) 6/11/93
  64  *      $NetBSD: fpu.c,v 1.11 2000/12/06 01:47:50 mrg Exp $
  65  */
  66
  67 #include <sys/cdefs.h>
  68 __FBSDID("$FreeBSD$");
  69
  70 #include <sys/param.h>
  71
  72 #include "namespace.h"
  73 #include <errno.h>
  74 #include <signal.h>
  75 #ifdef FPU_DEBUG
  76 #include <stdio.h>
  77 #endif
  78 #include <stdlib.h>
  79 #include <unistd.h>
  80 #include "un-namespace.h"
  81 #include "libc_private.h"
  82
  83 #include <machine/fp.h>
  84 #include <machine/frame.h>
  85 #include <machine/fsr.h>
  86 #include <machine/instr.h>
  87 #include <machine/pcb.h>
  88 #include <machine/tstate.h>
  89
  90 #include "__sparc_utrap_private.h"
  91 #include "fpu_emu.h"
  92 #include "fpu_extern.h"
  93
  94 /*
  95  * Translate current exceptions into `first' exception.  The
  96  * bits go the wrong way for ffs() (0x10 is most important, etc).
  97  * There are only 5, so do it the obvious way.
  98  */
  99 #define X1(x) x
 100 #define X2(x) x,x
 101 #define X4(x) x,x,x,x
 102 #define X8(x) X4(x),X4(x)
 103 #define X16(x) X8(x),X8(x)
 104
 105 static const char cx_to_trapx[] = {
 106         X1(FSR_NX),
 107         X2(FSR_DZ),
 108         X4(FSR_UF),
 109         X8(FSR_OF),
 110         X16(FSR_NV)
 111 };
 112
 113 #ifdef FPU_DEBUG
 114 #ifdef FPU_DEBUG_MASK
 115 int __fpe_debug = FPU_DEBUG_MASK;
 116 #else
 117 int __fpe_debug = 0;
 118 #endif
 119 #endif  /* FPU_DEBUG */
 120
 121 static int __fpu_execute(struct utrapframe *, struct fpemu *, u_int32_t,
 122     u_long);
 123
 124 /*
 125  * Need to use an fpstate on the stack; we could switch, so we cannot safely
 126  * modify the pcb one, it might get overwritten.
 127  */
 128 int
 129 __fpu_exception(struct utrapframe *uf)
 130 {
 131         struct fpemu fe;
 132         u_long fsr, tstate;
 133         u_int insn;
 134         int sig;
 135
 136         fsr = uf->uf_fsr;
 137
 138         switch (FSR_GET_FTT(fsr)) {
 139         case FSR_FTT_NONE:
 140                 __utrap_write("lost FPU trap type\n");
 141                 return (0);
 142         case FSR_FTT_IEEE:
 143                 return (SIGFPE);
 144         case FSR_FTT_SEQERR:
 145                 __utrap_write("FPU sequence error\n");
 146                 return (SIGFPE);
 147         case FSR_FTT_HWERR:
 148                 __utrap_write("FPU hardware error\n");
 149                 return (SIGFPE);
 150         case FSR_FTT_UNFIN:
 151         case FSR_FTT_UNIMP:
 152                 break;
 153         default:
 154                 __utrap_write("unknown FPU error\n");
 155                 return (SIGFPE);
 156         }
 157
 158         fe.fe_fsr = fsr & ~FSR_FTT_MASK;
 159         insn = *(u_int32_t *)uf->uf_pc;
 160         if (IF_OP(insn) != IOP_MISC || (IF_F3_OP3(insn) != INS2_FPop1 &&
 161             IF_F3_OP3(insn) != INS2_FPop2))
 162                 __utrap_panic("bogus FP fault");
 163         tstate = uf->uf_state;
 164         sig = __fpu_execute(uf, &fe, insn, tstate);
 165         if (sig != 0)
 166                 return (sig);
 167         __asm __volatile("ldx %0, %%fsr" : : "m" (fe.fe_fsr));
 168         return (0);
 169 }
 170
 171 #ifdef FPU_DEBUG
 172 /*
 173  * Dump a `fpn' structure.
 174  */
 175 void
 176 __fpu_dumpfpn(struct fpn *fp)
 177 {
 178         static const char *const class[] = {
 179                 "SNAN", "QNAN", "ZERO", "NUM", "INF"
 180         };
 181
 182         printf("%s %c.%x %x %x %xE%d", class[fp->fp_class + 2],
 183                 fp->fp_sign ? '-' : ' ',
 184                 fp->fp_mant[0], fp->fp_mant[1],
 185                 fp->fp_mant[2], fp->fp_mant[3],
 186                 fp->fp_exp);
 187 }
 188 #endif
 189
 190 static const int opmask[] = {0, 0, 1, 3, 1};
 191
 192 /* Decode 5 bit register field depending on the type. */
 193 #define RN_DECODE(tp, rn) \
 194         ((tp) >= FTYPE_DBL ? INSFPdq_RN(rn) & ~opmask[tp] : (rn))
 195
 196 /*
 197  * Helper for forming the below case statements. Build only the op3 and opf
 198  * field of the instruction, these are the only ones that need to match.
 199  */
 200 #define FOP(op3, opf) \
 201         ((op3) << IF_F3_OP3_SHIFT | (opf) << IF_F3_OPF_SHIFT)
 202
 203 /*
 204  * Implement a move operation for all supported operand types. The additional
 205  * nand and xor parameters will be applied to the upper 32 bit word of the
 206  * source operand. This allows to implement fabs and fneg (for fp operands
 207  * only!) using this functions, too, by passing (1U << 31) for one of the
 208  * parameters, and 0 for the other.
 209  */
 210 static void
 211 __fpu_mov(struct fpemu *fe, int type, int rd, int rs2, u_int32_t nand,
 212     u_int32_t xor)
 213 {
 214
 215         if (type == FTYPE_INT || type == FTYPE_SNG)
 216                 __fpu_setreg(rd, (__fpu_getreg(rs2) & ~nand) ^ xor);
 217         else {
 218                 /*
 219                  * Need to use the double versions to be able to access
 220                  * the upper 32 fp registers.
 221                  */
 222                 __fpu_setreg64(rd, (__fpu_getreg64(rs2) &
 223                     ~((u_int64_t)nand << 32)) ^ ((u_int64_t)xor << 32));
 224                 if (type == FTYPE_EXT)
 225                         __fpu_setreg64(rd + 2, __fpu_getreg64(rs2 + 2));
 226         }
 227 }
 228
 229 static __inline void
 230 __fpu_ccmov(struct fpemu *fe, int type, int rd, int rs2,
 231     u_int32_t insn, int fcc)
 232 {
 233
 234         if (IF_F4_COND(insn) == fcc)
 235                 __fpu_mov(fe, type, rd, rs2, 0, 0);
 236 }
 237
 238 static int
 239 __fpu_cmpck(struct fpemu *fe)
 240 {
 241         u_long fsr;
 242         int cx;
 243
 244         /*
 245          * The only possible exception here is NV; catch it
 246          * early and get out, as there is no result register.
 247          */
 248         cx = fe->fe_cx;
 249         fsr = fe->fe_fsr | (cx << FSR_CEXC_SHIFT);
 250         if (cx != 0) {
 251                 if (fsr & (FSR_NV << FSR_TEM_SHIFT)) {
 252                         fe->fe_fsr = (fsr & ~FSR_FTT_MASK) |
 253                             FSR_FTT(FSR_FTT_IEEE);
 254                         return (SIGFPE);
 255                 }
 256                 fsr |= FSR_NV << FSR_AEXC_SHIFT;
 257         }
 258         fe->fe_fsr = fsr;
 259         return (0);
 260 }
 261
 262 /*
 263  * Execute an FPU instruction (one that runs entirely in the FPU; not
 264  * FBfcc or STF, for instance).  On return, fe->fe_fs->fs_fsr will be
 265  * modified to reflect the setting the hardware would have left.
 266  *
 267  * Note that we do not catch all illegal opcodes, so you can, for instance,
 268  * multiply two integers this way.
 269  */
 270 static int
 271 __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn,
 272     u_long tstate)
 273 {
 274         struct fpn *fp;
 275         int opf, rs1, rs2, rd, type, mask, cx, cond __unused;
 276         u_long reg, fsr;
 277         u_int space[4];
 278
 279         /*
 280          * `Decode' and execute instruction.  Start with no exceptions.
 281          * The type of almost any OPF opcode is in the bottom two bits, so we
 282          * squish them out here.
 283          */
 284         opf = insn & (IF_MASK(IF_F3_OP3_SHIFT, IF_F3_OP3_BITS) |
 285             IF_MASK(IF_F3_OPF_SHIFT + 2, IF_F3_OPF_BITS - 2));
 286         type = IF_F3_OPF(insn) & 3;
 287         rs1 = RN_DECODE(type, IF_F3_RS1(insn));
 288         rs2 = RN_DECODE(type, IF_F3_RS2(insn));
 289         rd = RN_DECODE(type, IF_F3_RD(insn));
 290         cond = 0;
 291 #ifdef notdef
 292         if ((rs1 | rs2 | rd) & opmask[type])
 293                 return (SIGILL);
 294 #endif
 295         fsr = fe->fe_fsr;
 296         fe->fe_fsr &= ~FSR_CEXC_MASK;
 297         fe->fe_cx = 0;
 298         switch (opf) {
 299         case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(0))):
 300                 __fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC0(fsr));
 301                 return (0);
 302         case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(1))):
 303                 __fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC1(fsr));
 304                 return (0);
 305         case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(2))):
 306                 __fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC2(fsr));
 307                 return (0);
 308         case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(3))):
 309                 __fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC3(fsr));
 310                 return (0);
 311         case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_ICC)):
 312                 __fpu_ccmov(fe, type, rd, rs2, insn,
 313                     (tstate & TSTATE_ICC_MASK) >> TSTATE_ICC_SHIFT);
 314                 return (0);
 315         case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_XCC)):
 316                 __fpu_ccmov(fe, type, rd, rs2, insn,
 317                     (tstate & TSTATE_XCC_MASK) >> (TSTATE_XCC_SHIFT));
 318                 return (0);
 319         case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_Z)):
 320                 reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 321                 if (reg == 0)
 322                         __fpu_mov(fe, type, rd, rs2, 0, 0);
 323                 return (0);
 324         case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LEZ)):
 325                 reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 326                 if (reg <= 0)
 327                         __fpu_mov(fe, type, rd, rs2, 0, 0);
 328                 return (0);
 329         case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LZ)):
 330                 reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 331                 if (reg < 0)
 332                         __fpu_mov(fe, type, rd, rs2, 0, 0);
 333                 return (0);
 334         case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_NZ)):
 335                 reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 336                 if (reg != 0)
 337                         __fpu_mov(fe, type, rd, rs2, 0, 0);
 338                 return (0);
 339         case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GZ)):
 340                 reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 341                 if (reg > 0)
 342                         __fpu_mov(fe, type, rd, rs2, 0, 0);
 343                 return (0);
 344         case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GEZ)):
 345                 reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 346                 if (reg >= 0)
 347                         __fpu_mov(fe, type, rd, rs2, 0, 0);
 348                 return (0);
 349         case FOP(INS2_FPop2, INSFP2_FCMP):
 350                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 351                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 352                 __fpu_compare(fe, 0, IF_F3_CC(insn));
 353                 return (__fpu_cmpck(fe));
 354         case FOP(INS2_FPop2, INSFP2_FCMPE):
 355                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 356                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 357                 __fpu_compare(fe, 1, IF_F3_CC(insn));
 358                 return (__fpu_cmpck(fe));
 359         case FOP(INS2_FPop1, INSFP1_FMOV):
 360                 __fpu_mov(fe, type, rd, rs2, 0, 0);
 361                 return (0);
 362         case FOP(INS2_FPop1, INSFP1_FNEG):
 363                 __fpu_mov(fe, type, rd, rs2, 0, (1U << 31));
 364                 return (0);
 365         case FOP(INS2_FPop1, INSFP1_FABS):
 366                 __fpu_mov(fe, type, rd, rs2, (1U << 31), 0);
 367                 return (0);
 368         case FOP(INS2_FPop1, INSFP1_FSQRT):
 369                 __fpu_explode(fe, &fe->fe_f1, type, rs2);
 370                 fp = __fpu_sqrt(fe);
 371                 break;
 372         case FOP(INS2_FPop1, INSFP1_FADD):
 373                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 374                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 375                 fp = __fpu_add(fe);
 376                 break;
 377         case FOP(INS2_FPop1, INSFP1_FSUB):
 378                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 379                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 380                 fp = __fpu_sub(fe);
 381                 break;
 382         case FOP(INS2_FPop1, INSFP1_FMUL):
 383                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 384                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 385                 fp = __fpu_mul(fe);
 386                 break;
 387         case FOP(INS2_FPop1, INSFP1_FDIV):
 388                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 389                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 390                 fp = __fpu_div(fe);
 391                 break;
 392         case FOP(INS2_FPop1, INSFP1_FsMULd):
 393         case FOP(INS2_FPop1, INSFP1_FdMULq):
 394                 if (type == FTYPE_EXT)
 395                         return (SIGILL);
 396                 __fpu_explode(fe, &fe->fe_f1, type, rs1);
 397                 __fpu_explode(fe, &fe->fe_f2, type, rs2);
 398                 type++; /* single to double, or double to quad */
 399                 /*
 400                  * Recalculate rd (the old type applied for the source regs
 401                  * only, the target one has a different size).
 402                  */
 403                 rd = RN_DECODE(type, IF_F3_RD(insn));
 404                 fp = __fpu_mul(fe);
 405                 break;
 406         case FOP(INS2_FPop1, INSFP1_FxTOs):
 407         case FOP(INS2_FPop1, INSFP1_FxTOd):
 408         case FOP(INS2_FPop1, INSFP1_FxTOq):
 409                 type = FTYPE_LNG;
 410                 rs2 = RN_DECODE(type, IF_F3_RS2(insn));
 411                 __fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
 412                 /* sneaky; depends on instruction encoding */
 413                 type = (IF_F3_OPF(insn) >> 2) & 3;
 414                 rd = RN_DECODE(type, IF_F3_RD(insn));
 415                 break;
 416         case FOP(INS2_FPop1, INSFP1_FTOx):
 417                 __fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
 418                 type = FTYPE_LNG;
 419                 rd = RN_DECODE(type, IF_F3_RD(insn));
 420                 break;
 421         case FOP(INS2_FPop1, INSFP1_FTOs):
 422         case FOP(INS2_FPop1, INSFP1_FTOd):
 423         case FOP(INS2_FPop1, INSFP1_FTOq):
 424         case FOP(INS2_FPop1, INSFP1_FTOi):
 425                 __fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
 426                 /* sneaky; depends on instruction encoding */
 427                 type = (IF_F3_OPF(insn) >> 2) & 3;
 428                 rd = RN_DECODE(type, IF_F3_RD(insn));
 429                 break;
 430         default:
 431                 return (SIGILL);
 432         }
 433
 434         /*
 435          * ALU operation is complete.  Collapse the result and then check
 436          * for exceptions.  If we got any, and they are enabled, do not
 437          * alter the destination register, just stop with an exception.
 438          * Otherwise set new current exceptions and accrue.
 439          */
 440         __fpu_implode(fe, fp, type, space);
 441         cx = fe->fe_cx;
 442         if (cx != 0) {
 443                 mask = (fsr >> FSR_TEM_SHIFT) & FSR_TEM_MASK;
 444                 if (cx & mask) {
 445                         /* not accrued??? */
 446                         fsr = (fsr & ~FSR_FTT_MASK) |
 447                             FSR_FTT(FSR_FTT_IEEE) |
 448                             FSR_CEXC(cx_to_trapx[(cx & mask) - 1]);
 449                         return (SIGFPE);
 450                 }
 451                 fsr |= (cx << FSR_CEXC_SHIFT) | (cx << FSR_AEXC_SHIFT);
 452         }
 453         fe->fe_fsr = fsr;
 454         if (type == FTYPE_INT || type == FTYPE_SNG)
 455                 __fpu_setreg(rd, space[0]);
 456         else {
 457                 __fpu_setreg64(rd, ((u_int64_t)space[0] << 32) | space[1]);
 458                 if (type == FTYPE_EXT)
 459                         __fpu_setreg64(rd + 2,
 460                             ((u_int64_t)space[2] << 32) | space[3]);
 461         }
 462         return (0);     /* success */
 463 }