sys/amd64/include/ieeefp.h

   1 /*-
   2  * Copyright (c) 2003 Peter Wemm.
   3  * Copyright (c) 1990 Andrew Moore, Talke Studio
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  * 3. All advertising materials mentioning features or use of this software
  15  *    must display the following acknowledgement:
  16  *      This product includes software developed by the University of
  17  *      California, Berkeley and its contributors.
  18  * 4. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  *
  34  *      from: @(#) ieeefp.h     1.0 (Berkeley) 9/23/93
  35  * $FreeBSD$
  36  */
  37
  38 /*
  39  *      IEEE floating point type and constant definitions.
  40  */
  41
  42 #ifndef _MACHINE_IEEEFP_H_
  43 #define _MACHINE_IEEEFP_H_
  44
  45 #ifndef _SYS_CDEFS_H_
  46 #error this file needs sys/cdefs.h as a prerequisite
  47 #endif
  48
  49 /*
  50  * FP rounding modes
  51  */
  52 typedef enum {
  53         FP_RN=0,        /* round to nearest */
  54         FP_RM,          /* round down to minus infinity */
  55         FP_RP,          /* round up to plus infinity */
  56         FP_RZ           /* truncate */
  57 } fp_rnd_t;
  58
  59 /*
  60  * FP precision modes
  61  */
  62 typedef enum {
  63         FP_PS=0,        /* 24 bit (single-precision) */
  64         FP_PRS,         /* reserved */
  65         FP_PD,          /* 53 bit (double-precision) */
  66         FP_PE           /* 64 bit (extended-precision) */
  67 } fp_prec_t;
  68
  69 #define fp_except_t     int
  70
  71 /*
  72  * FP exception masks
  73  */
  74 #define FP_X_INV        0x01    /* invalid operation */
  75 #define FP_X_DNML       0x02    /* denormal */
  76 #define FP_X_DZ         0x04    /* zero divide */
  77 #define FP_X_OFL        0x08    /* overflow */
  78 #define FP_X_UFL        0x10    /* underflow */
  79 #define FP_X_IMP        0x20    /* (im)precision */
  80 #define FP_X_STK        0x40    /* stack fault */
  81
  82 /*
  83  * FP registers
  84  */
  85 #define FP_MSKS_REG     0       /* exception masks */
  86 #define FP_PRC_REG      0       /* precision */
  87 #define FP_RND_REG      0       /* direction */
  88 #define FP_STKY_REG     1       /* sticky flags */
  89
  90 /*
  91  * FP register bit field masks
  92  */
  93 #define FP_MSKS_FLD     0x3f    /* exception masks field */
  94 #define FP_PRC_FLD      0x300   /* precision control field */
  95 #define FP_RND_FLD      0xc00   /* round control field */
  96 #define FP_STKY_FLD     0x3f    /* sticky flags field */
  97
  98 /*
  99  * SSE mxcsr register bit field masks
 100  */
 101 #define SSE_STKY_FLD    0x3f    /* exception flags */
 102 #define SSE_DAZ_FLD     0x40    /* Denormals are zero */
 103 #define SSE_MSKS_FLD    0x1f80  /* exception masks field */
 104 #define SSE_RND_FLD     0x6000  /* rounding control */
 105 #define SSE_FZ_FLD      0x8000  /* flush to zero on underflow */
 106
 107 /*
 108  * FP register bit field offsets
 109  */
 110 #define FP_MSKS_OFF     0       /* exception masks offset */
 111 #define FP_PRC_OFF      8       /* precision control offset */
 112 #define FP_RND_OFF      10      /* round control offset */
 113 #define FP_STKY_OFF     0       /* sticky flags offset */
 114
 115 /*
 116  * SSE mxcsr register bit field offsets
 117  */
 118 #define SSE_STKY_OFF    0       /* exception flags offset */
 119 #define SSE_DAZ_OFF     6       /* DAZ exception mask offset */
 120 #define SSE_MSKS_OFF    7       /* other exception masks offset */
 121 #define SSE_RND_OFF     13      /* rounding control offset */
 122 #define SSE_FZ_OFF      15      /* flush to zero offset */
 123
 124 #if defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE__) \
 125     && !defined(__cplusplus)
 126
 127 #define __fldenv(addr)  __asm __volatile("fldenv %0" : : "m" (*(addr)))
 128 #define __fnstenv(addr) __asm __volatile("fnstenv %0" : "=m" (*(addr)))
 129 #define __fldcw(addr)   __asm __volatile("fldcw %0" : : "m" (*(addr)))
 130 #define __fnstcw(addr)  __asm __volatile("fnstcw %0" : "=m" (*(addr)))
 131 #define __fnstsw(addr)  __asm __volatile("fnstsw %0" : "=m" (*(addr)))
 132 #define __ldmxcsr(addr) __asm __volatile("ldmxcsr %0" : : "m" (*(addr)))
 133 #define __stmxcsr(addr) __asm __volatile("stmxcsr %0" : "=m" (*(addr)))
 134
/*
 * General notes about conflicting SSE vs FP status bits.
 * This code assumes that software will not fiddle with the control
 * bits of the SSE and x87 units in such a way as to get them out of
 * sync and still expect this to work.  Break this at your peril.
 * Because this is based on the i386 port, the x87 state is used for
 * the fpget*() functions, and is shadowed into the SSE state by
 * the fpset*() functions.  For dual-source fpget*() functions, the
 * two states are merged together (at least, that is the intent).
 */
 145
 146 /* Set rounding control */
 147 static __inline__ fp_rnd_t
 148 __fpgetround(void)
 149 {
 150         unsigned short _cw;
 151
 152         __fnstcw(&_cw);
 153         return ((_cw & FP_RND_FLD) >> FP_RND_OFF);
 154 }
 155
 156 static __inline__ fp_rnd_t
 157 __fpsetround(fp_rnd_t _m)
 158 {
 159         unsigned short _cw;
 160         unsigned int _mxcsr;
 161         fp_rnd_t _p;
 162
 163         __fnstcw(&_cw);
 164         _p = (_cw & FP_RND_FLD) >> FP_RND_OFF;
 165         _cw &= ~FP_RND_FLD;
 166         _cw |= (_m << FP_RND_OFF) & FP_RND_FLD;
 167         __fldcw(&_cw);
 168         __stmxcsr(&_mxcsr);
 169         _mxcsr &= ~SSE_RND_FLD;
 170         _mxcsr |= (_m << SSE_RND_OFF) & SSE_RND_FLD;
 171         __ldmxcsr(&_mxcsr);
 172         return (_p);
 173 }
 174
 175 /*
 176  * Set precision for fadd/fsub/fsqrt etc x87 instructions
 177  * There is no equivalent SSE mode or control.
 178  */
 179 static __inline__ fp_prec_t
 180 __fpgetprec(void)
 181 {
 182         unsigned short _cw;
 183
 184         __fnstcw(&_cw);
 185         return ((_cw & FP_PRC_FLD) >> FP_PRC_OFF);
 186 }
 187
 188 static __inline__ fp_prec_t
 189 __fpsetprec(fp_rnd_t _m)
 190 {
 191         unsigned short _cw;
 192         fp_prec_t _p;
 193
 194         __fnstcw(&_cw);
 195         _p = (_cw & FP_PRC_FLD) >> FP_PRC_OFF;
 196         _cw &= ~FP_PRC_FLD;
 197         _cw |= (_m << FP_PRC_OFF) & FP_PRC_FLD;
 198         __fldcw(&_cw);
 199         return (_p);
 200 }
 201
 202 /*
 203  * Look at the exception masks
 204  * Note that x87 masks are inverse of the fp*() functions
 205  * API.  ie: mask = 1 means disable for x87 and SSE, but
 206  * for the fp*() api, mask = 1 means enabled.
 207  */
 208 static __inline__ fp_except_t
 209 __fpgetmask(void)
 210 {
 211         unsigned short _cw;
 212
 213         __fnstcw(&_cw);
 214         return ((~_cw) & FP_MSKS_FLD);
 215 }
 216
 217 static __inline__ fp_except_t
 218 __fpsetmask(fp_except_t _m)
 219 {
 220         unsigned short _cw;
 221         unsigned int _mxcsr;
 222         fp_except_t _p;
 223
 224         __fnstcw(&_cw);
 225         _p = (~_cw) & FP_MSKS_FLD;
 226         _cw &= ~FP_MSKS_FLD;
 227         _cw |= (~_m) & FP_MSKS_FLD;
 228         __fldcw(&_cw);
 229         __stmxcsr(&_mxcsr);
 230         /* XXX should we clear non-ieee SSE_DAZ_FLD and SSE_FZ_FLD ? */
 231         _mxcsr &= ~SSE_MSKS_FLD;
 232         _mxcsr |= ((~_m) << SSE_MSKS_OFF) & SSE_MSKS_FLD;
 233         __ldmxcsr(&_mxcsr);
 234         return (_p);
 235 }
 236
 237 /* See which sticky exceptions are pending, and reset them */
 238 static __inline__ fp_except_t
 239 __fpgetsticky(void)
 240 {
 241         unsigned short _sw;
 242         unsigned int _mxcsr;
 243         fp_except_t _ex;
 244
 245         __fnstsw(&_sw);
 246         _ex = _sw & FP_STKY_FLD;
 247         __stmxcsr(&_mxcsr);
 248         _ex |= _mxcsr & SSE_STKY_FLD;
 249         return (_ex);
 250 }
 251
 252 #endif /* __GNUCLIKE_ASM && __CC_SUPPORTS___INLINE__ && !__cplusplus */
 253
 254 #if !defined(__IEEEFP_NOINLINES__) && !defined(__cplusplus) \
 255     && defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE__)
 256
 257 #define fpgetround()    __fpgetround()
 258 #define fpsetround(_m)  __fpsetround(_m)
 259 #define fpgetprec()     __fpgetprec()
 260 #define fpsetprec(_m)   __fpsetprec(_m)
 261 #define fpgetmask()     __fpgetmask()
 262 #define fpsetmask(_m)   __fpsetmask(_m)
 263 #define fpgetsticky()   __fpgetsticky()
 264
 265 /* Suppress prototypes in the MI header. */
 266 #define _IEEEFP_INLINED_        1
 267
 268 #else /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUCLIKE_ASM
 269          && __CC_SUPPORTS___INLINE__ */
 270
 271 /* Augment the userland declarations */
 272 __BEGIN_DECLS
 273 extern fp_prec_t fpgetprec(void);
 274 extern fp_prec_t fpsetprec(fp_prec_t);
 275 __END_DECLS
 276
 277 #endif /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUCLIKE_ASM
 278           && __CC_SUPPORTS___INLINE__ */
 279
 280 #endif /* !_MACHINE_IEEEFP_H_ */