/*
 * Configuration for math routines.
 *
 * Copyright (c) 2017-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */
#ifndef WANT_ROUNDING
/* If defined to 1, return correct results for special cases in non-nearest
   rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
   This may be set to 0 if there is no fenv support or if math functions only
   get called in round to nearest mode.  */
# define WANT_ROUNDING 1
#endif
#ifndef WANT_ERRNO
/* If defined to 1, set errno in math functions according to ISO C.  Many math
   libraries do not set errno, so this is 0 by default.  It may need to be
   set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.  */
# define WANT_ERRNO 0
#endif
#ifndef WANT_ERRNO_UFLOW
/* Set errno to ERANGE if result underflows to 0 (in all rounding modes).  */
# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
#endif
/* Compiler can inline round as a single instruction.  */
#ifndef HAVE_FAST_ROUND
# if __aarch64__
#   define HAVE_FAST_ROUND 1
# else
#   define HAVE_FAST_ROUND 0
# endif
#endif
/* Compiler can inline lround, but not (long)round(x).  */
#ifndef HAVE_FAST_LROUND
# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
#   define HAVE_FAST_LROUND 1
# else
#   define HAVE_FAST_LROUND 0
# endif
#endif
/* Compiler can inline fma as a single instruction.  */
#ifndef HAVE_FAST_FMA
# if defined FP_FAST_FMA || __aarch64__
#   define HAVE_FAST_FMA 1
# else
#   define HAVE_FAST_FMA 0
# endif
#endif
/* Provide *_finite symbols and some of the glibc hidden symbols
   so libmathlib can be used with binaries compiled against glibc
   to interpose math functions with both static and dynamic linking.  */
#ifndef USE_GLIBC_ABI
# if __GNUC__
#   define USE_GLIBC_ABI 1
# else
#   define USE_GLIBC_ABI 0
# endif
#endif
/* Optionally used extensions.  */
#if __GNUC__ >= 5
# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
# define NOINLINE __attribute__ ((noinline))
# define UNUSED __attribute__ ((unused))
# define likely(x) __builtin_expect (!!(x), 1)
# define unlikely(x) __builtin_expect (x, 0)
# if __GNUC__ >= 9
#   define attribute_copy(f) __attribute__ ((copy (f)))
# else
#   define attribute_copy(f)
# endif
# define strong_alias(f, a) \
  extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
# define hidden_alias(f, a) \
  extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
  attribute_copy (f);
#else
# define HIDDEN
# define NOINLINE
# define UNUSED
# define likely(x) (x)
# define unlikely(x) (x)
#endif
#if HAVE_FAST_ROUND
/* When set, the roundtoint and converttoint functions are provided with
   the semantics documented below.  */
# define TOINT_INTRINSICS 1

/* Round x to nearest int in all rounding modes, ties have to be rounded
   consistently with converttoint so the results match.  If the result
   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
static inline double_t
roundtoint (double_t x)
{
  return round (x);
}

/* Convert x to nearest int in all rounding modes, ties have to be rounded
   consistently with roundtoint.  If the result is not representible in an
   int32_t then the semantics is unspecified.  */
static inline int32_t
converttoint (double_t x)
{
# if HAVE_FAST_LROUND
  return lround (x);
# else
  return (long) round (x);
# endif
}
#endif
/* Reinterpret the bits of a float as a uint32_t.  Type-punning goes
   through a union, which is well-defined in C (unlike a pointer cast,
   which would violate strict aliasing).  */
static inline uint32_t
asuint (float f)
{
  union
  {
    float f;
    uint32_t i;
  } u = {f};
  return u.i;
}

/* Reinterpret the bits of a uint32_t as a float.  */
static inline float
asfloat (uint32_t i)
{
  union
  {
    uint32_t i;
    float f;
  } u = {i};
  return u.f;
}

/* Reinterpret the bits of a double as a uint64_t.  */
static inline uint64_t
asuint64 (double f)
{
  union
  {
    double f;
    uint64_t i;
  } u = {f};
  return u.i;
}

/* Reinterpret the bits of a uint64_t as a double.  */
static inline double
asdouble (uint64_t i)
{
  union
  {
    uint64_t i;
    double f;
  } u = {i};
  return u.f;
}
/* IEEE 754-2008 semantics: a NaN is quiet iff the top mantissa bit is set
   (754-1985 MIPS/PA-RISC use the opposite convention).  */
#ifndef IEEE_754_2008_SNAN
# define IEEE_754_2008_SNAN 1
#endif

/* Return non-zero if x is a signaling NaN.  */
static inline int
issignalingf_inline (float x)
{
  uint32_t ix = asuint (x);
  if (!IEEE_754_2008_SNAN)
    return (ix & 0x7fc00000) == 0x7fc00000;
  /* Flipping the quiet bit makes an sNaN encoding compare above all
     infinity/quiet-NaN encodings; *2 discards the sign bit.  */
  return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
}
180 issignaling_inline (double x)
182 uint64_t ix = asuint64 (x);
183 if (!IEEE_754_2008_SNAN)
184 return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
185 return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
188 #if __aarch64__ && __GNUC__
189 /* Prevent the optimization of a floating-point expression. */
191 opt_barrier_float (float x)
193 __asm__ __volatile__ ("" : "+w" (x));
197 opt_barrier_double (double x)
199 __asm__ __volatile__ ("" : "+w" (x));
202 /* Force the evaluation of a floating-point expression for its side-effect. */
204 force_eval_float (float x)
206 __asm__ __volatile__ ("" : "+w" (x));
209 force_eval_double (double x)
211 __asm__ __volatile__ ("" : "+w" (x));
215 opt_barrier_float (float x)
217 volatile float y = x;
221 opt_barrier_double (double x)
223 volatile double y = x;
227 force_eval_float (float x)
229 volatile float y UNUSED = x;
232 force_eval_double (double x)
234 volatile double y UNUSED = x;
/* Evaluate an expression as the specified type, normally a type
   cast should be enough, but compilers implement non-standard
   excess-precision handling, so when FLT_EVAL_METHOD != 0 then
   these functions may need to be customized.  */
static inline float
eval_as_float (float x)
{
  float y = x;
  return y;
}
static inline double
eval_as_double (double x)
{
  double y = x;
  return y;
}
253 /* Error handling tail calls for special cases, with a sign argument.
254 The sign of the return value is set if the argument is non-zero. */
256 /* The result overflows. */
257 HIDDEN float __math_oflowf (uint32_t);
258 /* The result underflows to 0 in nearest rounding mode. */
259 HIDDEN float __math_uflowf (uint32_t);
260 /* The result underflows to 0 in some directed rounding mode only. */
261 HIDDEN float __math_may_uflowf (uint32_t);
262 /* Division by zero. */
263 HIDDEN float __math_divzerof (uint32_t);
264 /* The result overflows. */
265 HIDDEN double __math_oflow (uint32_t);
266 /* The result underflows to 0 in nearest rounding mode. */
267 HIDDEN double __math_uflow (uint32_t);
268 /* The result underflows to 0 in some directed rounding mode only. */
269 HIDDEN double __math_may_uflow (uint32_t);
270 /* Division by zero. */
271 HIDDEN double __math_divzero (uint32_t);
273 /* Error handling using input checking. */
275 /* Invalid input unless it is a quiet NaN. */
276 HIDDEN float __math_invalidf (float);
277 /* Invalid input unless it is a quiet NaN. */
278 HIDDEN double __math_invalid (double);
280 /* Error handling using output checking, only for errno setting. */
282 /* Check if the result overflowed to infinity. */
283 HIDDEN double __math_check_oflow (double);
284 /* Check if the result underflowed to 0. */
285 HIDDEN double __math_check_uflow (double);
287 /* Check if the result overflowed to infinity. */
289 check_oflow (double x)
291 return WANT_ERRNO ? __math_check_oflow (x) : x;
294 /* Check if the result underflowed to 0. */
296 check_uflow (double x)
298 return WANT_ERRNO ? __math_check_uflow (x) : x;
301 /* Check if the result overflowed to infinity. */
302 HIDDEN float __math_check_oflowf (float);
303 /* Check if the result underflowed to 0. */
304 HIDDEN float __math_check_uflowf (float);
306 /* Check if the result overflowed to infinity. */
308 check_oflowf (float x)
310 return WANT_ERRNO ? __math_check_oflowf (x) : x;
313 /* Check if the result underflowed to 0. */
315 check_uflowf (float x)
317 return WANT_ERRNO ? __math_check_uflowf (x) : x;
320 /* Shared between expf, exp2f and powf. */
321 #define EXP2F_TABLE_BITS 5
322 #define EXP2F_POLY_ORDER 3
323 extern const struct exp2f_data
325 uint64_t tab[1 << EXP2F_TABLE_BITS];
327 double poly[EXP2F_POLY_ORDER];
329 double invln2_scaled;
330 double poly_scaled[EXP2F_POLY_ORDER];
331 } __exp2f_data HIDDEN;
333 #define LOGF_TABLE_BITS 4
334 #define LOGF_POLY_ORDER 4
335 extern const struct logf_data
340 } tab[1 << LOGF_TABLE_BITS];
342 double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
343 } __logf_data HIDDEN;
345 #define LOG2F_TABLE_BITS 4
346 #define LOG2F_POLY_ORDER 4
347 extern const struct log2f_data
352 } tab[1 << LOG2F_TABLE_BITS];
353 double poly[LOG2F_POLY_ORDER];
354 } __log2f_data HIDDEN;
356 #define POWF_LOG2_TABLE_BITS 4
357 #define POWF_LOG2_POLY_ORDER 5
359 # define POWF_SCALE_BITS EXP2F_TABLE_BITS
361 # define POWF_SCALE_BITS 0
363 #define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
364 extern const struct powf_log2_data
369 } tab[1 << POWF_LOG2_TABLE_BITS];
370 double poly[POWF_LOG2_POLY_ORDER];
371 } __powf_log2_data HIDDEN;
374 #define EXP_TABLE_BITS 7
375 #define EXP_POLY_ORDER 5
376 /* Use polynomial that is optimized for a wider input range. This may be
377 needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */
378 #define EXP_POLY_WIDE 0
379 /* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be
380 needed for good precision in non-nearest rouning and !EXP_POLY_WIDE. */
381 #define EXP_USE_TOINT_NARROW 0
382 #define EXP2_POLY_ORDER 5
383 #define EXP2_POLY_WIDE 0
384 extern const struct exp_data
390 double poly[4]; /* Last four coefficients. */
392 double exp2_poly[EXP2_POLY_ORDER];
393 uint64_t tab[2*(1 << EXP_TABLE_BITS)];
396 #define LOG_TABLE_BITS 7
397 #define LOG_POLY_ORDER 6
398 #define LOG_POLY1_ORDER 12
399 extern const struct log_data
403 double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
404 double poly1[LOG_POLY1_ORDER - 1];
405 struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
407 struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
411 #define LOG2_TABLE_BITS 6
412 #define LOG2_POLY_ORDER 7
413 #define LOG2_POLY1_ORDER 11
414 extern const struct log2_data
418 double poly[LOG2_POLY_ORDER - 1];
419 double poly1[LOG2_POLY1_ORDER - 1];
420 struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
422 struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
424 } __log2_data HIDDEN;
426 #define POW_LOG_TABLE_BITS 7
427 #define POW_LOG_POLY_ORDER 8
428 extern const struct pow_log_data
432 double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
433 /* Note: the pad field is unused, but allows slightly faster indexing. */
434 struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS];
435 } __pow_log_data HIDDEN;
437 extern const struct erff_data
439 float erff_poly_A[6];
440 float erff_poly_B[7];
441 } __erff_data HIDDEN;
/* Polynomial orders and coefficient counts for the erf/erfc
   approximation intervals.  */
#define ERF_POLY_A_ORDER 19
#define ERF_POLY_A_NCOEFFS 10
#define ERFC_POLY_C_NCOEFFS 16
#define ERFC_POLY_D_NCOEFFS 18
#define ERFC_POLY_E_NCOEFFS 14
#define ERFC_POLY_F_NCOEFFS 17
449 extern const struct erf_data
451 double erf_poly_A[ERF_POLY_A_NCOEFFS];
452 double erf_ratio_N_A[5];
453 double erf_ratio_D_A[5];
454 double erf_ratio_N_B[7];
455 double erf_ratio_D_B[6];
456 double erfc_poly_C[ERFC_POLY_C_NCOEFFS];
457 double erfc_poly_D[ERFC_POLY_D_NCOEFFS];
458 double erfc_poly_E[ERFC_POLY_E_NCOEFFS];
459 double erfc_poly_F[ERFC_POLY_F_NCOEFFS];