contrib/llvm/tools/clang/lib/Headers/__clang_cuda_cmath.h

   1 /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
   2  *
   3  * Permission is hereby granted, free of charge, to any person obtaining a copy
   4  * of this software and associated documentation files (the "Software"), to deal
   5  * in the Software without restriction, including without limitation the rights
   6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   7  * copies of the Software, and to permit persons to whom the Software is
   8  * furnished to do so, subject to the following conditions:
   9  *
  10  * The above copyright notice and this permission notice shall be included in
  11  * all copies or substantial portions of the Software.
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19  * THE SOFTWARE.
  20  *
  21  *===-----------------------------------------------------------------------===
  22  */
  23 #ifndef __CLANG_CUDA_CMATH_H__
  24 #define __CLANG_CUDA_CMATH_H__
  25 #ifndef __CUDA__
  26 #error "This file is for CUDA compilation only."
  27 #endif
  28
  29 #include <limits>
  30
  31 // CUDA lets us use various std math functions on the device side.  This file
  32 // works in concert with __clang_cuda_math_forward_declares.h to make this work.
  33 //
  34 // Specifically, the forward-declares header declares __device__ overloads for
  35 // these functions in the global namespace, then pulls them into namespace std
  36 // with 'using' statements.  Then this file implements those functions, after
  37 // their implementations have been pulled in.
  38 //
  39 // It's important that we declare the functions in the global namespace and pull
  40 // them into namespace std with using statements, as opposed to simply declaring
  41 // these functions in namespace std, because our device functions need to
  42 // overload the standard library functions, which may be declared in the global
  43 // namespace or in std, depending on the degree of conformance of the stdlib
  44 // implementation.  Declaring in the global namespace and pulling into namespace
  45 // std covers all of the known knowns.
  46
  47 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
  48
  49 __DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
  50 __DEVICE__ long abs(long __n) { return ::labs(__n); }
  51 __DEVICE__ float abs(float __x) { return ::fabsf(__x); }
  52 __DEVICE__ double abs(double __x) { return ::fabs(__x); }
  53 __DEVICE__ float acos(float __x) { return ::acosf(__x); }
  54 __DEVICE__ float asin(float __x) { return ::asinf(__x); }
  55 __DEVICE__ float atan(float __x) { return ::atanf(__x); }
  56 __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
  57 __DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
  58 __DEVICE__ float cos(float __x) { return ::cosf(__x); }
  59 __DEVICE__ float cosh(float __x) { return ::coshf(__x); }
  60 __DEVICE__ float exp(float __x) { return ::expf(__x); }
  61 __DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
  62 __DEVICE__ float floor(float __x) { return ::floorf(__x); }
  63 __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
  64 __DEVICE__ int fpclassify(float __x) {
  65   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
  66                               FP_ZERO, __x);
  67 }
  68 __DEVICE__ int fpclassify(double __x) {
  69   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
  70                               FP_ZERO, __x);
  71 }
  72 __DEVICE__ float frexp(float __arg, int *__exp) {
  73   return ::frexpf(__arg, __exp);
  74 }
  75 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
  76 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
  77 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
  78 // For inscrutable reasons, __finite(), the double-precision version of
  79 // __finitef, does not exist when compiling for MacOS.  __isfinited is available
  80 // everywhere and is just as good.
  81 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
  82 __DEVICE__ bool isgreater(float __x, float __y) {
  83   return __builtin_isgreater(__x, __y);
  84 }
  85 __DEVICE__ bool isgreater(double __x, double __y) {
  86   return __builtin_isgreater(__x, __y);
  87 }
  88 __DEVICE__ bool isgreaterequal(float __x, float __y) {
  89   return __builtin_isgreaterequal(__x, __y);
  90 }
  91 __DEVICE__ bool isgreaterequal(double __x, double __y) {
  92   return __builtin_isgreaterequal(__x, __y);
  93 }
  94 __DEVICE__ bool isless(float __x, float __y) {
  95   return __builtin_isless(__x, __y);
  96 }
  97 __DEVICE__ bool isless(double __x, double __y) {
  98   return __builtin_isless(__x, __y);
  99 }
 100 __DEVICE__ bool islessequal(float __x, float __y) {
 101   return __builtin_islessequal(__x, __y);
 102 }
 103 __DEVICE__ bool islessequal(double __x, double __y) {
 104   return __builtin_islessequal(__x, __y);
 105 }
 106 __DEVICE__ bool islessgreater(float __x, float __y) {
 107   return __builtin_islessgreater(__x, __y);
 108 }
 109 __DEVICE__ bool islessgreater(double __x, double __y) {
 110   return __builtin_islessgreater(__x, __y);
 111 }
 112 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
 113 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
 114 __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
 115 __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
 116 __DEVICE__ bool isunordered(float __x, float __y) {
 117   return __builtin_isunordered(__x, __y);
 118 }
 119 __DEVICE__ bool isunordered(double __x, double __y) {
 120   return __builtin_isunordered(__x, __y);
 121 }
 122 __DEVICE__ float ldexp(float __arg, int __exp) {
 123   return ::ldexpf(__arg, __exp);
 124 }
 125 __DEVICE__ float log(float __x) { return ::logf(__x); }
 126 __DEVICE__ float log10(float __x) { return ::log10f(__x); }
 127 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
 128 __DEVICE__ float nexttoward(float __from, double __to) {
 129   return __builtin_nexttowardf(__from, __to);
 130 }
 131 __DEVICE__ double nexttoward(double __from, double __to) {
 132   return __builtin_nexttoward(__from, __to);
 133 }
 134 __DEVICE__ float nexttowardf(float __from, double __to) {
 135   return __builtin_nexttowardf(__from, __to);
 136 }
 137 __DEVICE__ float pow(float __base, float __exp) {
 138   return ::powf(__base, __exp);
 139 }
 140 __DEVICE__ float pow(float __base, int __iexp) {
 141   return ::powif(__base, __iexp);
 142 }
 143 __DEVICE__ double pow(double __base, int __iexp) {
 144   return ::powi(__base, __iexp);
 145 }
 146 __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
 147 __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
 148 __DEVICE__ float sin(float __x) { return ::sinf(__x); }
 149 __DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
 150 __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
 151 __DEVICE__ float tan(float __x) { return ::tanf(__x); }
 152 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
 153
 154 // Now we've defined everything we promised we'd define in
 155 // __clang_cuda_math_forward_declares.h.  We need to do two additional things to
 156 // fix up our math functions.
 157 //
 158 // 1) Define __device__ overloads for e.g. sin(int).  The CUDA headers define
 159 //    only sin(float) and sin(double), which means that e.g. sin(0) is
 160 //    ambiguous.
 161 //
 162 // 2) Pull the __device__ overloads of "foobarf" math functions into namespace
 163 //    std.  These are defined in the CUDA headers in the global namespace,
 164 //    independent of everything else we've done here.
 165
 166 // We can't use std::enable_if, because we want to be pre-C++11 compatible.  But
 167 // we go ahead and unconditionally define functions that are only available when
 168 // compiling for C++11 to match the behavior of the CUDA headers.
 169 template<bool __B, class __T = void>
 170 struct __clang_cuda_enable_if {};
 171
 172 template <class __T> struct __clang_cuda_enable_if<true, __T> {
 173   typedef __T type;
 174 };
 175
 176 // Defines an overload of __fn that accepts one integral argument, calls
 177 // __fn((double)x), and returns __retty.
 178 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
 179   template <typename __T>                                                      \
 180   __DEVICE__                                                                   \
 181       typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
 182                                       __retty>::type                           \
 183       __fn(__T __x) {                                                          \
 184     return ::__fn((double)__x);                                                \
 185   }
 186
 187 // Defines an overload of __fn that accepts one two arithmetic arguments, calls
 188 // __fn((double)x, (double)y), and returns a double.
 189 //
 190 // Note this is different from OVERLOAD_1, which generates an overload that
 191 // accepts only *integral* arguments.
 192 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
 193   template <typename __T1, typename __T2>                                      \
 194   __DEVICE__ typename __clang_cuda_enable_if<                                  \
 195       std::numeric_limits<__T1>::is_specialized &&                             \
 196           std::numeric_limits<__T2>::is_specialized,                           \
 197       __retty>::type                                                           \
 198   __fn(__T1 __x, __T2 __y) {                                                   \
 199     return __fn((double)__x, (double)__y);                                     \
 200   }
 201
 202 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
 203 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
 204 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
 205 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
 206 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
 207 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
 208 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
 209 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
 210 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
 211 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
 212 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
 213 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
 214 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
 215 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
 216 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
 217 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
 218 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
 219 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
 220 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
 221 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
 222 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
 223 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
 224 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
 225 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
 226 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
 227 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
 228 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
 229 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
 230 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
 231 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
 232 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
 233 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
 234 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
 235 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
 236 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
 237 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
 238 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
 239 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
 240 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
 241 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
 242 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
 243 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
 244 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
 245 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
 246 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
 247 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
 248 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
 249 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
 250 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
 251 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
 252 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
 253 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
 254 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
 255 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
 256 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
 257 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
 258 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
 259 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
 260 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
 261 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
 262
 263 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
 264 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
 265
 266 // Overloads for functions that don't match the patterns expected by
 267 // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
 268 template <typename __T1, typename __T2, typename __T3>
 269 __DEVICE__ typename __clang_cuda_enable_if<
 270     std::numeric_limits<__T1>::is_specialized &&
 271         std::numeric_limits<__T2>::is_specialized &&
 272         std::numeric_limits<__T3>::is_specialized,
 273     double>::type
 274 fma(__T1 __x, __T2 __y, __T3 __z) {
 275   return std::fma((double)__x, (double)__y, (double)__z);
 276 }
 277
 278 template <typename __T>
 279 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 280                                            double>::type
 281 frexp(__T __x, int *__exp) {
 282   return std::frexp((double)__x, __exp);
 283 }
 284
 285 template <typename __T>
 286 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 287                                            double>::type
 288 ldexp(__T __x, int __exp) {
 289   return std::ldexp((double)__x, __exp);
 290 }
 291
 292 template <typename __T>
 293 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 294                                            double>::type
 295 nexttoward(__T __from, double __to) {
 296   return std::nexttoward((double)__from, __to);
 297 }
 298
 299 template <typename __T1, typename __T2>
 300 __DEVICE__ typename __clang_cuda_enable_if<
 301     std::numeric_limits<__T1>::is_specialized &&
 302         std::numeric_limits<__T2>::is_specialized,
 303     double>::type
 304 remquo(__T1 __x, __T2 __y, int *__quo) {
 305   return std::remquo((double)__x, (double)__y, __quo);
 306 }
 307
 308 template <typename __T>
 309 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 310                                            double>::type
 311 scalbln(__T __x, long __exp) {
 312   return std::scalbln((double)__x, __exp);
 313 }
 314
 315 template <typename __T>
 316 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 317                                            double>::type
 318 scalbn(__T __x, int __exp) {
 319   return std::scalbn((double)__x, __exp);
 320 }
 321
 322 // We need to define these overloads in exactly the namespace our standard
 323 // library uses (including the right inline namespace), otherwise they won't be
 324 // picked up by other functions in the standard library (e.g. functions in
 325 // <complex>).  Thus the ugliness below.
 326 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD
 327 _LIBCPP_BEGIN_NAMESPACE_STD
 328 #else
 329 namespace std {
 330 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
 331 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 332 #endif
 333 #endif
 334
 335 // Pull the new overloads we defined above into namespace std.
 336 using ::acos;
 337 using ::acosh;
 338 using ::asin;
 339 using ::asinh;
 340 using ::atan;
 341 using ::atan2;
 342 using ::atanh;
 343 using ::cbrt;
 344 using ::ceil;
 345 using ::copysign;
 346 using ::cos;
 347 using ::cosh;
 348 using ::erf;
 349 using ::erfc;
 350 using ::exp;
 351 using ::exp2;
 352 using ::expm1;
 353 using ::fabs;
 354 using ::fdim;
 355 using ::floor;
 356 using ::fma;
 357 using ::fmax;
 358 using ::fmin;
 359 using ::fmod;
 360 using ::fpclassify;
 361 using ::frexp;
 362 using ::hypot;
 363 using ::ilogb;
 364 using ::isfinite;
 365 using ::isgreater;
 366 using ::isgreaterequal;
 367 using ::isless;
 368 using ::islessequal;
 369 using ::islessgreater;
 370 using ::isnormal;
 371 using ::isunordered;
 372 using ::ldexp;
 373 using ::lgamma;
 374 using ::llrint;
 375 using ::llround;
 376 using ::log;
 377 using ::log10;
 378 using ::log1p;
 379 using ::log2;
 380 using ::logb;
 381 using ::lrint;
 382 using ::lround;
 383 using ::nearbyint;
 384 using ::nextafter;
 385 using ::nexttoward;
 386 using ::pow;
 387 using ::remainder;
 388 using ::remquo;
 389 using ::rint;
 390 using ::round;
 391 using ::scalbln;
 392 using ::scalbn;
 393 using ::signbit;
 394 using ::sin;
 395 using ::sinh;
 396 using ::sqrt;
 397 using ::tan;
 398 using ::tanh;
 399 using ::tgamma;
 400 using ::trunc;
 401
 402 // Well this is fun: We need to pull these symbols in for libc++, but we can't
 403 // pull them in with libstdc++, because its ::isinf and ::isnan are different
 404 // than its std::isinf and std::isnan.
 405 #ifndef __GLIBCXX__
 406 using ::isinf;
 407 using ::isnan;
 408 #endif
 409
 410 // Finally, pull the "foobarf" functions that CUDA defines in its headers into
 411 // namespace std.
 412 using ::acosf;
 413 using ::acoshf;
 414 using ::asinf;
 415 using ::asinhf;
 416 using ::atan2f;
 417 using ::atanf;
 418 using ::atanhf;
 419 using ::cbrtf;
 420 using ::ceilf;
 421 using ::copysignf;
 422 using ::cosf;
 423 using ::coshf;
 424 using ::erfcf;
 425 using ::erff;
 426 using ::exp2f;
 427 using ::expf;
 428 using ::expm1f;
 429 using ::fabsf;
 430 using ::fdimf;
 431 using ::floorf;
 432 using ::fmaf;
 433 using ::fmaxf;
 434 using ::fminf;
 435 using ::fmodf;
 436 using ::frexpf;
 437 using ::hypotf;
 438 using ::ilogbf;
 439 using ::ldexpf;
 440 using ::lgammaf;
 441 using ::llrintf;
 442 using ::llroundf;
 443 using ::log10f;
 444 using ::log1pf;
 445 using ::log2f;
 446 using ::logbf;
 447 using ::logf;
 448 using ::lrintf;
 449 using ::lroundf;
 450 using ::modff;
 451 using ::nearbyintf;
 452 using ::nextafterf;
 453 using ::nexttowardf;
 454 using ::nexttowardf;
 455 using ::powf;
 456 using ::remainderf;
 457 using ::remquof;
 458 using ::rintf;
 459 using ::roundf;
 460 using ::scalblnf;
 461 using ::scalbnf;
 462 using ::sinf;
 463 using ::sinhf;
 464 using ::sqrtf;
 465 using ::tanf;
 466 using ::tanhf;
 467 using ::tgammaf;
 468 using ::truncf;
 469
 470 #ifdef _LIBCPP_END_NAMESPACE_STD
 471 _LIBCPP_END_NAMESPACE_STD
 472 #else
 473 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
 474 _GLIBCXX_END_NAMESPACE_VERSION
 475 #endif
 476 } // namespace std
 477 #endif
 478
 479 #undef __DEVICE__
 480
 481 #endif