contrib/llvm/tools/clang/lib/Headers/__clang_cuda_cmath.h

   1 /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
   2  *
   3  * Permission is hereby granted, free of charge, to any person obtaining a copy
   4  * of this software and associated documentation files (the "Software"), to deal
   5  * in the Software without restriction, including without limitation the rights
   6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   7  * copies of the Software, and to permit persons to whom the Software is
   8  * furnished to do so, subject to the following conditions:
   9  *
  10  * The above copyright notice and this permission notice shall be included in
  11  * all copies or substantial portions of the Software.
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19  * THE SOFTWARE.
  20  *
  21  *===-----------------------------------------------------------------------===
  22  */
  23 #ifndef __CLANG_CUDA_CMATH_H__
  24 #define __CLANG_CUDA_CMATH_H__
  25 #ifndef __CUDA__
  26 #error "This file is for CUDA compilation only."
  27 #endif
  28
  29 #include <limits>
  30
  31 // CUDA lets us use various std math functions on the device side.  This file
  32 // works in concert with __clang_cuda_math_forward_declares.h to make this work.
  33 //
  34 // Specifically, the forward-declares header declares __device__ overloads for
  35 // these functions in the global namespace, then pulls them into namespace std
  36 // with 'using' statements.  Then this file implements those functions, after
  37 // their implementations have been pulled in.
  38 //
  39 // It's important that we declare the functions in the global namespace and pull
  40 // them into namespace std with using statements, as opposed to simply declaring
  41 // these functions in namespace std, because our device functions need to
  42 // overload the standard library functions, which may be declared in the global
  43 // namespace or in std, depending on the degree of conformance of the stdlib
  44 // implementation.  Declaring in the global namespace and pulling into namespace
  45 // std covers all of the known knowns.
  46
  47 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
  48
  49 __DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
  50 __DEVICE__ long abs(long __n) { return ::labs(__n); }
  51 __DEVICE__ float abs(float __x) { return ::fabsf(__x); }
  52 __DEVICE__ double abs(double __x) { return ::fabs(__x); }
  53 __DEVICE__ float acos(float __x) { return ::acosf(__x); }
  54 __DEVICE__ float asin(float __x) { return ::asinf(__x); }
  55 __DEVICE__ float atan(float __x) { return ::atanf(__x); }
  56 __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
  57 __DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
  58 __DEVICE__ float cos(float __x) { return ::cosf(__x); }
  59 __DEVICE__ float cosh(float __x) { return ::coshf(__x); }
  60 __DEVICE__ float exp(float __x) { return ::expf(__x); }
  61 __DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
  62 __DEVICE__ float floor(float __x) { return ::floorf(__x); }
  63 __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
  64 __DEVICE__ int fpclassify(float __x) {
  65   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
  66                               FP_ZERO, __x);
  67 }
  68 __DEVICE__ int fpclassify(double __x) {
  69   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
  70                               FP_ZERO, __x);
  71 }
  72 __DEVICE__ float frexp(float __arg, int *__exp) {
  73   return ::frexpf(__arg, __exp);
  74 }
  75
  76 // For inscrutable reasons, the CUDA headers define these functions for us on
  77 // Windows.
  78 #ifndef _MSC_VER
  79 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
  80 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
  81 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
  82 // For inscrutable reasons, __finite(), the double-precision version of
  83 // __finitef, does not exist when compiling for MacOS.  __isfinited is available
  84 // everywhere and is just as good.
  85 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
  86 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
  87 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
  88 #endif
  89
  90 __DEVICE__ bool isgreater(float __x, float __y) {
  91   return __builtin_isgreater(__x, __y);
  92 }
  93 __DEVICE__ bool isgreater(double __x, double __y) {
  94   return __builtin_isgreater(__x, __y);
  95 }
  96 __DEVICE__ bool isgreaterequal(float __x, float __y) {
  97   return __builtin_isgreaterequal(__x, __y);
  98 }
  99 __DEVICE__ bool isgreaterequal(double __x, double __y) {
 100   return __builtin_isgreaterequal(__x, __y);
 101 }
 102 __DEVICE__ bool isless(float __x, float __y) {
 103   return __builtin_isless(__x, __y);
 104 }
 105 __DEVICE__ bool isless(double __x, double __y) {
 106   return __builtin_isless(__x, __y);
 107 }
 108 __DEVICE__ bool islessequal(float __x, float __y) {
 109   return __builtin_islessequal(__x, __y);
 110 }
 111 __DEVICE__ bool islessequal(double __x, double __y) {
 112   return __builtin_islessequal(__x, __y);
 113 }
 114 __DEVICE__ bool islessgreater(float __x, float __y) {
 115   return __builtin_islessgreater(__x, __y);
 116 }
 117 __DEVICE__ bool islessgreater(double __x, double __y) {
 118   return __builtin_islessgreater(__x, __y);
 119 }
 120 __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
 121 __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
 122 __DEVICE__ bool isunordered(float __x, float __y) {
 123   return __builtin_isunordered(__x, __y);
 124 }
 125 __DEVICE__ bool isunordered(double __x, double __y) {
 126   return __builtin_isunordered(__x, __y);
 127 }
 128 __DEVICE__ float ldexp(float __arg, int __exp) {
 129   return ::ldexpf(__arg, __exp);
 130 }
 131 __DEVICE__ float log(float __x) { return ::logf(__x); }
 132 __DEVICE__ float log10(float __x) { return ::log10f(__x); }
 133 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
 134 __DEVICE__ float nexttoward(float __from, double __to) {
 135   return __builtin_nexttowardf(__from, __to);
 136 }
 137 __DEVICE__ double nexttoward(double __from, double __to) {
 138   return __builtin_nexttoward(__from, __to);
 139 }
 140 __DEVICE__ float nexttowardf(float __from, double __to) {
 141   return __builtin_nexttowardf(__from, __to);
 142 }
 143 __DEVICE__ float pow(float __base, float __exp) {
 144   return ::powf(__base, __exp);
 145 }
 146 __DEVICE__ float pow(float __base, int __iexp) {
 147   return ::powif(__base, __iexp);
 148 }
 149 __DEVICE__ double pow(double __base, int __iexp) {
 150   return ::powi(__base, __iexp);
 151 }
 152 __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
 153 __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
 154 __DEVICE__ float sin(float __x) { return ::sinf(__x); }
 155 __DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
 156 __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
 157 __DEVICE__ float tan(float __x) { return ::tanf(__x); }
 158 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
 159
 160 // Now we've defined everything we promised we'd define in
 161 // __clang_cuda_math_forward_declares.h.  We need to do two additional things to
 162 // fix up our math functions.
 163 //
 164 // 1) Define __device__ overloads for e.g. sin(int).  The CUDA headers define
 165 //    only sin(float) and sin(double), which means that e.g. sin(0) is
 166 //    ambiguous.
 167 //
 168 // 2) Pull the __device__ overloads of "foobarf" math functions into namespace
 169 //    std.  These are defined in the CUDA headers in the global namespace,
 170 //    independent of everything else we've done here.
 171
 172 // We can't use std::enable_if, because we want to be pre-C++11 compatible.  But
 173 // we go ahead and unconditionally define functions that are only available when
 174 // compiling for C++11 to match the behavior of the CUDA headers.
 175 template<bool __B, class __T = void>
 176 struct __clang_cuda_enable_if {};
 177
 178 template <class __T> struct __clang_cuda_enable_if<true, __T> {
 179   typedef __T type;
 180 };
 181
 182 // Defines an overload of __fn that accepts one integral argument, calls
 183 // __fn((double)x), and returns __retty.
 184 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
 185   template <typename __T>                                                      \
 186   __DEVICE__                                                                   \
 187       typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
 188                                       __retty>::type                           \
 189       __fn(__T __x) {                                                          \
 190     return ::__fn((double)__x);                                                \
 191   }
 192
 193 // Defines an overload of __fn that accepts one two arithmetic arguments, calls
 194 // __fn((double)x, (double)y), and returns a double.
 195 //
 196 // Note this is different from OVERLOAD_1, which generates an overload that
 197 // accepts only *integral* arguments.
 198 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
 199   template <typename __T1, typename __T2>                                      \
 200   __DEVICE__ typename __clang_cuda_enable_if<                                  \
 201       std::numeric_limits<__T1>::is_specialized &&                             \
 202           std::numeric_limits<__T2>::is_specialized,                           \
 203       __retty>::type                                                           \
 204   __fn(__T1 __x, __T2 __y) {                                                   \
 205     return __fn((double)__x, (double)__y);                                     \
 206   }
 207
 208 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
 209 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
 210 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
 211 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
 212 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
 213 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
 214 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
 215 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
 216 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
 217 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
 218 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
 219 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
 220 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
 221 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
 222 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
 223 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
 224 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
 225 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
 226 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
 227 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
 228 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
 229 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
 230 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
 231 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
 232 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
 233 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
 234 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
 235 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
 236 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
 237 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
 238 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
 239 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
 240 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
 241 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
 242 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
 243 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
 244 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
 245 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
 246 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
 247 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
 248 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
 249 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
 250 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
 251 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
 252 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
 253 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
 254 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
 255 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
 256 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
 257 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
 258 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
 259 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
 260 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
 261 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
 262 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
 263 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
 264 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
 265 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
 266 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
 267 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
 268
 269 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
 270 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
 271
 272 // Overloads for functions that don't match the patterns expected by
 273 // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
 274 template <typename __T1, typename __T2, typename __T3>
 275 __DEVICE__ typename __clang_cuda_enable_if<
 276     std::numeric_limits<__T1>::is_specialized &&
 277         std::numeric_limits<__T2>::is_specialized &&
 278         std::numeric_limits<__T3>::is_specialized,
 279     double>::type
 280 fma(__T1 __x, __T2 __y, __T3 __z) {
 281   return std::fma((double)__x, (double)__y, (double)__z);
 282 }
 283
 284 template <typename __T>
 285 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 286                                            double>::type
 287 frexp(__T __x, int *__exp) {
 288   return std::frexp((double)__x, __exp);
 289 }
 290
 291 template <typename __T>
 292 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 293                                            double>::type
 294 ldexp(__T __x, int __exp) {
 295   return std::ldexp((double)__x, __exp);
 296 }
 297
 298 template <typename __T>
 299 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 300                                            double>::type
 301 nexttoward(__T __from, double __to) {
 302   return std::nexttoward((double)__from, __to);
 303 }
 304
 305 template <typename __T1, typename __T2>
 306 __DEVICE__ typename __clang_cuda_enable_if<
 307     std::numeric_limits<__T1>::is_specialized &&
 308         std::numeric_limits<__T2>::is_specialized,
 309     double>::type
 310 remquo(__T1 __x, __T2 __y, int *__quo) {
 311   return std::remquo((double)__x, (double)__y, __quo);
 312 }
 313
 314 template <typename __T>
 315 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 316                                            double>::type
 317 scalbln(__T __x, long __exp) {
 318   return std::scalbln((double)__x, __exp);
 319 }
 320
 321 template <typename __T>
 322 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 323                                            double>::type
 324 scalbn(__T __x, int __exp) {
 325   return std::scalbn((double)__x, __exp);
 326 }
 327
 328 // We need to define these overloads in exactly the namespace our standard
 329 // library uses (including the right inline namespace), otherwise they won't be
 330 // picked up by other functions in the standard library (e.g. functions in
 331 // <complex>).  Thus the ugliness below.
 332 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD
 333 _LIBCPP_BEGIN_NAMESPACE_STD
 334 #else
 335 namespace std {
 336 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
 337 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 338 #endif
 339 #endif
 340
 341 // Pull the new overloads we defined above into namespace std.
 342 using ::acos;
 343 using ::acosh;
 344 using ::asin;
 345 using ::asinh;
 346 using ::atan;
 347 using ::atan2;
 348 using ::atanh;
 349 using ::cbrt;
 350 using ::ceil;
 351 using ::copysign;
 352 using ::cos;
 353 using ::cosh;
 354 using ::erf;
 355 using ::erfc;
 356 using ::exp;
 357 using ::exp2;
 358 using ::expm1;
 359 using ::fabs;
 360 using ::fdim;
 361 using ::floor;
 362 using ::fma;
 363 using ::fmax;
 364 using ::fmin;
 365 using ::fmod;
 366 using ::fpclassify;
 367 using ::frexp;
 368 using ::hypot;
 369 using ::ilogb;
 370 using ::isfinite;
 371 using ::isgreater;
 372 using ::isgreaterequal;
 373 using ::isless;
 374 using ::islessequal;
 375 using ::islessgreater;
 376 using ::isnormal;
 377 using ::isunordered;
 378 using ::ldexp;
 379 using ::lgamma;
 380 using ::llrint;
 381 using ::llround;
 382 using ::log;
 383 using ::log10;
 384 using ::log1p;
 385 using ::log2;
 386 using ::logb;
 387 using ::lrint;
 388 using ::lround;
 389 using ::nearbyint;
 390 using ::nextafter;
 391 using ::nexttoward;
 392 using ::pow;
 393 using ::remainder;
 394 using ::remquo;
 395 using ::rint;
 396 using ::round;
 397 using ::scalbln;
 398 using ::scalbn;
 399 using ::signbit;
 400 using ::sin;
 401 using ::sinh;
 402 using ::sqrt;
 403 using ::tan;
 404 using ::tanh;
 405 using ::tgamma;
 406 using ::trunc;
 407
 408 // Well this is fun: We need to pull these symbols in for libc++, but we can't
 409 // pull them in with libstdc++, because its ::isinf and ::isnan are different
 410 // than its std::isinf and std::isnan.
 411 #ifndef __GLIBCXX__
 412 using ::isinf;
 413 using ::isnan;
 414 #endif
 415
 416 // Finally, pull the "foobarf" functions that CUDA defines in its headers into
 417 // namespace std.
 418 using ::acosf;
 419 using ::acoshf;
 420 using ::asinf;
 421 using ::asinhf;
 422 using ::atan2f;
 423 using ::atanf;
 424 using ::atanhf;
 425 using ::cbrtf;
 426 using ::ceilf;
 427 using ::copysignf;
 428 using ::cosf;
 429 using ::coshf;
 430 using ::erfcf;
 431 using ::erff;
 432 using ::exp2f;
 433 using ::expf;
 434 using ::expm1f;
 435 using ::fabsf;
 436 using ::fdimf;
 437 using ::floorf;
 438 using ::fmaf;
 439 using ::fmaxf;
 440 using ::fminf;
 441 using ::fmodf;
 442 using ::frexpf;
 443 using ::hypotf;
 444 using ::ilogbf;
 445 using ::ldexpf;
 446 using ::lgammaf;
 447 using ::llrintf;
 448 using ::llroundf;
 449 using ::log10f;
 450 using ::log1pf;
 451 using ::log2f;
 452 using ::logbf;
 453 using ::logf;
 454 using ::lrintf;
 455 using ::lroundf;
 456 using ::modff;
 457 using ::nearbyintf;
 458 using ::nextafterf;
 459 using ::nexttowardf;
 460 using ::nexttowardf;
 461 using ::powf;
 462 using ::remainderf;
 463 using ::remquof;
 464 using ::rintf;
 465 using ::roundf;
 466 using ::scalblnf;
 467 using ::scalbnf;
 468 using ::sinf;
 469 using ::sinhf;
 470 using ::sqrtf;
 471 using ::tanf;
 472 using ::tanhf;
 473 using ::tgammaf;
 474 using ::truncf;
 475
 476 #ifdef _LIBCPP_END_NAMESPACE_STD
 477 _LIBCPP_END_NAMESPACE_STD
 478 #else
 479 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
 480 _GLIBCXX_END_NAMESPACE_VERSION
 481 #endif
 482 } // namespace std
 483 #endif
 484
 485 #undef __DEVICE__
 486
 487 #endif