contrib/llvm-project/clang/lib/Headers/__clang_cuda_cmath.h

   1 /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
   2  *
   3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4  * See https://llvm.org/LICENSE.txt for license information.
   5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6  *
   7  *===-----------------------------------------------------------------------===
   8  */
   9 #ifndef __CLANG_CUDA_CMATH_H__
  10 #define __CLANG_CUDA_CMATH_H__
  11 #ifndef __CUDA__
  12 #error "This file is for CUDA compilation only."
  13 #endif
  14
  15 #include <limits>
  16
  17 // CUDA lets us use various std math functions on the device side.  This file
  18 // works in concert with __clang_cuda_math_forward_declares.h to make this work.
  19 //
  20 // Specifically, the forward-declares header declares __device__ overloads for
  21 // these functions in the global namespace, then pulls them into namespace std
  22 // with 'using' statements.  Then this file implements those functions, after
  23 // their implementations have been pulled in.
  24 //
  25 // It's important that we declare the functions in the global namespace and pull
  26 // them into namespace std with using statements, as opposed to simply declaring
  27 // these functions in namespace std, because our device functions need to
  28 // overload the standard library functions, which may be declared in the global
  29 // namespace or in std, depending on the degree of conformance of the stdlib
  30 // implementation.  Declaring in the global namespace and pulling into namespace
  31 // std covers all of the known knowns.
  32
  33 #ifdef _OPENMP
  34 #define __DEVICE__ static __attribute__((always_inline))
  35 #else
  36 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
  37 #endif
  38
  39 // For C++ 17 we need to include noexcept attribute to be compatible
  40 // with the header-defined version. This may be removed once
  41 // variant is supported.
  42 #if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
  43 #define __NOEXCEPT noexcept
  44 #else
  45 #define __NOEXCEPT
  46 #endif
  47
  48 #if !(defined(_OPENMP) && defined(__cplusplus))
  49 __DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
  50 __DEVICE__ long abs(long __n) { return ::labs(__n); }
  51 __DEVICE__ float abs(float __x) { return ::fabsf(__x); }
  52 __DEVICE__ double abs(double __x) { return ::fabs(__x); }
  53 #endif
  54 // TODO: remove once variat is supported.
  55 #if defined(_OPENMP) && defined(__cplusplus)
  56 __DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); }
  57 __DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); }
  58 #endif
  59 __DEVICE__ float acos(float __x) { return ::acosf(__x); }
  60 __DEVICE__ float asin(float __x) { return ::asinf(__x); }
  61 __DEVICE__ float atan(float __x) { return ::atanf(__x); }
  62 __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
  63 __DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
  64 __DEVICE__ float cos(float __x) { return ::cosf(__x); }
  65 __DEVICE__ float cosh(float __x) { return ::coshf(__x); }
  66 __DEVICE__ float exp(float __x) { return ::expf(__x); }
  67 __DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); }
  68 __DEVICE__ float floor(float __x) { return ::floorf(__x); }
  69 __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
  70 // TODO: remove when variant is supported
  71 #ifndef _OPENMP
  72 __DEVICE__ int fpclassify(float __x) {
  73   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
  74                               FP_ZERO, __x);
  75 }
  76 __DEVICE__ int fpclassify(double __x) {
  77   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
  78                               FP_ZERO, __x);
  79 }
  80 #endif
  81 __DEVICE__ float frexp(float __arg, int *__exp) {
  82   return ::frexpf(__arg, __exp);
  83 }
  84
  85 // For inscrutable reasons, the CUDA headers define these functions for us on
  86 // Windows.
  87 #ifndef _MSC_VER
  88 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
  89 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
  90 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
  91 // For inscrutable reasons, __finite(), the double-precision version of
  92 // __finitef, does not exist when compiling for MacOS.  __isfinited is available
  93 // everywhere and is just as good.
  94 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
  95 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
  96 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
  97 #endif
  98
  99 __DEVICE__ bool isgreater(float __x, float __y) {
 100   return __builtin_isgreater(__x, __y);
 101 }
 102 __DEVICE__ bool isgreater(double __x, double __y) {
 103   return __builtin_isgreater(__x, __y);
 104 }
 105 __DEVICE__ bool isgreaterequal(float __x, float __y) {
 106   return __builtin_isgreaterequal(__x, __y);
 107 }
 108 __DEVICE__ bool isgreaterequal(double __x, double __y) {
 109   return __builtin_isgreaterequal(__x, __y);
 110 }
 111 __DEVICE__ bool isless(float __x, float __y) {
 112   return __builtin_isless(__x, __y);
 113 }
 114 __DEVICE__ bool isless(double __x, double __y) {
 115   return __builtin_isless(__x, __y);
 116 }
 117 __DEVICE__ bool islessequal(float __x, float __y) {
 118   return __builtin_islessequal(__x, __y);
 119 }
 120 __DEVICE__ bool islessequal(double __x, double __y) {
 121   return __builtin_islessequal(__x, __y);
 122 }
 123 __DEVICE__ bool islessgreater(float __x, float __y) {
 124   return __builtin_islessgreater(__x, __y);
 125 }
 126 __DEVICE__ bool islessgreater(double __x, double __y) {
 127   return __builtin_islessgreater(__x, __y);
 128 }
 129 __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
 130 __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
 131 __DEVICE__ bool isunordered(float __x, float __y) {
 132   return __builtin_isunordered(__x, __y);
 133 }
 134 __DEVICE__ bool isunordered(double __x, double __y) {
 135   return __builtin_isunordered(__x, __y);
 136 }
 137 __DEVICE__ float ldexp(float __arg, int __exp) {
 138   return ::ldexpf(__arg, __exp);
 139 }
 140 __DEVICE__ float log(float __x) { return ::logf(__x); }
 141 __DEVICE__ float log10(float __x) { return ::log10f(__x); }
 142 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
 143 __DEVICE__ float pow(float __base, float __exp) {
 144   return ::powf(__base, __exp);
 145 }
 146 __DEVICE__ float pow(float __base, int __iexp) {
 147   return ::powif(__base, __iexp);
 148 }
 149 __DEVICE__ double pow(double __base, int __iexp) {
 150   return ::powi(__base, __iexp);
 151 }
 152 __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
 153 __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
 154 __DEVICE__ float sin(float __x) { return ::sinf(__x); }
 155 __DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
 156 __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
 157 __DEVICE__ float tan(float __x) { return ::tanf(__x); }
 158 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
 159
 160 // Notably missing above is nexttoward.  We omit it because
 161 // libdevice doesn't provide an implementation, and we don't want to be in the
 162 // business of implementing tricky libm functions in this header.
 163
 164 // Now we've defined everything we promised we'd define in
 165 // __clang_cuda_math_forward_declares.h.  We need to do two additional things to
 166 // fix up our math functions.
 167 //
 168 // 1) Define __device__ overloads for e.g. sin(int).  The CUDA headers define
 169 //    only sin(float) and sin(double), which means that e.g. sin(0) is
 170 //    ambiguous.
 171 //
 172 // 2) Pull the __device__ overloads of "foobarf" math functions into namespace
 173 //    std.  These are defined in the CUDA headers in the global namespace,
 174 //    independent of everything else we've done here.
 175
 176 // We can't use std::enable_if, because we want to be pre-C++11 compatible.  But
 177 // we go ahead and unconditionally define functions that are only available when
 178 // compiling for C++11 to match the behavior of the CUDA headers.
 179 template<bool __B, class __T = void>
 180 struct __clang_cuda_enable_if {};
 181
 182 template <class __T> struct __clang_cuda_enable_if<true, __T> {
 183   typedef __T type;
 184 };
 185
 186 // Defines an overload of __fn that accepts one integral argument, calls
 187 // __fn((double)x), and returns __retty.
 188 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
 189   template <typename __T>                                                      \
 190   __DEVICE__                                                                   \
 191       typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
 192                                       __retty>::type                           \
 193       __fn(__T __x) {                                                          \
 194     return ::__fn((double)__x);                                                \
 195   }
 196
 197 // Defines an overload of __fn that accepts one two arithmetic arguments, calls
 198 // __fn((double)x, (double)y), and returns a double.
 199 //
 200 // Note this is different from OVERLOAD_1, which generates an overload that
 201 // accepts only *integral* arguments.
 202 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
 203   template <typename __T1, typename __T2>                                      \
 204   __DEVICE__ typename __clang_cuda_enable_if<                                  \
 205       std::numeric_limits<__T1>::is_specialized &&                             \
 206           std::numeric_limits<__T2>::is_specialized,                           \
 207       __retty>::type                                                           \
 208   __fn(__T1 __x, __T2 __y) {                                                   \
 209     return __fn((double)__x, (double)__y);                                     \
 210   }
 211
 212 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
 213 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
 214 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
 215 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
 216 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
 217 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
 218 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
 219 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
 220 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
 221 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
 222 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
 223 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
 224 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
 225 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
 226 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
 227 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
 228 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
 229 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
 230 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
 231 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
 232 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
 233 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
 234 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
 235 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
 236 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
 237 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
 238 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
 239 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
 240 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
 241 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
 242 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
 243 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
 244 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
 245 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
 246 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
 247 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
 248 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
 249 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
 250 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
 251 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
 252 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
 253 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
 254 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
 255 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
 256 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
 257 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
 258 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
 259 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
 260 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
 261 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
 262 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
 263 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
 264 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
 265 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
 266 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
 267 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
 268 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
 269 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
 270 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
 271 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
 272
 273 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
 274 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
 275
 276 // Overloads for functions that don't match the patterns expected by
 277 // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
 278 template <typename __T1, typename __T2, typename __T3>
 279 __DEVICE__ typename __clang_cuda_enable_if<
 280     std::numeric_limits<__T1>::is_specialized &&
 281         std::numeric_limits<__T2>::is_specialized &&
 282         std::numeric_limits<__T3>::is_specialized,
 283     double>::type
 284 fma(__T1 __x, __T2 __y, __T3 __z) {
 285   return std::fma((double)__x, (double)__y, (double)__z);
 286 }
 287
 288 template <typename __T>
 289 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 290                                            double>::type
 291 frexp(__T __x, int *__exp) {
 292   return std::frexp((double)__x, __exp);
 293 }
 294
 295 template <typename __T>
 296 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 297                                            double>::type
 298 ldexp(__T __x, int __exp) {
 299   return std::ldexp((double)__x, __exp);
 300 }
 301
 302 template <typename __T1, typename __T2>
 303 __DEVICE__ typename __clang_cuda_enable_if<
 304     std::numeric_limits<__T1>::is_specialized &&
 305         std::numeric_limits<__T2>::is_specialized,
 306     double>::type
 307 remquo(__T1 __x, __T2 __y, int *__quo) {
 308   return std::remquo((double)__x, (double)__y, __quo);
 309 }
 310
 311 template <typename __T>
 312 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 313                                            double>::type
 314 scalbln(__T __x, long __exp) {
 315   return std::scalbln((double)__x, __exp);
 316 }
 317
 318 template <typename __T>
 319 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 320                                            double>::type
 321 scalbn(__T __x, int __exp) {
 322   return std::scalbn((double)__x, __exp);
 323 }
 324
 325 // We need to define these overloads in exactly the namespace our standard
 326 // library uses (including the right inline namespace), otherwise they won't be
 327 // picked up by other functions in the standard library (e.g. functions in
 328 // <complex>).  Thus the ugliness below.
 329 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD
 330 _LIBCPP_BEGIN_NAMESPACE_STD
 331 #else
 332 namespace std {
 333 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
 334 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 335 #endif
 336 #endif
 337
 338 // Pull the new overloads we defined above into namespace std.
 339 using ::acos;
 340 using ::acosh;
 341 using ::asin;
 342 using ::asinh;
 343 using ::atan;
 344 using ::atan2;
 345 using ::atanh;
 346 using ::cbrt;
 347 using ::ceil;
 348 using ::copysign;
 349 using ::cos;
 350 using ::cosh;
 351 using ::erf;
 352 using ::erfc;
 353 using ::exp;
 354 using ::exp2;
 355 using ::expm1;
 356 using ::fabs;
 357 using ::fdim;
 358 using ::floor;
 359 using ::fma;
 360 using ::fmax;
 361 using ::fmin;
 362 using ::fmod;
 363 using ::fpclassify;
 364 using ::frexp;
 365 using ::hypot;
 366 using ::ilogb;
 367 using ::isfinite;
 368 using ::isgreater;
 369 using ::isgreaterequal;
 370 using ::isless;
 371 using ::islessequal;
 372 using ::islessgreater;
 373 using ::isnormal;
 374 using ::isunordered;
 375 using ::ldexp;
 376 using ::lgamma;
 377 using ::llrint;
 378 using ::llround;
 379 using ::log;
 380 using ::log10;
 381 using ::log1p;
 382 using ::log2;
 383 using ::logb;
 384 using ::lrint;
 385 using ::lround;
 386 using ::nearbyint;
 387 using ::nextafter;
 388 using ::pow;
 389 using ::remainder;
 390 using ::remquo;
 391 using ::rint;
 392 using ::round;
 393 using ::scalbln;
 394 using ::scalbn;
 395 using ::signbit;
 396 using ::sin;
 397 using ::sinh;
 398 using ::sqrt;
 399 using ::tan;
 400 using ::tanh;
 401 using ::tgamma;
 402 using ::trunc;
 403
 404 // Well this is fun: We need to pull these symbols in for libc++, but we can't
 405 // pull them in with libstdc++, because its ::isinf and ::isnan are different
 406 // than its std::isinf and std::isnan.
 407 #ifndef __GLIBCXX__
 408 using ::isinf;
 409 using ::isnan;
 410 #endif
 411
 412 // Finally, pull the "foobarf" functions that CUDA defines in its headers into
 413 // namespace std.
 414 using ::acosf;
 415 using ::acoshf;
 416 using ::asinf;
 417 using ::asinhf;
 418 using ::atan2f;
 419 using ::atanf;
 420 using ::atanhf;
 421 using ::cbrtf;
 422 using ::ceilf;
 423 using ::copysignf;
 424 using ::cosf;
 425 using ::coshf;
 426 using ::erfcf;
 427 using ::erff;
 428 using ::exp2f;
 429 using ::expf;
 430 using ::expm1f;
 431 using ::fabsf;
 432 using ::fdimf;
 433 using ::floorf;
 434 using ::fmaf;
 435 using ::fmaxf;
 436 using ::fminf;
 437 using ::fmodf;
 438 using ::frexpf;
 439 using ::hypotf;
 440 using ::ilogbf;
 441 using ::ldexpf;
 442 using ::lgammaf;
 443 using ::llrintf;
 444 using ::llroundf;
 445 using ::log10f;
 446 using ::log1pf;
 447 using ::log2f;
 448 using ::logbf;
 449 using ::logf;
 450 using ::lrintf;
 451 using ::lroundf;
 452 using ::modff;
 453 using ::nearbyintf;
 454 using ::nextafterf;
 455 using ::powf;
 456 using ::remainderf;
 457 using ::remquof;
 458 using ::rintf;
 459 using ::roundf;
 460 // TODO: remove once variant is supported
 461 #ifndef _OPENMP
 462 using ::scalblnf;
 463 #endif
 464 using ::scalbnf;
 465 using ::sinf;
 466 using ::sinhf;
 467 using ::sqrtf;
 468 using ::tanf;
 469 using ::tanhf;
 470 using ::tgammaf;
 471 using ::truncf;
 472
 473 #ifdef _LIBCPP_END_NAMESPACE_STD
 474 _LIBCPP_END_NAMESPACE_STD
 475 #else
 476 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
 477 _GLIBCXX_END_NAMESPACE_VERSION
 478 #endif
 479 } // namespace std
 480 #endif
 481
 482 #undef __NOEXCEPT
 483 #undef __DEVICE__
 484
 485 #endif