contrib/llvm-project/clang/lib/Headers/__clang_cuda_cmath.h

   1 /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
   2  *
   3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4  * See https://llvm.org/LICENSE.txt for license information.
   5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6  *
   7  *===-----------------------------------------------------------------------===
   8  */
   9 #ifndef __CLANG_CUDA_CMATH_H__
  10 #define __CLANG_CUDA_CMATH_H__
  11 #ifndef __CUDA__
  12 #error "This file is for CUDA compilation only."
  13 #endif
  14
  15 #ifndef __OPENMP_NVPTX__
  16 #include <limits>
  17 #endif
  18
  19 // CUDA lets us use various std math functions on the device side.  This file
  20 // works in concert with __clang_cuda_math_forward_declares.h to make this work.
  21 //
  22 // Specifically, the forward-declares header declares __device__ overloads for
  23 // these functions in the global namespace, then pulls them into namespace std
  24 // with 'using' statements.  Then this file implements those functions, after
  25 // their implementations have been pulled in.
  26 //
  27 // It's important that we declare the functions in the global namespace and pull
  28 // them into namespace std with using statements, as opposed to simply declaring
  29 // these functions in namespace std, because our device functions need to
  30 // overload the standard library functions, which may be declared in the global
  31 // namespace or in std, depending on the degree of conformance of the stdlib
  32 // implementation.  Declaring in the global namespace and pulling into namespace
  33 // std covers all of the known knowns.
  34
  35 #ifdef __OPENMP_NVPTX__
  36 #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
  37 #else
  38 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
  39 #endif
  40
  41 __DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
  42 __DEVICE__ long abs(long __n) { return ::labs(__n); }
  43 __DEVICE__ float abs(float __x) { return ::fabsf(__x); }
  44 __DEVICE__ double abs(double __x) { return ::fabs(__x); }
  45 __DEVICE__ float acos(float __x) { return ::acosf(__x); }
  46 __DEVICE__ float asin(float __x) { return ::asinf(__x); }
  47 __DEVICE__ float atan(float __x) { return ::atanf(__x); }
  48 __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
  49 __DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
  50 __DEVICE__ float cos(float __x) { return ::cosf(__x); }
  51 __DEVICE__ float cosh(float __x) { return ::coshf(__x); }
  52 __DEVICE__ float exp(float __x) { return ::expf(__x); }
  53 __DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
  54 __DEVICE__ float floor(float __x) { return ::floorf(__x); }
  55 __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
  56 __DEVICE__ int fpclassify(float __x) {
  57   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
  58                               FP_ZERO, __x);
  59 }
  60 __DEVICE__ int fpclassify(double __x) {
  61   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
  62                               FP_ZERO, __x);
  63 }
  64 __DEVICE__ float frexp(float __arg, int *__exp) {
  65   return ::frexpf(__arg, __exp);
  66 }
  67
  68 // For inscrutable reasons, the CUDA headers define these functions for us on
  69 // Windows. For OpenMP we omit these as some old system headers have
  70 // non-conforming `isinf(float)` and `isnan(float)` implementations that return
  71 // an `int`. The system versions of these functions should be fine anyway.
  72 #if !defined(_MSC_VER) && !defined(__OPENMP_NVPTX__)
  73 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
  74 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
  75 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
  76 // For inscrutable reasons, __finite(), the double-precision version of
  77 // __finitef, does not exist when compiling for MacOS.  __isfinited is available
  78 // everywhere and is just as good.
  79 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
  80 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
  81 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
  82 #endif
  83
  84 __DEVICE__ bool isgreater(float __x, float __y) {
  85   return __builtin_isgreater(__x, __y);
  86 }
  87 __DEVICE__ bool isgreater(double __x, double __y) {
  88   return __builtin_isgreater(__x, __y);
  89 }
  90 __DEVICE__ bool isgreaterequal(float __x, float __y) {
  91   return __builtin_isgreaterequal(__x, __y);
  92 }
  93 __DEVICE__ bool isgreaterequal(double __x, double __y) {
  94   return __builtin_isgreaterequal(__x, __y);
  95 }
  96 __DEVICE__ bool isless(float __x, float __y) {
  97   return __builtin_isless(__x, __y);
  98 }
  99 __DEVICE__ bool isless(double __x, double __y) {
 100   return __builtin_isless(__x, __y);
 101 }
 102 __DEVICE__ bool islessequal(float __x, float __y) {
 103   return __builtin_islessequal(__x, __y);
 104 }
 105 __DEVICE__ bool islessequal(double __x, double __y) {
 106   return __builtin_islessequal(__x, __y);
 107 }
 108 __DEVICE__ bool islessgreater(float __x, float __y) {
 109   return __builtin_islessgreater(__x, __y);
 110 }
 111 __DEVICE__ bool islessgreater(double __x, double __y) {
 112   return __builtin_islessgreater(__x, __y);
 113 }
 114 __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
 115 __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
 116 __DEVICE__ bool isunordered(float __x, float __y) {
 117   return __builtin_isunordered(__x, __y);
 118 }
 119 __DEVICE__ bool isunordered(double __x, double __y) {
 120   return __builtin_isunordered(__x, __y);
 121 }
 122 __DEVICE__ float ldexp(float __arg, int __exp) {
 123   return ::ldexpf(__arg, __exp);
 124 }
 125 __DEVICE__ float log(float __x) { return ::logf(__x); }
 126 __DEVICE__ float log10(float __x) { return ::log10f(__x); }
 127 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
 128 __DEVICE__ float pow(float __base, float __exp) {
 129   return ::powf(__base, __exp);
 130 }
 131 __DEVICE__ float pow(float __base, int __iexp) {
 132   return ::powif(__base, __iexp);
 133 }
 134 __DEVICE__ double pow(double __base, int __iexp) {
 135   return ::powi(__base, __iexp);
 136 }
 137 __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
 138 __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
 139 __DEVICE__ float sin(float __x) { return ::sinf(__x); }
 140 __DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
 141 __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
 142 __DEVICE__ float tan(float __x) { return ::tanf(__x); }
 143 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
 144
 145 // Notably missing above is nexttoward.  We omit it because
 146 // libdevice doesn't provide an implementation, and we don't want to be in the
 147 // business of implementing tricky libm functions in this header.
 148
 149 #ifndef __OPENMP_NVPTX__
 150
 151 // Now we've defined everything we promised we'd define in
 152 // __clang_cuda_math_forward_declares.h.  We need to do two additional things to
 153 // fix up our math functions.
 154 //
 155 // 1) Define __device__ overloads for e.g. sin(int).  The CUDA headers define
 156 //    only sin(float) and sin(double), which means that e.g. sin(0) is
 157 //    ambiguous.
 158 //
 159 // 2) Pull the __device__ overloads of "foobarf" math functions into namespace
 160 //    std.  These are defined in the CUDA headers in the global namespace,
 161 //    independent of everything else we've done here.
 162
 163 // We can't use std::enable_if, because we want to be pre-C++11 compatible.  But
 164 // we go ahead and unconditionally define functions that are only available when
 165 // compiling for C++11 to match the behavior of the CUDA headers.
 166 template<bool __B, class __T = void>
 167 struct __clang_cuda_enable_if {};
 168
 169 template <class __T> struct __clang_cuda_enable_if<true, __T> {
 170   typedef __T type;
 171 };
 172
 173 // Defines an overload of __fn that accepts one integral argument, calls
 174 // __fn((double)x), and returns __retty.
 175 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
 176   template <typename __T>                                                      \
 177   __DEVICE__                                                                   \
 178       typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
 179                                       __retty>::type                           \
 180       __fn(__T __x) {                                                          \
 181     return ::__fn((double)__x);                                                \
 182   }
 183
 184 // Defines an overload of __fn that accepts one two arithmetic arguments, calls
 185 // __fn((double)x, (double)y), and returns a double.
 186 //
 187 // Note this is different from OVERLOAD_1, which generates an overload that
 188 // accepts only *integral* arguments.
 189 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
 190   template <typename __T1, typename __T2>                                      \
 191   __DEVICE__ typename __clang_cuda_enable_if<                                  \
 192       std::numeric_limits<__T1>::is_specialized &&                             \
 193           std::numeric_limits<__T2>::is_specialized,                           \
 194       __retty>::type                                                           \
 195   __fn(__T1 __x, __T2 __y) {                                                   \
 196     return __fn((double)__x, (double)__y);                                     \
 197   }
 198
 199 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
 200 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
 201 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
 202 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
 203 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
 204 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
 205 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
 206 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
 207 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
 208 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
 209 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
 210 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
 211 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
 212 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
 213 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
 214 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
 215 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
 216 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
 217 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
 218 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
 219 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
 220 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
 221 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
 222 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
 223 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
 224 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
 225 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
 226 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
 227 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
 228 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
 229 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
 230 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
 231 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
 232 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
 233 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
 234 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
 235 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
 236 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
 237 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
 238 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
 239 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
 240 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
 241 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
 242 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
 243 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
 244 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
 245 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
 246 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
 247 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
 248 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
 249 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
 250 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
 251 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
 252 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
 253 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
 254 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
 255 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
 256 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
 257 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
 258 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
 259
 260 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
 261 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
 262
 263 // Overloads for functions that don't match the patterns expected by
 264 // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
 265 template <typename __T1, typename __T2, typename __T3>
 266 __DEVICE__ typename __clang_cuda_enable_if<
 267     std::numeric_limits<__T1>::is_specialized &&
 268         std::numeric_limits<__T2>::is_specialized &&
 269         std::numeric_limits<__T3>::is_specialized,
 270     double>::type
 271 fma(__T1 __x, __T2 __y, __T3 __z) {
 272   return std::fma((double)__x, (double)__y, (double)__z);
 273 }
 274
 275 template <typename __T>
 276 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 277                                            double>::type
 278 frexp(__T __x, int *__exp) {
 279   return std::frexp((double)__x, __exp);
 280 }
 281
 282 template <typename __T>
 283 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 284                                            double>::type
 285 ldexp(__T __x, int __exp) {
 286   return std::ldexp((double)__x, __exp);
 287 }
 288
 289 template <typename __T1, typename __T2>
 290 __DEVICE__ typename __clang_cuda_enable_if<
 291     std::numeric_limits<__T1>::is_specialized &&
 292         std::numeric_limits<__T2>::is_specialized,
 293     double>::type
 294 remquo(__T1 __x, __T2 __y, int *__quo) {
 295   return std::remquo((double)__x, (double)__y, __quo);
 296 }
 297
 298 template <typename __T>
 299 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 300                                            double>::type
 301 scalbln(__T __x, long __exp) {
 302   return std::scalbln((double)__x, __exp);
 303 }
 304
 305 template <typename __T>
 306 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
 307                                            double>::type
 308 scalbn(__T __x, int __exp) {
 309   return std::scalbn((double)__x, __exp);
 310 }
 311
 312 // We need to define these overloads in exactly the namespace our standard
 313 // library uses (including the right inline namespace), otherwise they won't be
 314 // picked up by other functions in the standard library (e.g. functions in
 315 // <complex>).  Thus the ugliness below.
 316 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD
 317 _LIBCPP_BEGIN_NAMESPACE_STD
 318 #else
 319 namespace std {
 320 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
 321 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 322 #endif
 323 #endif
 324
 325 // Pull the new overloads we defined above into namespace std.
 326 using ::acos;
 327 using ::acosh;
 328 using ::asin;
 329 using ::asinh;
 330 using ::atan;
 331 using ::atan2;
 332 using ::atanh;
 333 using ::cbrt;
 334 using ::ceil;
 335 using ::copysign;
 336 using ::cos;
 337 using ::cosh;
 338 using ::erf;
 339 using ::erfc;
 340 using ::exp;
 341 using ::exp2;
 342 using ::expm1;
 343 using ::fabs;
 344 using ::fdim;
 345 using ::floor;
 346 using ::fma;
 347 using ::fmax;
 348 using ::fmin;
 349 using ::fmod;
 350 using ::fpclassify;
 351 using ::frexp;
 352 using ::hypot;
 353 using ::ilogb;
 354 using ::isfinite;
 355 using ::isgreater;
 356 using ::isgreaterequal;
 357 using ::isless;
 358 using ::islessequal;
 359 using ::islessgreater;
 360 using ::isnormal;
 361 using ::isunordered;
 362 using ::ldexp;
 363 using ::lgamma;
 364 using ::llrint;
 365 using ::llround;
 366 using ::log;
 367 using ::log10;
 368 using ::log1p;
 369 using ::log2;
 370 using ::logb;
 371 using ::lrint;
 372 using ::lround;
 373 using ::nearbyint;
 374 using ::nextafter;
 375 using ::pow;
 376 using ::remainder;
 377 using ::remquo;
 378 using ::rint;
 379 using ::round;
 380 using ::scalbln;
 381 using ::scalbn;
 382 using ::signbit;
 383 using ::sin;
 384 using ::sinh;
 385 using ::sqrt;
 386 using ::tan;
 387 using ::tanh;
 388 using ::tgamma;
 389 using ::trunc;
 390
 391 // Well this is fun: We need to pull these symbols in for libc++, but we can't
 392 // pull them in with libstdc++, because its ::isinf and ::isnan are different
 393 // than its std::isinf and std::isnan.
 394 #ifndef __GLIBCXX__
 395 using ::isinf;
 396 using ::isnan;
 397 #endif
 398
 399 // Finally, pull the "foobarf" functions that CUDA defines in its headers into
 400 // namespace std.
 401 using ::acosf;
 402 using ::acoshf;
 403 using ::asinf;
 404 using ::asinhf;
 405 using ::atan2f;
 406 using ::atanf;
 407 using ::atanhf;
 408 using ::cbrtf;
 409 using ::ceilf;
 410 using ::copysignf;
 411 using ::cosf;
 412 using ::coshf;
 413 using ::erfcf;
 414 using ::erff;
 415 using ::exp2f;
 416 using ::expf;
 417 using ::expm1f;
 418 using ::fabsf;
 419 using ::fdimf;
 420 using ::floorf;
 421 using ::fmaf;
 422 using ::fmaxf;
 423 using ::fminf;
 424 using ::fmodf;
 425 using ::frexpf;
 426 using ::hypotf;
 427 using ::ilogbf;
 428 using ::ldexpf;
 429 using ::lgammaf;
 430 using ::llrintf;
 431 using ::llroundf;
 432 using ::log10f;
 433 using ::log1pf;
 434 using ::log2f;
 435 using ::logbf;
 436 using ::logf;
 437 using ::lrintf;
 438 using ::lroundf;
 439 using ::modff;
 440 using ::nearbyintf;
 441 using ::nextafterf;
 442 using ::powf;
 443 using ::remainderf;
 444 using ::remquof;
 445 using ::rintf;
 446 using ::roundf;
 447 using ::scalblnf;
 448 using ::scalbnf;
 449 using ::sinf;
 450 using ::sinhf;
 451 using ::sqrtf;
 452 using ::tanf;
 453 using ::tanhf;
 454 using ::tgammaf;
 455 using ::truncf;
 456
 457 #ifdef _LIBCPP_END_NAMESPACE_STD
 458 _LIBCPP_END_NAMESPACE_STD
 459 #else
 460 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
 461 _GLIBCXX_END_NAMESPACE_VERSION
 462 #endif
 463 } // namespace std
 464 #endif
 465
 466 #endif // __OPENMP_NVPTX__
 467
 468 #undef __DEVICE__
 469
 470 #endif