1 /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 *===-----------------------------------------------------------------------===
23 #ifndef __CLANG_CUDA_CMATH_H__
24 #define __CLANG_CUDA_CMATH_H__
26 #error "This file is for CUDA compilation only."
31 // CUDA lets us use various std math functions on the device side. This file
32 // works in concert with __clang_cuda_math_forward_declares.h to make this work.
34 // Specifically, the forward-declares header declares __device__ overloads for
35 // these functions in the global namespace, then pulls them into namespace std
36 // with 'using' statements. Then this file implements those functions, after
37 // their implementations have been pulled in.
39 // It's important that we declare the functions in the global namespace and pull
40 // them into namespace std with using statements, as opposed to simply declaring
41 // these functions in namespace std, because our device functions need to
42 // overload the standard library functions, which may be declared in the global
43 // namespace or in std, depending on the degree of conformance of the stdlib
44 // implementation. Declaring in the global namespace and pulling into namespace
45 // std covers all of the known knowns.
47 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
49 __DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
50 __DEVICE__ long abs(long __n) { return ::labs(__n); }
51 __DEVICE__ float abs(float __x) { return ::fabsf(__x); }
52 __DEVICE__ double abs(double __x) { return ::fabs(__x); }
53 __DEVICE__ float acos(float __x) { return ::acosf(__x); }
54 __DEVICE__ float asin(float __x) { return ::asinf(__x); }
55 __DEVICE__ float atan(float __x) { return ::atanf(__x); }
56 __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
57 __DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
58 __DEVICE__ float cos(float __x) { return ::cosf(__x); }
59 __DEVICE__ float cosh(float __x) { return ::coshf(__x); }
60 __DEVICE__ float exp(float __x) { return ::expf(__x); }
61 __DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
62 __DEVICE__ float floor(float __x) { return ::floorf(__x); }
63 __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
64 __DEVICE__ int fpclassify(float __x) {
65 return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
68 __DEVICE__ int fpclassify(double __x) {
69 return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
72 __DEVICE__ float frexp(float __arg, int *__exp) {
73 return ::frexpf(__arg, __exp);
76 // For inscrutable reasons, the CUDA headers define these functions for us on
79 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
80 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
81 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
82 // For inscrutable reasons, __finite(), the double-precision version of
83 // __finitef, does not exist when compiling for MacOS. __isfinited is available
84 // everywhere and is just as good.
85 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
86 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
87 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
90 __DEVICE__ bool isgreater(float __x, float __y) {
91 return __builtin_isgreater(__x, __y);
93 __DEVICE__ bool isgreater(double __x, double __y) {
94 return __builtin_isgreater(__x, __y);
96 __DEVICE__ bool isgreaterequal(float __x, float __y) {
97 return __builtin_isgreaterequal(__x, __y);
99 __DEVICE__ bool isgreaterequal(double __x, double __y) {
100 return __builtin_isgreaterequal(__x, __y);
102 __DEVICE__ bool isless(float __x, float __y) {
103 return __builtin_isless(__x, __y);
105 __DEVICE__ bool isless(double __x, double __y) {
106 return __builtin_isless(__x, __y);
108 __DEVICE__ bool islessequal(float __x, float __y) {
109 return __builtin_islessequal(__x, __y);
111 __DEVICE__ bool islessequal(double __x, double __y) {
112 return __builtin_islessequal(__x, __y);
114 __DEVICE__ bool islessgreater(float __x, float __y) {
115 return __builtin_islessgreater(__x, __y);
117 __DEVICE__ bool islessgreater(double __x, double __y) {
118 return __builtin_islessgreater(__x, __y);
120 __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
121 __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
122 __DEVICE__ bool isunordered(float __x, float __y) {
123 return __builtin_isunordered(__x, __y);
125 __DEVICE__ bool isunordered(double __x, double __y) {
126 return __builtin_isunordered(__x, __y);
128 __DEVICE__ float ldexp(float __arg, int __exp) {
129 return ::ldexpf(__arg, __exp);
131 __DEVICE__ float log(float __x) { return ::logf(__x); }
132 __DEVICE__ float log10(float __x) { return ::log10f(__x); }
133 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
134 __DEVICE__ float nexttoward(float __from, double __to) {
135 return __builtin_nexttowardf(__from, __to);
137 __DEVICE__ double nexttoward(double __from, double __to) {
138 return __builtin_nexttoward(__from, __to);
140 __DEVICE__ float nexttowardf(float __from, double __to) {
141 return __builtin_nexttowardf(__from, __to);
143 __DEVICE__ float pow(float __base, float __exp) {
144 return ::powf(__base, __exp);
146 __DEVICE__ float pow(float __base, int __iexp) {
147 return ::powif(__base, __iexp);
149 __DEVICE__ double pow(double __base, int __iexp) {
150 return ::powi(__base, __iexp);
152 __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
153 __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
154 __DEVICE__ float sin(float __x) { return ::sinf(__x); }
155 __DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
156 __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
157 __DEVICE__ float tan(float __x) { return ::tanf(__x); }
158 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
160 // Now we've defined everything we promised we'd define in
161 // __clang_cuda_math_forward_declares.h. We need to do two additional things to
162 // fix up our math functions.
164 // 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
165 // only sin(float) and sin(double), which means that e.g. sin(0) is
168 // 2) Pull the __device__ overloads of "foobarf" math functions into namespace
169 // std. These are defined in the CUDA headers in the global namespace,
170 // independent of everything else we've done here.
172 // We can't use std::enable_if, because we want to be pre-C++11 compatible. But
173 // we go ahead and unconditionally define functions that are only available when
174 // compiling for C++11 to match the behavior of the CUDA headers.
175 template<bool __B, class __T = void>
176 struct __clang_cuda_enable_if {};
178 template <class __T> struct __clang_cuda_enable_if<true, __T> {
182 // Defines an overload of __fn that accepts one integral argument, calls
183 // __fn((double)x), and returns __retty.
184 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \
185 template <typename __T> \
187 typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \
190 return ::__fn((double)__x); \
193 // Defines an overload of __fn that accepts one two arithmetic arguments, calls
194 // __fn((double)x, (double)y), and returns a double.
196 // Note this is different from OVERLOAD_1, which generates an overload that
197 // accepts only *integral* arguments.
198 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \
199 template <typename __T1, typename __T2> \
200 __DEVICE__ typename __clang_cuda_enable_if< \
201 std::numeric_limits<__T1>::is_specialized && \
202 std::numeric_limits<__T2>::is_specialized, \
204 __fn(__T1 __x, __T2 __y) { \
205 return __fn((double)__x, (double)__y); \
208 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
209 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
210 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
211 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
212 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
213 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
214 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
215 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
216 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
217 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
218 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
219 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
220 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
221 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
222 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
223 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
224 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
225 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
226 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
227 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
228 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
229 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
230 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
231 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
232 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
233 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
234 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
235 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
236 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
237 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
238 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
239 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
240 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
241 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
242 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
243 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
244 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
245 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
246 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
247 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
248 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
249 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
250 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
251 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
252 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
253 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
254 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
255 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
256 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
257 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
258 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
259 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
260 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
261 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
262 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
263 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
264 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
265 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
266 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
267 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
269 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
270 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
272 // Overloads for functions that don't match the patterns expected by
273 // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
274 template <typename __T1, typename __T2, typename __T3>
275 __DEVICE__ typename __clang_cuda_enable_if<
276 std::numeric_limits<__T1>::is_specialized &&
277 std::numeric_limits<__T2>::is_specialized &&
278 std::numeric_limits<__T3>::is_specialized,
280 fma(__T1 __x, __T2 __y, __T3 __z) {
281 return std::fma((double)__x, (double)__y, (double)__z);
284 template <typename __T>
285 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
287 frexp(__T __x, int *__exp) {
288 return std::frexp((double)__x, __exp);
291 template <typename __T>
292 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
294 ldexp(__T __x, int __exp) {
295 return std::ldexp((double)__x, __exp);
298 template <typename __T>
299 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
301 nexttoward(__T __from, double __to) {
302 return std::nexttoward((double)__from, __to);
305 template <typename __T1, typename __T2>
306 __DEVICE__ typename __clang_cuda_enable_if<
307 std::numeric_limits<__T1>::is_specialized &&
308 std::numeric_limits<__T2>::is_specialized,
310 remquo(__T1 __x, __T2 __y, int *__quo) {
311 return std::remquo((double)__x, (double)__y, __quo);
314 template <typename __T>
315 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
317 scalbln(__T __x, long __exp) {
318 return std::scalbln((double)__x, __exp);
321 template <typename __T>
322 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
324 scalbn(__T __x, int __exp) {
325 return std::scalbn((double)__x, __exp);
328 // We need to define these overloads in exactly the namespace our standard
329 // library uses (including the right inline namespace), otherwise they won't be
330 // picked up by other functions in the standard library (e.g. functions in
331 // <complex>). Thus the ugliness below.
332 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD
333 _LIBCPP_BEGIN_NAMESPACE_STD
336 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
337 _GLIBCXX_BEGIN_NAMESPACE_VERSION
341 // Pull the new overloads we defined above into namespace std.
372 using ::isgreaterequal;
375 using ::islessgreater;
408 // Well this is fun: We need to pull these symbols in for libc++, but we can't
409 // pull them in with libstdc++, because its ::isinf and ::isnan are different
410 // than its std::isinf and std::isnan.
416 // Finally, pull the "foobarf" functions that CUDA defines in its headers into
476 #ifdef _LIBCPP_END_NAMESPACE_STD
477 _LIBCPP_END_NAMESPACE_STD
479 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
480 _GLIBCXX_END_NAMESPACE_VERSION