1 /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 #ifndef __CLANG_CUDA_CMATH_H__ 24 #define __CLANG_CUDA_CMATH_H__ 25 #ifndef __CUDA__ 26 #error "This file is for CUDA compilation only." 27 #endif 28 29 #include <limits> 30 31 // CUDA lets us use various std math functions on the device side. This file 32 // works in concert with __clang_cuda_math_forward_declares.h to make this work. 33 // 34 // Specifically, the forward-declares header declares __device__ overloads for 35 // these functions in the global namespace, then pulls them into namespace std 36 // with 'using' statements. Then this file implements those functions, after 37 // their implementations have been pulled in. 38 // 39 // It's important that we declare the functions in the global namespace and pull 40 // them into namespace std with using statements, as opposed to simply declaring 41 // these functions in namespace std, because our device functions need to 42 // overload the standard library functions, which may be declared in the global 43 // namespace or in std, depending on the degree of conformance of the stdlib 44 // implementation. Declaring in the global namespace and pulling into namespace 45 // std covers all of the known knowns. 46 47 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline)) 48 49 __DEVICE__ long long abs(long long __n) { return ::llabs(__n); } 50 __DEVICE__ long abs(long __n) { return ::labs(__n); } 51 __DEVICE__ float abs(float __x) { return ::fabsf(__x); } 52 __DEVICE__ double abs(double __x) { return ::fabs(__x); } 53 __DEVICE__ float acos(float __x) { return ::acosf(__x); } 54 __DEVICE__ float asin(float __x) { return ::asinf(__x); } 55 __DEVICE__ float atan(float __x) { return ::atanf(__x); } 56 __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); } 57 __DEVICE__ float ceil(float __x) { return ::ceilf(__x); } 58 __DEVICE__ float cos(float __x) { return ::cosf(__x); } 59 __DEVICE__ float cosh(float __x) { return ::coshf(__x); } 60 __DEVICE__ float exp(float __x) { return ::expf(__x); } 61 __DEVICE__ float fabs(float __x) { return ::fabsf(__x); } 62 __DEVICE__ float floor(float __x) { return ::floorf(__x); } 63 __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); } 64 __DEVICE__ int fpclassify(float __x) { 65 return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, 66 FP_ZERO, __x); 67 } 68 __DEVICE__ int fpclassify(double __x) { 69 return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, 70 FP_ZERO, __x); 71 } 72 __DEVICE__ float frexp(float __arg, int *__exp) { 73 return ::frexpf(__arg, __exp); 74 } 75 76 // For inscrutable reasons, the CUDA headers define these functions for us on 77 // Windows. 78 #ifndef _MSC_VER 79 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } 80 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } 81 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } 82 // For inscrutable reasons, __finite(), the double-precision version of 83 // __finitef, does not exist when compiling for MacOS. __isfinited is available 84 // everywhere and is just as good. 85 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); } 86 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } 87 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } 88 #endif 89 90 __DEVICE__ bool isgreater(float __x, float __y) { 91 return __builtin_isgreater(__x, __y); 92 } 93 __DEVICE__ bool isgreater(double __x, double __y) { 94 return __builtin_isgreater(__x, __y); 95 } 96 __DEVICE__ bool isgreaterequal(float __x, float __y) { 97 return __builtin_isgreaterequal(__x, __y); 98 } 99 __DEVICE__ bool isgreaterequal(double __x, double __y) { 100 return __builtin_isgreaterequal(__x, __y); 101 } 102 __DEVICE__ bool isless(float __x, float __y) { 103 return __builtin_isless(__x, __y); 104 } 105 __DEVICE__ bool isless(double __x, double __y) { 106 return __builtin_isless(__x, __y); 107 } 108 __DEVICE__ bool islessequal(float __x, float __y) { 109 return __builtin_islessequal(__x, __y); 110 } 111 __DEVICE__ bool islessequal(double __x, double __y) { 112 return __builtin_islessequal(__x, __y); 113 } 114 __DEVICE__ bool islessgreater(float __x, float __y) { 115 return __builtin_islessgreater(__x, __y); 116 } 117 __DEVICE__ bool islessgreater(double __x, double __y) { 118 return __builtin_islessgreater(__x, __y); 119 } 120 __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); } 121 __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); } 122 __DEVICE__ bool isunordered(float __x, float __y) { 123 return __builtin_isunordered(__x, __y); 124 } 125 __DEVICE__ bool isunordered(double __x, double __y) { 126 return __builtin_isunordered(__x, __y); 127 } 128 __DEVICE__ float ldexp(float __arg, int __exp) { 129 return ::ldexpf(__arg, __exp); 130 } 131 __DEVICE__ float log(float __x) { return ::logf(__x); } 132 __DEVICE__ float log10(float __x) { return ::log10f(__x); } 133 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } 134 __DEVICE__ float nexttoward(float __from, double __to) { 135 return __builtin_nexttowardf(__from, __to); 136 } 137 __DEVICE__ double nexttoward(double __from, double __to) { 138 return __builtin_nexttoward(__from, __to); 139 } 140 __DEVICE__ float nexttowardf(float __from, double __to) { 141 return __builtin_nexttowardf(__from, __to); 142 } 143 __DEVICE__ float pow(float __base, float __exp) { 144 return ::powf(__base, __exp); 145 } 146 __DEVICE__ float pow(float __base, int __iexp) { 147 return ::powif(__base, __iexp); 148 } 149 __DEVICE__ double pow(double __base, int __iexp) { 150 return ::powi(__base, __iexp); 151 } 152 __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); } 153 __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); } 154 __DEVICE__ float sin(float __x) { return ::sinf(__x); } 155 __DEVICE__ float sinh(float __x) { return ::sinhf(__x); } 156 __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); } 157 __DEVICE__ float tan(float __x) { return ::tanf(__x); } 158 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); } 159 160 // Now we've defined everything we promised we'd define in 161 // __clang_cuda_math_forward_declares.h. We need to do two additional things to 162 // fix up our math functions. 163 // 164 // 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define 165 // only sin(float) and sin(double), which means that e.g. sin(0) is 166 // ambiguous. 167 // 168 // 2) Pull the __device__ overloads of "foobarf" math functions into namespace 169 // std. These are defined in the CUDA headers in the global namespace, 170 // independent of everything else we've done here. 171 172 // We can't use std::enable_if, because we want to be pre-C++11 compatible. But 173 // we go ahead and unconditionally define functions that are only available when 174 // compiling for C++11 to match the behavior of the CUDA headers. 175 template<bool __B, class __T = void> 176 struct __clang_cuda_enable_if {}; 177 178 template <class __T> struct __clang_cuda_enable_if<true, __T> { 179 typedef __T type; 180 }; 181 182 // Defines an overload of __fn that accepts one integral argument, calls 183 // __fn((double)x), and returns __retty. 184 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \ 185 template <typename __T> \ 186 __DEVICE__ \ 187 typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \ 188 __retty>::type \ 189 __fn(__T __x) { \ 190 return ::__fn((double)__x); \ 191 } 192 193 // Defines an overload of __fn that accepts one two arithmetic arguments, calls 194 // __fn((double)x, (double)y), and returns a double. 195 // 196 // Note this is different from OVERLOAD_1, which generates an overload that 197 // accepts only *integral* arguments. 198 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \ 199 template <typename __T1, typename __T2> \ 200 __DEVICE__ typename __clang_cuda_enable_if< \ 201 std::numeric_limits<__T1>::is_specialized && \ 202 std::numeric_limits<__T2>::is_specialized, \ 203 __retty>::type \ 204 __fn(__T1 __x, __T2 __y) { \ 205 return __fn((double)__x, (double)__y); \ 206 } 207 208 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos) 209 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh) 210 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin) 211 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh) 212 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan) 213 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2); 214 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh) 215 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt) 216 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil) 217 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign); 218 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos) 219 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh) 220 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf) 221 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc) 222 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp) 223 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2) 224 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1) 225 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs) 226 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim); 227 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor) 228 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax); 229 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin); 230 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod); 231 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify) 232 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot); 233 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb) 234 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite) 235 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater); 236 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal); 237 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf); 238 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless); 239 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal); 240 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater); 241 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan); 242 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal) 243 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered); 244 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma) 245 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log) 246 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10) 247 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p) 248 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2) 249 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb) 250 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint) 251 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround) 252 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint) 253 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround) 254 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint); 255 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter); 256 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow); 257 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder); 258 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint); 259 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round); 260 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit) 261 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin) 262 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh) 263 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt) 264 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan) 265 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh) 266 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma) 267 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc); 268 269 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1 270 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2 271 272 // Overloads for functions that don't match the patterns expected by 273 // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}. 274 template <typename __T1, typename __T2, typename __T3> 275 __DEVICE__ typename __clang_cuda_enable_if< 276 std::numeric_limits<__T1>::is_specialized && 277 std::numeric_limits<__T2>::is_specialized && 278 std::numeric_limits<__T3>::is_specialized, 279 double>::type 280 fma(__T1 __x, __T2 __y, __T3 __z) { 281 return std::fma((double)__x, (double)__y, (double)__z); 282 } 283 284 template <typename __T> 285 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 286 double>::type 287 frexp(__T __x, int *__exp) { 288 return std::frexp((double)__x, __exp); 289 } 290 291 template <typename __T> 292 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 293 double>::type 294 ldexp(__T __x, int __exp) { 295 return std::ldexp((double)__x, __exp); 296 } 297 298 template <typename __T> 299 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 300 double>::type 301 nexttoward(__T __from, double __to) { 302 return std::nexttoward((double)__from, __to); 303 } 304 305 template <typename __T1, typename __T2> 306 __DEVICE__ typename __clang_cuda_enable_if< 307 std::numeric_limits<__T1>::is_specialized && 308 std::numeric_limits<__T2>::is_specialized, 309 double>::type 310 remquo(__T1 __x, __T2 __y, int *__quo) { 311 return std::remquo((double)__x, (double)__y, __quo); 312 } 313 314 template <typename __T> 315 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 316 double>::type 317 scalbln(__T __x, long __exp) { 318 return std::scalbln((double)__x, __exp); 319 } 320 321 template <typename __T> 322 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 323 double>::type 324 scalbn(__T __x, int __exp) { 325 return std::scalbn((double)__x, __exp); 326 } 327 328 // We need to define these overloads in exactly the namespace our standard 329 // library uses (including the right inline namespace), otherwise they won't be 330 // picked up by other functions in the standard library (e.g. functions in 331 // <complex>). Thus the ugliness below. 332 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD 333 _LIBCPP_BEGIN_NAMESPACE_STD 334 #else 335 namespace std { 336 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION 337 _GLIBCXX_BEGIN_NAMESPACE_VERSION 338 #endif 339 #endif 340 341 // Pull the new overloads we defined above into namespace std. 342 using ::acos; 343 using ::acosh; 344 using ::asin; 345 using ::asinh; 346 using ::atan; 347 using ::atan2; 348 using ::atanh; 349 using ::cbrt; 350 using ::ceil; 351 using ::copysign; 352 using ::cos; 353 using ::cosh; 354 using ::erf; 355 using ::erfc; 356 using ::exp; 357 using ::exp2; 358 using ::expm1; 359 using ::fabs; 360 using ::fdim; 361 using ::floor; 362 using ::fma; 363 using ::fmax; 364 using ::fmin; 365 using ::fmod; 366 using ::fpclassify; 367 using ::frexp; 368 using ::hypot; 369 using ::ilogb; 370 using ::isfinite; 371 using ::isgreater; 372 using ::isgreaterequal; 373 using ::isless; 374 using ::islessequal; 375 using ::islessgreater; 376 using ::isnormal; 377 using ::isunordered; 378 using ::ldexp; 379 using ::lgamma; 380 using ::llrint; 381 using ::llround; 382 using ::log; 383 using ::log10; 384 using ::log1p; 385 using ::log2; 386 using ::logb; 387 using ::lrint; 388 using ::lround; 389 using ::nearbyint; 390 using ::nextafter; 391 using ::nexttoward; 392 using ::pow; 393 using ::remainder; 394 using ::remquo; 395 using ::rint; 396 using ::round; 397 using ::scalbln; 398 using ::scalbn; 399 using ::signbit; 400 using ::sin; 401 using ::sinh; 402 using ::sqrt; 403 using ::tan; 404 using ::tanh; 405 using ::tgamma; 406 using ::trunc; 407 408 // Well this is fun: We need to pull these symbols in for libc++, but we can't 409 // pull them in with libstdc++, because its ::isinf and ::isnan are different 410 // than its std::isinf and std::isnan. 411 #ifndef __GLIBCXX__ 412 using ::isinf; 413 using ::isnan; 414 #endif 415 416 // Finally, pull the "foobarf" functions that CUDA defines in its headers into 417 // namespace std. 418 using ::acosf; 419 using ::acoshf; 420 using ::asinf; 421 using ::asinhf; 422 using ::atan2f; 423 using ::atanf; 424 using ::atanhf; 425 using ::cbrtf; 426 using ::ceilf; 427 using ::copysignf; 428 using ::cosf; 429 using ::coshf; 430 using ::erfcf; 431 using ::erff; 432 using ::exp2f; 433 using ::expf; 434 using ::expm1f; 435 using ::fabsf; 436 using ::fdimf; 437 using ::floorf; 438 using ::fmaf; 439 using ::fmaxf; 440 using ::fminf; 441 using ::fmodf; 442 using ::frexpf; 443 using ::hypotf; 444 using ::ilogbf; 445 using ::ldexpf; 446 using ::lgammaf; 447 using ::llrintf; 448 using ::llroundf; 449 using ::log10f; 450 using ::log1pf; 451 using ::log2f; 452 using ::logbf; 453 using ::logf; 454 using ::lrintf; 455 using ::lroundf; 456 using ::modff; 457 using ::nearbyintf; 458 using ::nextafterf; 459 using ::nexttowardf; 460 using ::nexttowardf; 461 using ::powf; 462 using ::remainderf; 463 using ::remquof; 464 using ::rintf; 465 using ::roundf; 466 using ::scalblnf; 467 using ::scalbnf; 468 using ::sinf; 469 using ::sinhf; 470 using ::sqrtf; 471 using ::tanf; 472 using ::tanhf; 473 using ::tgammaf; 474 using ::truncf; 475 476 #ifdef _LIBCPP_END_NAMESPACE_STD 477 _LIBCPP_END_NAMESPACE_STD 478 #else 479 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION 480 _GLIBCXX_END_NAMESPACE_VERSION 481 #endif 482 } // namespace std 483 #endif 484 485 #undef __DEVICE__ 486 487 #endif 488