Home | History | Annotate | Download | only in include
      1 /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 #ifndef __CLANG_CUDA_CMATH_H__
     24 #define __CLANG_CUDA_CMATH_H__
     25 #ifndef __CUDA__
     26 #error "This file is for CUDA compilation only."
     27 #endif
     28 
     29 #include <limits>
     30 
     31 // CUDA lets us use various std math functions on the device side.  This file
     32 // works in concert with __clang_cuda_math_forward_declares.h to make this work.
     33 //
     34 // Specifically, the forward-declares header declares __device__ overloads for
     35 // these functions in the global namespace, then pulls them into namespace std
     36 // with 'using' statements.  Then this file implements those functions, after
     37 // their implementations have been pulled in.
     38 //
     39 // It's important that we declare the functions in the global namespace and pull
     40 // them into namespace std with using statements, as opposed to simply declaring
     41 // these functions in namespace std, because our device functions need to
     42 // overload the standard library functions, which may be declared in the global
     43 // namespace or in std, depending on the degree of conformance of the stdlib
     44 // implementation.  Declaring in the global namespace and pulling into namespace
     45 // std covers all of the known knowns.
     46 
     47 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
     48 
     49 __DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
     50 __DEVICE__ long abs(long __n) { return ::labs(__n); }
     51 __DEVICE__ float abs(float __x) { return ::fabsf(__x); }
     52 __DEVICE__ double abs(double __x) { return ::fabs(__x); }
     53 __DEVICE__ float acos(float __x) { return ::acosf(__x); }
     54 __DEVICE__ float asin(float __x) { return ::asinf(__x); }
     55 __DEVICE__ float atan(float __x) { return ::atanf(__x); }
     56 __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
     57 __DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
     58 __DEVICE__ float cos(float __x) { return ::cosf(__x); }
     59 __DEVICE__ float cosh(float __x) { return ::coshf(__x); }
     60 __DEVICE__ float exp(float __x) { return ::expf(__x); }
     61 __DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
     62 __DEVICE__ float floor(float __x) { return ::floorf(__x); }
     63 __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
     64 __DEVICE__ int fpclassify(float __x) {
     65   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
     66                               FP_ZERO, __x);
     67 }
     68 __DEVICE__ int fpclassify(double __x) {
     69   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
     70                               FP_ZERO, __x);
     71 }
     72 __DEVICE__ float frexp(float __arg, int *__exp) {
     73   return ::frexpf(__arg, __exp);
     74 }
     75 
     76 // For inscrutable reasons, the CUDA headers define these functions for us on
     77 // Windows.
     78 #ifndef _MSC_VER
     79 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
     80 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
     81 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
     82 // For inscrutable reasons, __finite(), the double-precision version of
     83 // __finitef, does not exist when compiling for MacOS.  __isfinited is available
     84 // everywhere and is just as good.
     85 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
     86 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
     87 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
     88 #endif
     89 
     90 __DEVICE__ bool isgreater(float __x, float __y) {
     91   return __builtin_isgreater(__x, __y);
     92 }
     93 __DEVICE__ bool isgreater(double __x, double __y) {
     94   return __builtin_isgreater(__x, __y);
     95 }
     96 __DEVICE__ bool isgreaterequal(float __x, float __y) {
     97   return __builtin_isgreaterequal(__x, __y);
     98 }
     99 __DEVICE__ bool isgreaterequal(double __x, double __y) {
    100   return __builtin_isgreaterequal(__x, __y);
    101 }
    102 __DEVICE__ bool isless(float __x, float __y) {
    103   return __builtin_isless(__x, __y);
    104 }
    105 __DEVICE__ bool isless(double __x, double __y) {
    106   return __builtin_isless(__x, __y);
    107 }
    108 __DEVICE__ bool islessequal(float __x, float __y) {
    109   return __builtin_islessequal(__x, __y);
    110 }
    111 __DEVICE__ bool islessequal(double __x, double __y) {
    112   return __builtin_islessequal(__x, __y);
    113 }
    114 __DEVICE__ bool islessgreater(float __x, float __y) {
    115   return __builtin_islessgreater(__x, __y);
    116 }
    117 __DEVICE__ bool islessgreater(double __x, double __y) {
    118   return __builtin_islessgreater(__x, __y);
    119 }
    120 __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
    121 __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
    122 __DEVICE__ bool isunordered(float __x, float __y) {
    123   return __builtin_isunordered(__x, __y);
    124 }
    125 __DEVICE__ bool isunordered(double __x, double __y) {
    126   return __builtin_isunordered(__x, __y);
    127 }
    128 __DEVICE__ float ldexp(float __arg, int __exp) {
    129   return ::ldexpf(__arg, __exp);
    130 }
    131 __DEVICE__ float log(float __x) { return ::logf(__x); }
    132 __DEVICE__ float log10(float __x) { return ::log10f(__x); }
    133 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
    134 __DEVICE__ float nexttoward(float __from, double __to) {
    135   return __builtin_nexttowardf(__from, __to);
    136 }
    137 __DEVICE__ double nexttoward(double __from, double __to) {
    138   return __builtin_nexttoward(__from, __to);
    139 }
    140 __DEVICE__ float nexttowardf(float __from, double __to) {
    141   return __builtin_nexttowardf(__from, __to);
    142 }
    143 __DEVICE__ float pow(float __base, float __exp) {
    144   return ::powf(__base, __exp);
    145 }
    146 __DEVICE__ float pow(float __base, int __iexp) {
    147   return ::powif(__base, __iexp);
    148 }
    149 __DEVICE__ double pow(double __base, int __iexp) {
    150   return ::powi(__base, __iexp);
    151 }
    152 __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
    153 __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
    154 __DEVICE__ float sin(float __x) { return ::sinf(__x); }
    155 __DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
    156 __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
    157 __DEVICE__ float tan(float __x) { return ::tanf(__x); }
    158 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
    159 
    160 // Now we've defined everything we promised we'd define in
    161 // __clang_cuda_math_forward_declares.h.  We need to do two additional things to
    162 // fix up our math functions.
    163 //
    164 // 1) Define __device__ overloads for e.g. sin(int).  The CUDA headers define
    165 //    only sin(float) and sin(double), which means that e.g. sin(0) is
    166 //    ambiguous.
    167 //
    168 // 2) Pull the __device__ overloads of "foobarf" math functions into namespace
    169 //    std.  These are defined in the CUDA headers in the global namespace,
    170 //    independent of everything else we've done here.
    171 
    172 // We can't use std::enable_if, because we want to be pre-C++11 compatible.  But
    173 // we go ahead and unconditionally define functions that are only available when
    174 // compiling for C++11 to match the behavior of the CUDA headers.
    175 template<bool __B, class __T = void>
    176 struct __clang_cuda_enable_if {};
    177 
    178 template <class __T> struct __clang_cuda_enable_if<true, __T> {
    179   typedef __T type;
    180 };
    181 
    182 // Defines an overload of __fn that accepts one integral argument, calls
    183 // __fn((double)x), and returns __retty.
    184 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
    185   template <typename __T>                                                      \
    186   __DEVICE__                                                                   \
    187       typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
    188                                       __retty>::type                           \
    189       __fn(__T __x) {                                                          \
    190     return ::__fn((double)__x);                                                \
    191   }
    192 
    193 // Defines an overload of __fn that accepts one two arithmetic arguments, calls
    194 // __fn((double)x, (double)y), and returns a double.
    195 //
    196 // Note this is different from OVERLOAD_1, which generates an overload that
    197 // accepts only *integral* arguments.
    198 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
    199   template <typename __T1, typename __T2>                                      \
    200   __DEVICE__ typename __clang_cuda_enable_if<                                  \
    201       std::numeric_limits<__T1>::is_specialized &&                             \
    202           std::numeric_limits<__T2>::is_specialized,                           \
    203       __retty>::type                                                           \
    204   __fn(__T1 __x, __T2 __y) {                                                   \
    205     return __fn((double)__x, (double)__y);                                     \
    206   }
    207 
    208 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
    209 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
    210 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
    211 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
    212 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
    213 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
    214 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
    215 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
    216 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
    217 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
    218 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
    219 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
    220 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
    221 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
    222 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
    223 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
    224 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
    225 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
    226 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
    227 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
    228 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
    229 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
    230 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
    231 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
    232 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
    233 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
    234 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
    235 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
    236 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
    237 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
    238 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
    239 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
    240 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
    241 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
    242 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
    243 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
    244 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
    245 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
    246 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
    247 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
    248 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
    249 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
    250 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
    251 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
    252 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
    253 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
    254 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
    255 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
    256 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
    257 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
    258 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
    259 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
    260 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
    261 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
    262 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
    263 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
    264 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
    265 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
    266 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
    267 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
    268 
    269 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
    270 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
    271 
    272 // Overloads for functions that don't match the patterns expected by
    273 // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
    274 template <typename __T1, typename __T2, typename __T3>
    275 __DEVICE__ typename __clang_cuda_enable_if<
    276     std::numeric_limits<__T1>::is_specialized &&
    277         std::numeric_limits<__T2>::is_specialized &&
    278         std::numeric_limits<__T3>::is_specialized,
    279     double>::type
    280 fma(__T1 __x, __T2 __y, __T3 __z) {
    281   return std::fma((double)__x, (double)__y, (double)__z);
    282 }
    283 
    284 template <typename __T>
    285 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
    286                                            double>::type
    287 frexp(__T __x, int *__exp) {
    288   return std::frexp((double)__x, __exp);
    289 }
    290 
    291 template <typename __T>
    292 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
    293                                            double>::type
    294 ldexp(__T __x, int __exp) {
    295   return std::ldexp((double)__x, __exp);
    296 }
    297 
    298 template <typename __T>
    299 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
    300                                            double>::type
    301 nexttoward(__T __from, double __to) {
    302   return std::nexttoward((double)__from, __to);
    303 }
    304 
    305 template <typename __T1, typename __T2>
    306 __DEVICE__ typename __clang_cuda_enable_if<
    307     std::numeric_limits<__T1>::is_specialized &&
    308         std::numeric_limits<__T2>::is_specialized,
    309     double>::type
    310 remquo(__T1 __x, __T2 __y, int *__quo) {
    311   return std::remquo((double)__x, (double)__y, __quo);
    312 }
    313 
    314 template <typename __T>
    315 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
    316                                            double>::type
    317 scalbln(__T __x, long __exp) {
    318   return std::scalbln((double)__x, __exp);
    319 }
    320 
    321 template <typename __T>
    322 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
    323                                            double>::type
    324 scalbn(__T __x, int __exp) {
    325   return std::scalbn((double)__x, __exp);
    326 }
    327 
    328 // We need to define these overloads in exactly the namespace our standard
    329 // library uses (including the right inline namespace), otherwise they won't be
    330 // picked up by other functions in the standard library (e.g. functions in
    331 // <complex>).  Thus the ugliness below.
    332 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD
    333 _LIBCPP_BEGIN_NAMESPACE_STD
    334 #else
    335 namespace std {
    336 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
    337 _GLIBCXX_BEGIN_NAMESPACE_VERSION
    338 #endif
    339 #endif
    340 
    341 // Pull the new overloads we defined above into namespace std.
    342 using ::acos;
    343 using ::acosh;
    344 using ::asin;
    345 using ::asinh;
    346 using ::atan;
    347 using ::atan2;
    348 using ::atanh;
    349 using ::cbrt;
    350 using ::ceil;
    351 using ::copysign;
    352 using ::cos;
    353 using ::cosh;
    354 using ::erf;
    355 using ::erfc;
    356 using ::exp;
    357 using ::exp2;
    358 using ::expm1;
    359 using ::fabs;
    360 using ::fdim;
    361 using ::floor;
    362 using ::fma;
    363 using ::fmax;
    364 using ::fmin;
    365 using ::fmod;
    366 using ::fpclassify;
    367 using ::frexp;
    368 using ::hypot;
    369 using ::ilogb;
    370 using ::isfinite;
    371 using ::isgreater;
    372 using ::isgreaterequal;
    373 using ::isless;
    374 using ::islessequal;
    375 using ::islessgreater;
    376 using ::isnormal;
    377 using ::isunordered;
    378 using ::ldexp;
    379 using ::lgamma;
    380 using ::llrint;
    381 using ::llround;
    382 using ::log;
    383 using ::log10;
    384 using ::log1p;
    385 using ::log2;
    386 using ::logb;
    387 using ::lrint;
    388 using ::lround;
    389 using ::nearbyint;
    390 using ::nextafter;
    391 using ::nexttoward;
    392 using ::pow;
    393 using ::remainder;
    394 using ::remquo;
    395 using ::rint;
    396 using ::round;
    397 using ::scalbln;
    398 using ::scalbn;
    399 using ::signbit;
    400 using ::sin;
    401 using ::sinh;
    402 using ::sqrt;
    403 using ::tan;
    404 using ::tanh;
    405 using ::tgamma;
    406 using ::trunc;
    407 
    408 // Well this is fun: We need to pull these symbols in for libc++, but we can't
    409 // pull them in with libstdc++, because its ::isinf and ::isnan are different
    410 // than its std::isinf and std::isnan.
    411 #ifndef __GLIBCXX__
    412 using ::isinf;
    413 using ::isnan;
    414 #endif
    415 
    416 // Finally, pull the "foobarf" functions that CUDA defines in its headers into
    417 // namespace std.
    418 using ::acosf;
    419 using ::acoshf;
    420 using ::asinf;
    421 using ::asinhf;
    422 using ::atan2f;
    423 using ::atanf;
    424 using ::atanhf;
    425 using ::cbrtf;
    426 using ::ceilf;
    427 using ::copysignf;
    428 using ::cosf;
    429 using ::coshf;
    430 using ::erfcf;
    431 using ::erff;
    432 using ::exp2f;
    433 using ::expf;
    434 using ::expm1f;
    435 using ::fabsf;
    436 using ::fdimf;
    437 using ::floorf;
    438 using ::fmaf;
    439 using ::fmaxf;
    440 using ::fminf;
    441 using ::fmodf;
    442 using ::frexpf;
    443 using ::hypotf;
    444 using ::ilogbf;
    445 using ::ldexpf;
    446 using ::lgammaf;
    447 using ::llrintf;
    448 using ::llroundf;
    449 using ::log10f;
    450 using ::log1pf;
    451 using ::log2f;
    452 using ::logbf;
    453 using ::logf;
    454 using ::lrintf;
    455 using ::lroundf;
    456 using ::modff;
    457 using ::nearbyintf;
    458 using ::nextafterf;
    459 using ::nexttowardf;
    460 using ::nexttowardf;
    461 using ::powf;
    462 using ::remainderf;
    463 using ::remquof;
    464 using ::rintf;
    465 using ::roundf;
    466 using ::scalblnf;
    467 using ::scalbnf;
    468 using ::sinf;
    469 using ::sinhf;
    470 using ::sqrtf;
    471 using ::tanf;
    472 using ::tanhf;
    473 using ::tgammaf;
    474 using ::truncf;
    475 
    476 #ifdef _LIBCPP_END_NAMESPACE_STD
    477 _LIBCPP_END_NAMESPACE_STD
    478 #else
    479 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
    480 _GLIBCXX_END_NAMESPACE_VERSION
    481 #endif
    482 } // namespace std
    483 #endif
    484 
    485 #undef __DEVICE__
    486 
    487 #endif
    488