Home | History | Annotate | Download | only in include
      1 /*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __CLANG_CUDA_COMPLEX_BUILTINS
     25 #define __CLANG_CUDA_COMPLEX_BUILTINS
     26 
     27 // This header defines __muldc3, __mulsc3, __divdc3, and __divsc3.  These are
     28 // libgcc functions that clang assumes are available when compiling c99 complex
     29 // operations.  (These implementations come from libc++, and have been modified
     30 // to work with CUDA.)
     31 
     32 extern "C" inline __device__ double _Complex __muldc3(double __a, double __b,
     33                                                       double __c, double __d) {
     34   double __ac = __a * __c;
     35   double __bd = __b * __d;
     36   double __ad = __a * __d;
     37   double __bc = __b * __c;
     38   double _Complex z;
     39   __real__(z) = __ac - __bd;
     40   __imag__(z) = __ad + __bc;
     41   if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
     42     int __recalc = 0;
     43     if (std::isinf(__a) || std::isinf(__b)) {
     44       __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
     45       __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
     46       if (std::isnan(__c))
     47         __c = std::copysign(0, __c);
     48       if (std::isnan(__d))
     49         __d = std::copysign(0, __d);
     50       __recalc = 1;
     51     }
     52     if (std::isinf(__c) || std::isinf(__d)) {
     53       __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
     54       __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
     55       if (std::isnan(__a))
     56         __a = std::copysign(0, __a);
     57       if (std::isnan(__b))
     58         __b = std::copysign(0, __b);
     59       __recalc = 1;
     60     }
     61     if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||
     62                       std::isinf(__ad) || std::isinf(__bc))) {
     63       if (std::isnan(__a))
     64         __a = std::copysign(0, __a);
     65       if (std::isnan(__b))
     66         __b = std::copysign(0, __b);
     67       if (std::isnan(__c))
     68         __c = std::copysign(0, __c);
     69       if (std::isnan(__d))
     70         __d = std::copysign(0, __d);
     71       __recalc = 1;
     72     }
     73     if (__recalc) {
     74       // Can't use std::numeric_limits<double>::infinity() -- that doesn't have
     75       // a device overload (and isn't constexpr before C++11, naturally).
     76       __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
     77       __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
     78     }
     79   }
     80   return z;
     81 }
     82 
     83 extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b,
     84                                                      float __c, float __d) {
     85   float __ac = __a * __c;
     86   float __bd = __b * __d;
     87   float __ad = __a * __d;
     88   float __bc = __b * __c;
     89   float _Complex z;
     90   __real__(z) = __ac - __bd;
     91   __imag__(z) = __ad + __bc;
     92   if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
     93     int __recalc = 0;
     94     if (std::isinf(__a) || std::isinf(__b)) {
     95       __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
     96       __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
     97       if (std::isnan(__c))
     98         __c = std::copysign(0, __c);
     99       if (std::isnan(__d))
    100         __d = std::copysign(0, __d);
    101       __recalc = 1;
    102     }
    103     if (std::isinf(__c) || std::isinf(__d)) {
    104       __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
    105       __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
    106       if (std::isnan(__a))
    107         __a = std::copysign(0, __a);
    108       if (std::isnan(__b))
    109         __b = std::copysign(0, __b);
    110       __recalc = 1;
    111     }
    112     if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||
    113                       std::isinf(__ad) || std::isinf(__bc))) {
    114       if (std::isnan(__a))
    115         __a = std::copysign(0, __a);
    116       if (std::isnan(__b))
    117         __b = std::copysign(0, __b);
    118       if (std::isnan(__c))
    119         __c = std::copysign(0, __c);
    120       if (std::isnan(__d))
    121         __d = std::copysign(0, __d);
    122       __recalc = 1;
    123     }
    124     if (__recalc) {
    125       __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
    126       __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
    127     }
    128   }
    129   return z;
    130 }
    131 
    132 extern "C" inline __device__ double _Complex __divdc3(double __a, double __b,
    133                                                       double __c, double __d) {
    134   int __ilogbw = 0;
    135   // Can't use std::max, because that's defined in <algorithm>, and we don't
    136   // want to pull that in for every compile.  The CUDA headers define
    137   // ::max(float, float) and ::max(double, double), which is sufficient for us.
    138   double __logbw = std::logb(max(std::abs(__c), std::abs(__d)));
    139   if (std::isfinite(__logbw)) {
    140     __ilogbw = (int)__logbw;
    141     __c = std::scalbn(__c, -__ilogbw);
    142     __d = std::scalbn(__d, -__ilogbw);
    143   }
    144   double __denom = __c * __c + __d * __d;
    145   double _Complex z;
    146   __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
    147   __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
    148   if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
    149     if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) {
    150       __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;
    151       __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;
    152     } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
    153                std::isfinite(__d)) {
    154       __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);
    155       __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);
    156       __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
    157       __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
    158     } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) &&
    159                std::isfinite(__b)) {
    160       __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);
    161       __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d);
    162       __real__(z) = 0.0 * (__a * __c + __b * __d);
    163       __imag__(z) = 0.0 * (__b * __c - __a * __d);
    164     }
    165   }
    166   return z;
    167 }
    168 
    169 extern "C" inline __device__ float _Complex __divsc3(float __a, float __b,
    170                                                      float __c, float __d) {
    171   int __ilogbw = 0;
    172   float __logbw = std::logb(max(std::abs(__c), std::abs(__d)));
    173   if (std::isfinite(__logbw)) {
    174     __ilogbw = (int)__logbw;
    175     __c = std::scalbn(__c, -__ilogbw);
    176     __d = std::scalbn(__d, -__ilogbw);
    177   }
    178   float __denom = __c * __c + __d * __d;
    179   float _Complex z;
    180   __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
    181   __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
    182   if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
    183     if ((__denom == 0) && (!std::isnan(__a) || !std::isnan(__b))) {
    184       __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;
    185       __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;
    186     } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
    187                std::isfinite(__d)) {
    188       __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
    189       __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
    190       __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
    191       __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
    192     } else if (std::isinf(__logbw) && __logbw > 0 && std::isfinite(__a) &&
    193                std::isfinite(__b)) {
    194       __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
    195       __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
    196       __real__(z) = 0 * (__a * __c + __b * __d);
    197       __imag__(z) = 0 * (__b * __c - __a * __d);
    198     }
    199   }
    200   return z;
    201 }
    202 
    203 #endif // __CLANG_CUDA_COMPLEX_BUILTINS
    204