Home | History | Annotate | Download | only in runtime
      1 #include "rs_types.rsh"
      2 
      3 extern float2 __attribute__((overloadable)) convert_float2(int2 c);
      4 extern float3 __attribute__((overloadable)) convert_float3(int3 c);
      5 extern float4 __attribute__((overloadable)) convert_float4(int4 c);
      6 
      7 extern int2 __attribute__((overloadable)) convert_int2(float2 c);
      8 extern int3 __attribute__((overloadable)) convert_int3(float3 c);
      9 extern int4 __attribute__((overloadable)) convert_int4(float4 c);
     10 
     11 
     12 extern float __attribute__((overloadable)) fmin(float v, float v2);
     13 extern float2 __attribute__((overloadable)) fmin(float2 v, float v2);
     14 extern float3 __attribute__((overloadable)) fmin(float3 v, float v2);
     15 extern float4 __attribute__((overloadable)) fmin(float4 v, float v2);
     16 
     17 extern float __attribute__((overloadable)) fmax(float v, float v2);
     18 extern float2 __attribute__((overloadable)) fmax(float2 v, float v2);
     19 extern float3 __attribute__((overloadable)) fmax(float3 v, float v2);
     20 extern float4 __attribute__((overloadable)) fmax(float4 v, float v2);
     21 
     22 // Float ops, 6.11.2
     23 
     24 #define FN_FUNC_FN(fnc)                                         \
     25 extern float2 __attribute__((overloadable)) fnc(float2 v) { \
     26     float2 r;                                                   \
     27     r.x = fnc(v.x);                                             \
     28     r.y = fnc(v.y);                                             \
     29     return r;                                                   \
     30 }                                                               \
     31 extern float3 __attribute__((overloadable)) fnc(float3 v) { \
     32     float3 r;                                                   \
     33     r.x = fnc(v.x);                                             \
     34     r.y = fnc(v.y);                                             \
     35     r.z = fnc(v.z);                                             \
     36     return r;                                                   \
     37 }                                                               \
     38 extern float4 __attribute__((overloadable)) fnc(float4 v) { \
     39     float4 r;                                                   \
     40     r.x = fnc(v.x);                                             \
     41     r.y = fnc(v.y);                                             \
     42     r.z = fnc(v.z);                                             \
     43     r.w = fnc(v.w);                                             \
     44     return r;                                                   \
     45 }
     46 
     47 #define IN_FUNC_FN(fnc)                                         \
     48 extern int2 __attribute__((overloadable)) fnc(float2 v) {   \
     49     int2 r;                                                     \
     50     r.x = fnc(v.x);                                             \
     51     r.y = fnc(v.y);                                             \
     52     return r;                                                   \
     53 }                                                               \
     54 extern int3 __attribute__((overloadable)) fnc(float3 v) {   \
     55     int3 r;                                                     \
     56     r.x = fnc(v.x);                                             \
     57     r.y = fnc(v.y);                                             \
     58     r.z = fnc(v.z);                                             \
     59     return r;                                                   \
     60 }                                                               \
     61 extern int4 __attribute__((overloadable)) fnc(float4 v) {   \
     62     int4 r;                                                     \
     63     r.x = fnc(v.x);                                             \
     64     r.y = fnc(v.y);                                             \
     65     r.z = fnc(v.z);                                             \
     66     r.w = fnc(v.w);                                             \
     67     return r;                                                   \
     68 }
     69 
     70 #define FN_FUNC_FN_FN(fnc)                                                  \
     71 extern float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2) { \
     72     float2 r;                                                               \
     73     r.x = fnc(v1.x, v2.x);                                                  \
     74     r.y = fnc(v1.y, v2.y);                                                  \
     75     return r;                                                               \
     76 }                                                                           \
     77 extern float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2) { \
     78     float3 r;                                                               \
     79     r.x = fnc(v1.x, v2.x);                                                  \
     80     r.y = fnc(v1.y, v2.y);                                                  \
     81     r.z = fnc(v1.z, v2.z);                                                  \
     82     return r;                                                               \
     83 }                                                                           \
     84 extern float4 __attribute__((overloadable)) fnc(float4 v1, float4 v2) { \
     85     float4 r;                                                               \
     86     r.x = fnc(v1.x, v2.x);                                                  \
     87     r.y = fnc(v1.y, v2.y);                                                  \
     88     r.z = fnc(v1.z, v2.z);                                                  \
     89     r.w = fnc(v1.w, v2.w);                                                  \
     90     return r;                                                               \
     91 }
     92 
     93 #define FN_FUNC_FN_F(fnc)                                                   \
     94 extern float2 __attribute__((overloadable)) fnc(float2 v1, float v2) {  \
     95     float2 r;                                                               \
     96     r.x = fnc(v1.x, v2);                                                    \
     97     r.y = fnc(v1.y, v2);                                                    \
     98     return r;                                                               \
     99 }                                                                           \
    100 extern float3 __attribute__((overloadable)) fnc(float3 v1, float v2) {  \
    101     float3 r;                                                               \
    102     r.x = fnc(v1.x, v2);                                                    \
    103     r.y = fnc(v1.y, v2);                                                    \
    104     r.z = fnc(v1.z, v2);                                                    \
    105     return r;                                                               \
    106 }                                                                           \
    107 extern float4 __attribute__((overloadable)) fnc(float4 v1, float v2) {  \
    108     float4 r;                                                               \
    109     r.x = fnc(v1.x, v2);                                                    \
    110     r.y = fnc(v1.y, v2);                                                    \
    111     r.z = fnc(v1.z, v2);                                                    \
    112     r.w = fnc(v1.w, v2);                                                    \
    113     return r;                                                               \
    114 }
    115 
    116 #define FN_FUNC_FN_IN(fnc)                                                  \
    117 extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 v2) {   \
    118     float2 r;                                                               \
    119     r.x = fnc(v1.x, v2.x);                                                  \
    120     r.y = fnc(v1.y, v2.y);                                                  \
    121     return r;                                                               \
    122 }                                                                           \
    123 extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 v2) {   \
    124     float3 r;                                                               \
    125     r.x = fnc(v1.x, v2.x);                                                  \
    126     r.y = fnc(v1.y, v2.y);                                                  \
    127     r.z = fnc(v1.z, v2.z);                                                  \
    128     return r;                                                               \
    129 }                                                                           \
    130 extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 v2) {   \
    131     float4 r;                                                               \
    132     r.x = fnc(v1.x, v2.x);                                                  \
    133     r.y = fnc(v1.y, v2.y);                                                  \
    134     r.z = fnc(v1.z, v2.z);                                                  \
    135     r.w = fnc(v1.w, v2.w);                                                  \
    136     return r;                                                               \
    137 }
    138 
    139 #define FN_FUNC_FN_I(fnc)                                                   \
    140 extern float2 __attribute__((overloadable)) fnc(float2 v1, int v2) {    \
    141     float2 r;                                                               \
    142     r.x = fnc(v1.x, v2);                                                    \
    143     r.y = fnc(v1.y, v2);                                                    \
    144     return r;                                                               \
    145 }                                                                           \
    146 extern float3 __attribute__((overloadable)) fnc(float3 v1, int v2) {    \
    147     float3 r;                                                               \
    148     r.x = fnc(v1.x, v2);                                                    \
    149     r.y = fnc(v1.y, v2);                                                    \
    150     r.z = fnc(v1.z, v2);                                                    \
    151     return r;                                                               \
    152 }                                                                           \
    153 extern float4 __attribute__((overloadable)) fnc(float4 v1, int v2) {    \
    154     float4 r;                                                               \
    155     r.x = fnc(v1.x, v2);                                                    \
    156     r.y = fnc(v1.y, v2);                                                    \
    157     r.z = fnc(v1.z, v2);                                                    \
    158     r.w = fnc(v1.w, v2);                                                    \
    159     return r;                                                               \
    160 }
    161 
    162 #define FN_FUNC_FN_PFN(fnc)                     \
    163 extern float2 __attribute__((overloadable)) \
    164         fnc(float2 v1, float2 *v2) {            \
    165     float2 r;                                   \
    166     float t[2];                                 \
    167     r.x = fnc(v1.x, &t[0]);                     \
    168     r.y = fnc(v1.y, &t[1]);                     \
    169     v2->x = t[0];                               \
    170     v2->y = t[1];                               \
    171     return r;                                   \
    172 }                                               \
    173 extern float3 __attribute__((overloadable)) \
    174         fnc(float3 v1, float3 *v2) {            \
    175     float3 r;                                   \
    176     float t[3];                                 \
    177     r.x = fnc(v1.x, &t[0]);                     \
    178     r.y = fnc(v1.y, &t[1]);                     \
    179     r.z = fnc(v1.z, &t[2]);                     \
    180     v2->x = t[0];                               \
    181     v2->y = t[1];                               \
    182     v2->z = t[2];                               \
    183     return r;                                   \
    184 }                                               \
    185 extern float4 __attribute__((overloadable)) \
    186         fnc(float4 v1, float4 *v2) {            \
    187     float4 r;                                   \
    188     float t[4];                                 \
    189     r.x = fnc(v1.x, &t[0]);                     \
    190     r.y = fnc(v1.y, &t[1]);                     \
    191     r.z = fnc(v1.z, &t[2]);                     \
    192     r.w = fnc(v1.w, &t[3]);                     \
    193     v2->x = t[0];                               \
    194     v2->y = t[1];                               \
    195     v2->z = t[2];                               \
    196     v2->w = t[3];                               \
    197     return r;                                   \
    198 }
    199 
    200 #define FN_FUNC_FN_PIN(fnc)                                                 \
    201 extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 *v2) {  \
    202     float2 r;                                                               \
    203     int t[2];                                                               \
    204     r.x = fnc(v1.x, &t[0]);                                                 \
    205     r.y = fnc(v1.y, &t[1]);                                                 \
    206     v2->x = t[0];                                                           \
    207     v2->y = t[1];                                                           \
    208     return r;                                                               \
    209 }                                                                           \
    210 extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 *v2) {  \
    211     float3 r;                                                               \
    212     int t[3];                                                               \
    213     r.x = fnc(v1.x, &t[0]);                                                 \
    214     r.y = fnc(v1.y, &t[1]);                                                 \
    215     r.z = fnc(v1.z, &t[2]);                                                 \
    216     v2->x = t[0];                                                           \
    217     v2->y = t[1];                                                           \
    218     v2->z = t[2];                                                           \
    219     return r;                                                               \
    220 }                                                                           \
    221 extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 *v2) {  \
    222     float4 r;                                                               \
    223     int t[4];                                                               \
    224     r.x = fnc(v1.x, &t[0]);                                                 \
    225     r.y = fnc(v1.y, &t[1]);                                                 \
    226     r.z = fnc(v1.z, &t[2]);                                                 \
    227     r.w = fnc(v1.w, &t[3]);                                                 \
    228     v2->x = t[0];                                                           \
    229     v2->y = t[1];                                                           \
    230     v2->z = t[2];                                                           \
    231     v2->w = t[3];                                                           \
    232     return r;                                                               \
    233 }
    234 
    235 #define FN_FUNC_FN_FN_FN(fnc)                   \
    236 extern float2 __attribute__((overloadable)) \
    237         fnc(float2 v1, float2 v2, float2 v3) {  \
    238     float2 r;                                   \
    239     r.x = fnc(v1.x, v2.x, v3.x);                \
    240     r.y = fnc(v1.y, v2.y, v3.y);                \
    241     return r;                                   \
    242 }                                               \
    243 extern float3 __attribute__((overloadable)) \
    244         fnc(float3 v1, float3 v2, float3 v3) {  \
    245     float3 r;                                   \
    246     r.x = fnc(v1.x, v2.x, v3.x);                \
    247     r.y = fnc(v1.y, v2.y, v3.y);                \
    248     r.z = fnc(v1.z, v2.z, v3.z);                \
    249     return r;                                   \
    250 }                                               \
    251 extern float4 __attribute__((overloadable)) \
    252         fnc(float4 v1, float4 v2, float4 v3) {  \
    253     float4 r;                                   \
    254     r.x = fnc(v1.x, v2.x, v3.x);                \
    255     r.y = fnc(v1.y, v2.y, v3.y);                \
    256     r.z = fnc(v1.z, v2.z, v3.z);                \
    257     r.w = fnc(v1.w, v2.w, v3.w);                \
    258     return r;                                   \
    259 }
    260 
    261 #define FN_FUNC_FN_FN_PIN(fnc)                  \
    262 extern float2 __attribute__((overloadable)) \
    263         fnc(float2 v1, float2 v2, int2 *v3) {   \
    264     float2 r;                                   \
    265     int t[2];                                   \
    266     r.x = fnc(v1.x, v2.x, &t[0]);               \
    267     r.y = fnc(v1.y, v2.y, &t[1]);               \
    268     v3->x = t[0];                               \
    269     v3->y = t[1];                               \
    270     return r;                                   \
    271 }                                               \
    272 extern float3 __attribute__((overloadable)) \
    273         fnc(float3 v1, float3 v2, int3 *v3) {   \
    274     float3 r;                                   \
    275     int t[3];                                   \
    276     r.x = fnc(v1.x, v2.x, &t[0]);               \
    277     r.y = fnc(v1.y, v2.y, &t[1]);               \
    278     r.z = fnc(v1.z, v2.z, &t[2]);               \
    279     v3->x = t[0];                               \
    280     v3->y = t[1];                               \
    281     v3->z = t[2];                               \
    282     return r;                                   \
    283 }                                               \
    284 extern float4 __attribute__((overloadable)) \
    285         fnc(float4 v1, float4 v2, int4 *v3) {   \
    286     float4 r;                                   \
    287     int t[4];                                   \
    288     r.x = fnc(v1.x, v2.x, &t[0]);               \
    289     r.y = fnc(v1.y, v2.y, &t[1]);               \
    290     r.z = fnc(v1.z, v2.z, &t[2]);               \
    291     r.w = fnc(v1.w, v2.w, &t[3]);               \
    292     v3->x = t[0];                               \
    293     v3->y = t[1];                               \
    294     v3->z = t[2];                               \
    295     v3->w = t[3];                               \
    296     return r;                                   \
    297 }
    298 
    299 static const int iposinf = 0x7f800000;
    300 static const int ineginf = 0xff800000;
    301 
    302 static const float posinf() {
    303     float f = *((float*)&iposinf);
    304     return f;
    305 }
    306 
    307 static const float neginf() {
    308     float f = *((float*)&ineginf);
    309     return f;
    310 }
    311 
    312 static bool isinf(float f) {
    313     int i = *((int*)(void*)&f);
    314     return (i == iposinf) || (i == ineginf);
    315 }
    316 
    317 static bool isnan(float f) {
    318     int i = *((int*)(void*)&f);
    319     return (((i & 0x7f800000) == 0x7f800000) && (i & 0x007fffff));
    320 }
    321 
    322 static bool isposzero(float f) {
    323     int i = *((int*)(void*)&f);
    324     return (i == 0x00000000);
    325 }
    326 
    327 static bool isnegzero(float f) {
    328     int i = *((int*)(void*)&f);
    329     return (i == 0x80000000);
    330 }
    331 
    332 static bool iszero(float f) {
    333     return isposzero(f) || isnegzero(f);
    334 }
    335 
    336 
    337 extern float __attribute__((overloadable)) acos(float);
    338 FN_FUNC_FN(acos)
    339 
    340 extern float __attribute__((overloadable)) acosh(float);
    341 FN_FUNC_FN(acosh)
    342 
    343 
    344 extern float __attribute__((overloadable)) acospi(float v) {
    345     return acos(v) / M_PI;
    346 }
    347 FN_FUNC_FN(acospi)
    348 
    349 extern float __attribute__((overloadable)) asin(float);
    350 FN_FUNC_FN(asin)
    351 
    352 extern float __attribute__((overloadable)) asinh(float);
    353 FN_FUNC_FN(asinh)
    354 
    355 extern float __attribute__((overloadable)) asinpi(float v) {
    356     return asin(v) / M_PI;
    357 }
    358 FN_FUNC_FN(asinpi)
    359 
    360 extern float __attribute__((overloadable)) atan(float);
    361 FN_FUNC_FN(atan)
    362 
    363 extern float __attribute__((overloadable)) atan2(float, float);
    364 FN_FUNC_FN_FN(atan2)
    365 
    366 extern float __attribute__((overloadable)) atanh(float);
    367 FN_FUNC_FN(atanh)
    368 
    369 extern float __attribute__((overloadable)) atanpi(float v) {
    370     return atan(v) / M_PI;
    371 }
    372 FN_FUNC_FN(atanpi)
    373 
    374 
    375 extern float __attribute__((overloadable)) atan2pi(float y, float x) {
    376     return atan2(y, x) / M_PI;
    377 }
    378 FN_FUNC_FN_FN(atan2pi)
    379 
    380 extern float __attribute__((overloadable)) cbrt(float);
    381 FN_FUNC_FN(cbrt)
    382 
    383 extern float __attribute__((overloadable)) ceil(float);
    384 FN_FUNC_FN(ceil)
    385 
    386 extern float __attribute__((overloadable)) copysign(float, float);
    387 FN_FUNC_FN_FN(copysign)
    388 
    389 extern float __attribute__((overloadable)) cos(float);
    390 FN_FUNC_FN(cos)
    391 
    392 extern float __attribute__((overloadable)) cosh(float);
    393 FN_FUNC_FN(cosh)
    394 
    395 extern float __attribute__((overloadable)) cospi(float v) {
    396     return cos(v * M_PI);
    397 }
    398 FN_FUNC_FN(cospi)
    399 
    400 extern float __attribute__((overloadable)) erfc(float);
    401 FN_FUNC_FN(erfc)
    402 
    403 extern float __attribute__((overloadable)) erf(float);
    404 FN_FUNC_FN(erf)
    405 
    406 extern float __attribute__((overloadable)) exp(float);
    407 FN_FUNC_FN(exp)
    408 
    409 extern float __attribute__((overloadable)) exp2(float);
    410 FN_FUNC_FN(exp2)
    411 
    412 extern float __attribute__((overloadable)) pow(float, float);
    413 
    414 extern float __attribute__((overloadable)) exp10(float v) {
    415     return exp2(v * 3.321928095f);
    416 }
    417 FN_FUNC_FN(exp10)
    418 
    419 extern float __attribute__((overloadable)) expm1(float);
    420 FN_FUNC_FN(expm1)
    421 
    422 extern float __attribute__((overloadable)) fabs(float v) {
    423     int i = *((int*)(void*)&v) & 0x7fffffff;
    424     return  *((float*)(void*)&i);
    425 }
    426 FN_FUNC_FN(fabs)
    427 
    428 extern float __attribute__((overloadable)) fdim(float, float);
    429 FN_FUNC_FN_FN(fdim)
    430 
    431 extern float __attribute__((overloadable)) floor(float);
    432 FN_FUNC_FN(floor)
    433 
    434 extern float __attribute__((overloadable)) fma(float, float, float);
    435 FN_FUNC_FN_FN_FN(fma)
    436 
    437 extern float __attribute__((overloadable)) fmin(float, float);
    438 
    439 extern float __attribute__((overloadable)) fmod(float, float);
    440 FN_FUNC_FN_FN(fmod)
    441 
    442 extern float __attribute__((overloadable)) fract(float v, float *iptr) {
    443     int i = (int)floor(v);
    444     if (iptr) {
    445         iptr[0] = i;
    446     }
    447     return fmin(v - i, 0x1.fffffep-1f);
    448 }
    449 FN_FUNC_FN_PFN(fract)
    450 
    451 extern float __attribute__((overloadable)) frexp(float, int *);
    452 FN_FUNC_FN_PIN(frexp)
    453 
    454 extern float __attribute__((overloadable)) hypot(float, float);
    455 FN_FUNC_FN_FN(hypot)
    456 
    457 extern int __attribute__((overloadable)) ilogb(float);
    458 IN_FUNC_FN(ilogb)
    459 
    460 extern float __attribute__((overloadable)) ldexp(float, int);
    461 FN_FUNC_FN_IN(ldexp)
    462 FN_FUNC_FN_I(ldexp)
    463 
    464 extern float __attribute__((overloadable)) lgamma(float);
    465 FN_FUNC_FN(lgamma)
    466 extern float __attribute__((overloadable)) lgamma(float, int*);
    467 FN_FUNC_FN_PIN(lgamma)
    468 
    469 extern float __attribute__((overloadable)) log(float);
    470 FN_FUNC_FN(log)
    471 
    472 extern float __attribute__((overloadable)) log10(float);
    473 FN_FUNC_FN(log10)
    474 
    475 
    476 extern float __attribute__((overloadable)) log2(float v) {
    477     return log10(v) * 3.321928095f;
    478 }
    479 FN_FUNC_FN(log2)
    480 
    481 extern float __attribute__((overloadable)) log1p(float);
    482 FN_FUNC_FN(log1p)
    483 
    484 extern float __attribute__((overloadable)) logb(float);
    485 FN_FUNC_FN(logb)
    486 
    487 extern float __attribute__((overloadable)) mad(float a, float b, float c) {
    488     return a * b + c;
    489 }
    490 extern float2 __attribute__((overloadable)) mad(float2 a, float2 b, float2 c) {
    491     return a * b + c;
    492 }
    493 extern float3 __attribute__((overloadable)) mad(float3 a, float3 b, float3 c) {
    494     return a * b + c;
    495 }
    496 extern float4 __attribute__((overloadable)) mad(float4 a, float4 b, float4 c) {
    497     return a * b + c;
    498 }
    499 
    500 extern float __attribute__((overloadable)) modf(float, float *);
    501 FN_FUNC_FN_PFN(modf);
    502 
    503 extern float __attribute__((overloadable)) nan(uint v) {
    504     float f[1];
    505     uint32_t *ip = (uint32_t *)f;
    506     *ip = v | 0x7fc00000;
    507     return f[0];
    508 }
    509 
    510 extern float __attribute__((overloadable)) nextafter(float, float);
    511 FN_FUNC_FN_FN(nextafter)
    512 
    513 FN_FUNC_FN_FN(pow)
    514 
    515 extern float __attribute__((overloadable)) pown(float v, int p) {
    516     return pow(v, (float)p);
    517 }
    518 extern float2 __attribute__((overloadable)) pown(float2 v, int2 p) {
    519     float2 f2 = convert_float2(p);
    520     return pow(v, f2);
    521 }
    522 extern float3 __attribute__((overloadable)) pown(float3 v, int3 p) {
    523     float3 f3 = convert_float3(p);
    524     return pow(v, f3);
    525 }
    526 extern float4 __attribute__((overloadable)) pown(float4 v, int4 p) {
    527     float4 f4 = convert_float4(p);
    528     return pow(v, f4);
    529 }
    530 
    531 extern float __attribute__((overloadable)) powr(float v, float p) {
    532     return pow(v, p);
    533 }
    534 extern float2 __attribute__((overloadable)) powr(float2 v, float2 p) {
    535     return pow(v, p);
    536 }
    537 extern float3 __attribute__((overloadable)) powr(float3 v, float3 p) {
    538     return pow(v, p);
    539 }
    540 extern float4 __attribute__((overloadable)) powr(float4 v, float4 p) {
    541     return pow(v, p);
    542 }
    543 
    544 extern float __attribute__((overloadable)) remainder(float, float);
    545 FN_FUNC_FN_FN(remainder)
    546 
    547 extern float __attribute__((overloadable)) remquo(float, float, int *);
    548 FN_FUNC_FN_FN_PIN(remquo)
    549 
    550 extern float __attribute__((overloadable)) rint(float);
    551 FN_FUNC_FN(rint)
    552 
    553 extern float __attribute__((overloadable)) rootn(float v, int r) {
    554     if (r == 0) {
    555         return nan(0);
    556     }
    557 
    558     if (iszero(v)) {
    559         if (r < 0) {
    560             if (r & 1) {
    561                 return copysign(posinf(), v);
    562             } else {
    563                 return posinf();
    564             }
    565         } else {
    566             if (r & 1) {
    567                 return copysign(0.f, v);
    568             } else {
    569                 return 0.f;
    570             }
    571         }
    572     }
    573 
    574     if (!isinf(v) && !isnan(v) && (v < 0.f)) {
    575         if (r & 1) {
    576             return (-1.f * pow(-1.f * v, 1.f / r));
    577         } else {
    578             return nan(0);
    579         }
    580     }
    581 
    582     return pow(v, 1.f / r);
    583 }
    584 FN_FUNC_FN_IN(rootn);
    585 
    586 extern float __attribute__((overloadable)) round(float);
    587 FN_FUNC_FN(round)
    588 
    589 
    590 extern float __attribute__((overloadable)) sqrt(float);
    591 extern float __attribute__((overloadable)) rsqrt(float v) {
    592     return 1.f / sqrt(v);
    593 }
    594 FN_FUNC_FN(rsqrt)
    595 
    596 extern float __attribute__((overloadable)) sin(float);
    597 FN_FUNC_FN(sin)
    598 
    599 extern float __attribute__((overloadable)) sincos(float v, float *cosptr) {
    600     *cosptr = cos(v);
    601     return sin(v);
    602 }
    603 extern float2 __attribute__((overloadable)) sincos(float2 v, float2 *cosptr) {
    604     *cosptr = cos(v);
    605     return sin(v);
    606 }
    607 extern float3 __attribute__((overloadable)) sincos(float3 v, float3 *cosptr) {
    608     *cosptr = cos(v);
    609     return sin(v);
    610 }
    611 extern float4 __attribute__((overloadable)) sincos(float4 v, float4 *cosptr) {
    612     *cosptr = cos(v);
    613     return sin(v);
    614 }
    615 
    616 extern float __attribute__((overloadable)) sinh(float);
    617 FN_FUNC_FN(sinh)
    618 
    619 extern float __attribute__((overloadable)) sinpi(float v) {
    620     return sin(v * M_PI);
    621 }
    622 FN_FUNC_FN(sinpi)
    623 
    624 extern float __attribute__((overloadable)) tan(float);
    625 FN_FUNC_FN(tan)
    626 
    627 extern float __attribute__((overloadable)) tanh(float);
    628 FN_FUNC_FN(tanh)
    629 
    630 extern float __attribute__((overloadable)) tanpi(float v) {
    631     return tan(v * M_PI);
    632 }
    633 FN_FUNC_FN(tanpi)
    634 
    635 
    636 extern float __attribute__((overloadable)) tgamma(float);
    637 FN_FUNC_FN(tgamma)
    638 
    639 extern float __attribute__((overloadable)) trunc(float);
    640 FN_FUNC_FN(trunc)
    641 
    642 // Int ops (partial), 6.11.3
    643 
    644 #define XN_FUNC_YN(typeout, fnc, typein)                                \
    645 extern typeout __attribute__((overloadable)) fnc(typein);               \
    646 extern typeout##2 __attribute__((overloadable)) fnc(typein##2 v) {  \
    647     typeout##2 r;                                                       \
    648     r.x = fnc(v.x);                                                     \
    649     r.y = fnc(v.y);                                                     \
    650     return r;                                                           \
    651 }                                                                       \
    652 extern typeout##3 __attribute__((overloadable)) fnc(typein##3 v) {  \
    653     typeout##3 r;                                                       \
    654     r.x = fnc(v.x);                                                     \
    655     r.y = fnc(v.y);                                                     \
    656     r.z = fnc(v.z);                                                     \
    657     return r;                                                           \
    658 }                                                                       \
    659 extern typeout##4 __attribute__((overloadable)) fnc(typein##4 v) {  \
    660     typeout##4 r;                                                       \
    661     r.x = fnc(v.x);                                                     \
    662     r.y = fnc(v.y);                                                     \
    663     r.z = fnc(v.z);                                                     \
    664     r.w = fnc(v.w);                                                     \
    665     return r;                                                           \
    666 }
    667 
    668 
    669 #define UIN_FUNC_IN(fnc)          \
    670 XN_FUNC_YN(uchar, fnc, char)      \
    671 XN_FUNC_YN(ushort, fnc, short)    \
    672 XN_FUNC_YN(uint, fnc, int)
    673 
    674 #define IN_FUNC_IN(fnc)           \
    675 XN_FUNC_YN(uchar, fnc, uchar)     \
    676 XN_FUNC_YN(char, fnc, char)       \
    677 XN_FUNC_YN(ushort, fnc, ushort)   \
    678 XN_FUNC_YN(short, fnc, short)     \
    679 XN_FUNC_YN(uint, fnc, uint)       \
    680 XN_FUNC_YN(int, fnc, int)
    681 
    682 
    683 #define XN_FUNC_XN_XN_BODY(type, fnc, body)         \
    684 extern type __attribute__((overloadable))       \
    685         fnc(type v1, type v2) {                     \
    686     return body;                                    \
    687 }                                                   \
    688 extern type##2 __attribute__((overloadable))    \
    689         fnc(type##2 v1, type##2 v2) {               \
    690     type##2 r;                                      \
    691     r.x = fnc(v1.x, v2.x);                          \
    692     r.y = fnc(v1.y, v2.y);                          \
    693     return r;                                       \
    694 }                                                   \
    695 extern type##3 __attribute__((overloadable))    \
    696         fnc(type##3 v1, type##3 v2) {               \
    697     type##3 r;                                      \
    698     r.x = fnc(v1.x, v2.x);                          \
    699     r.y = fnc(v1.y, v2.y);                          \
    700     r.z = fnc(v1.z, v2.z);                          \
    701     return r;                                       \
    702 }                                                   \
    703 extern type##4 __attribute__((overloadable))    \
    704         fnc(type##4 v1, type##4 v2) {               \
    705     type##4 r;                                      \
    706     r.x = fnc(v1.x, v2.x);                          \
    707     r.y = fnc(v1.y, v2.y);                          \
    708     r.z = fnc(v1.z, v2.z);                          \
    709     r.w = fnc(v1.w, v2.w);                          \
    710     return r;                                       \
    711 }
    712 
    713 #define IN_FUNC_IN_IN_BODY(fnc, body) \
    714 XN_FUNC_XN_XN_BODY(uchar, fnc, body)  \
    715 XN_FUNC_XN_XN_BODY(char, fnc, body)   \
    716 XN_FUNC_XN_XN_BODY(ushort, fnc, body) \
    717 XN_FUNC_XN_XN_BODY(short, fnc, body)  \
    718 XN_FUNC_XN_XN_BODY(uint, fnc, body)   \
    719 XN_FUNC_XN_XN_BODY(int, fnc, body)    \
    720 XN_FUNC_XN_XN_BODY(float, fnc, body)
    721 
    722 
    723 /**
    724  * abs
    725  */
    726 extern uint32_t __attribute__((overloadable)) abs(int32_t v) {
    727     if (v < 0)
    728         return -v;
    729     return v;
    730 }
    731 extern uint16_t __attribute__((overloadable)) abs(int16_t v) {
    732     if (v < 0)
    733         return -v;
    734     return v;
    735 }
    736 extern uint8_t __attribute__((overloadable)) abs(int8_t v) {
    737     if (v < 0)
    738         return -v;
    739     return v;
    740 }
    741 
    742 /**
    743  * clz
    744  */
    745 extern uint32_t __attribute__((overloadable)) clz(uint32_t v) {
    746     return __builtin_clz(v);
    747 }
    748 extern uint16_t __attribute__((overloadable)) clz(uint16_t v) {
    749     return (uint16_t)__builtin_clz(v);
    750 }
    751 extern uint8_t __attribute__((overloadable)) clz(uint8_t v) {
    752     return (uint8_t)__builtin_clz(v);
    753 }
    754 extern int32_t __attribute__((overloadable)) clz(int32_t v) {
    755     return (int32_t)__builtin_clz((uint32_t)v);
    756 }
    757 extern int16_t __attribute__((overloadable)) clz(int16_t v) {
    758     return (int16_t)__builtin_clz(v);
    759 }
    760 extern int8_t __attribute__((overloadable)) clz(int8_t v) {
    761     return (int8_t)__builtin_clz(v);
    762 }
    763 
    764 
    765 UIN_FUNC_IN(abs)
    766 IN_FUNC_IN(clz)
    767 
    768 
    769 // 6.11.4
    770 
    771 
    772 extern float __attribute__((overloadable)) degrees(float radians) {
    773     return radians * (180.f / M_PI);
    774 }
    775 extern float2 __attribute__((overloadable)) degrees(float2 radians) {
    776     return radians * (180.f / M_PI);
    777 }
    778 extern float3 __attribute__((overloadable)) degrees(float3 radians) {
    779     return radians * (180.f / M_PI);
    780 }
    781 extern float4 __attribute__((overloadable)) degrees(float4 radians) {
    782     return radians * (180.f / M_PI);
    783 }
    784 
    785 extern float __attribute__((overloadable)) mix(float start, float stop, float amount) {
    786     return start + (stop - start) * amount;
    787 }
    788 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float2 amount) {
    789     return start + (stop - start) * amount;
    790 }
    791 extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float3 amount) {
    792     return start + (stop - start) * amount;
    793 }
    794 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float4 amount) {
    795     return start + (stop - start) * amount;
    796 }
    797 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float amount) {
    798     return start + (stop - start) * amount;
    799 }
    800 extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float amount) {
    801     return start + (stop - start) * amount;
    802 }
    803 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float amount) {
    804     return start + (stop - start) * amount;
    805 }
    806 
    807 extern float __attribute__((overloadable)) radians(float degrees) {
    808     return degrees * (M_PI / 180.f);
    809 }
    810 extern float2 __attribute__((overloadable)) radians(float2 degrees) {
    811     return degrees * (M_PI / 180.f);
    812 }
    813 extern float3 __attribute__((overloadable)) radians(float3 degrees) {
    814     return degrees * (M_PI / 180.f);
    815 }
    816 extern float4 __attribute__((overloadable)) radians(float4 degrees) {
    817     return degrees * (M_PI / 180.f);
    818 }
    819 
    820 extern float __attribute__((overloadable)) step(float edge, float v) {
    821     return (v < edge) ? 0.f : 1.f;
    822 }
    823 extern float2 __attribute__((overloadable)) step(float2 edge, float2 v) {
    824     float2 r;
    825     r.x = (v.x < edge.x) ? 0.f : 1.f;
    826     r.y = (v.y < edge.y) ? 0.f : 1.f;
    827     return r;
    828 }
    829 extern float3 __attribute__((overloadable)) step(float3 edge, float3 v) {
    830     float3 r;
    831     r.x = (v.x < edge.x) ? 0.f : 1.f;
    832     r.y = (v.y < edge.y) ? 0.f : 1.f;
    833     r.z = (v.z < edge.z) ? 0.f : 1.f;
    834     return r;
    835 }
    836 extern float4 __attribute__((overloadable)) step(float4 edge, float4 v) {
    837     float4 r;
    838     r.x = (v.x < edge.x) ? 0.f : 1.f;
    839     r.y = (v.y < edge.y) ? 0.f : 1.f;
    840     r.z = (v.z < edge.z) ? 0.f : 1.f;
    841     r.w = (v.w < edge.w) ? 0.f : 1.f;
    842     return r;
    843 }
    844 extern float2 __attribute__((overloadable)) step(float2 edge, float v) {
    845     float2 r;
    846     r.x = (v < edge.x) ? 0.f : 1.f;
    847     r.y = (v < edge.y) ? 0.f : 1.f;
    848     return r;
    849 }
    850 extern float3 __attribute__((overloadable)) step(float3 edge, float v) {
    851     float3 r;
    852     r.x = (v < edge.x) ? 0.f : 1.f;
    853     r.y = (v < edge.y) ? 0.f : 1.f;
    854     r.z = (v < edge.z) ? 0.f : 1.f;
    855     return r;
    856 }
    857 extern float4 __attribute__((overloadable)) step(float4 edge, float v) {
    858     float4 r;
    859     r.x = (v < edge.x) ? 0.f : 1.f;
    860     r.y = (v < edge.y) ? 0.f : 1.f;
    861     r.z = (v < edge.z) ? 0.f : 1.f;
    862     r.w = (v < edge.w) ? 0.f : 1.f;
    863     return r;
    864 }
    865 
    866 extern float __attribute__((overloadable)) smoothstep(float, float, float);
    867 extern float2 __attribute__((overloadable)) smoothstep(float2, float2, float2);
    868 extern float3 __attribute__((overloadable)) smoothstep(float3, float3, float3);
    869 extern float4 __attribute__((overloadable)) smoothstep(float4, float4, float4);
    870 extern float2 __attribute__((overloadable)) smoothstep(float, float, float2);
    871 extern float3 __attribute__((overloadable)) smoothstep(float, float, float3);
    872 extern float4 __attribute__((overloadable)) smoothstep(float, float, float4);
    873 
    874 extern float __attribute__((overloadable)) sign(float v) {
    875     if (v > 0) return 1.f;
    876     if (v < 0) return -1.f;
    877     return v;
    878 }
    879 FN_FUNC_FN(sign)
    880 
    881 
    882 // 6.11.5
    883 extern float3 __attribute__((overloadable)) cross(float3 lhs, float3 rhs) {
    884     float3 r;
    885     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
    886     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
    887     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
    888     return r;
    889 }
    890 
    891 extern float4 __attribute__((overloadable)) cross(float4 lhs, float4 rhs) {
    892     float4 r;
    893     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
    894     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
    895     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
    896     r.w = 0.f;
    897     return r;
    898 }
    899 
    900 extern float __attribute__((overloadable)) length(float v);
    901 extern float __attribute__((overloadable)) length(float2 v);
    902 extern float __attribute__((overloadable)) length(float3 v);
    903 extern float __attribute__((overloadable)) length(float4 v);
    904 
    905 extern float __attribute__((overloadable)) distance(float lhs, float rhs) {
    906     return length(lhs - rhs);
    907 }
    908 extern float __attribute__((overloadable)) distance(float2 lhs, float2 rhs) {
    909     return length(lhs - rhs);
    910 }
    911 extern float __attribute__((overloadable)) distance(float3 lhs, float3 rhs) {
    912     return length(lhs - rhs);
    913 }
    914 extern float __attribute__((overloadable)) distance(float4 lhs, float4 rhs) {
    915     return length(lhs - rhs);
    916 }
    917 
    918 extern float __attribute__((overloadable)) normalize(float v) {
    919     return 1.f;
    920 }
    921 extern float2 __attribute__((overloadable)) normalize(float2 v) {
    922     return v / length(v);
    923 }
    924 extern float3 __attribute__((overloadable)) normalize(float3 v) {
    925     return v / length(v);
    926 }
    927 extern float4 __attribute__((overloadable)) normalize(float4 v) {
    928     return v / length(v);
    929 }
    930 
    931 extern float __attribute__((overloadable)) half_sqrt(float);
    932 
    933 extern float __attribute__((overloadable)) fast_length(float v) {
    934     return fabs(v);
    935 }
    936 extern float __attribute__((overloadable)) fast_length(float2 v) {
    937     return half_sqrt(v.x*v.x + v.y*v.y);
    938 }
    939 extern float __attribute__((overloadable)) fast_length(float3 v) {
    940     return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
    941 }
    942 extern float __attribute__((overloadable)) fast_length(float4 v) {
    943     return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
    944 }
    945 
    946 extern float __attribute__((overloadable)) fast_distance(float lhs, float rhs) {
    947     return fast_length(lhs - rhs);
    948 }
    949 extern float __attribute__((overloadable)) fast_distance(float2 lhs, float2 rhs) {
    950     return fast_length(lhs - rhs);
    951 }
    952 extern float __attribute__((overloadable)) fast_distance(float3 lhs, float3 rhs) {
    953     return fast_length(lhs - rhs);
    954 }
    955 extern float __attribute__((overloadable)) fast_distance(float4 lhs, float4 rhs) {
    956     return fast_length(lhs - rhs);
    957 }
    958 
    959 extern float __attribute__((overloadable)) half_rsqrt(float);
    960 
    961 extern float __attribute__((overloadable)) fast_normalize(float v) {
    962     return 1.f;
    963 }
    964 extern float2 __attribute__((overloadable)) fast_normalize(float2 v) {
    965     return v * half_rsqrt(v.x*v.x + v.y*v.y);
    966 }
    967 extern float3 __attribute__((overloadable)) fast_normalize(float3 v) {
    968     return v * half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z);
    969 }
    970 extern float4 __attribute__((overloadable)) fast_normalize(float4 v) {
    971     return v * half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
    972 }
    973 
    974 extern float __attribute__((overloadable)) half_recip(float);
    975 
    976 /*
    977 extern float __attribute__((overloadable)) approx_atan(float x) {
    978     if (x == 0.f)
    979         return 0.f;
    980     if (x < 0.f)
    981         return -1.f * approx_atan(-1.f * x);
    982     if (x > 1.f)
    983         return M_PI_2 - approx_atan(approx_recip(x));
    984     return x * approx_recip(1.f + 0.28f * x*x);
    985 }
    986 FN_FUNC_FN(approx_atan)
    987 */
    988 
    989 typedef union
    990 {
    991   float fv;
    992   int32_t iv;
    993 } ieee_float_shape_type;
    994 
    995 /* Get a 32 bit int from a float.  */
    996 
    997 #define GET_FLOAT_WORD(i,d)                 \
    998 do {                                \
    999   ieee_float_shape_type gf_u;                   \
   1000   gf_u.fv = (d);                     \
   1001   (i) = gf_u.iv;                      \
   1002 } while (0)
   1003 
   1004 /* Set a float from a 32 bit int.  */
   1005 
   1006 #define SET_FLOAT_WORD(d,i)                 \
   1007 do {                                \
   1008   ieee_float_shape_type sf_u;                   \
   1009   sf_u.iv = (i);                      \
   1010   (d) = sf_u.fv;                     \
   1011 } while (0)
   1012 
   1013 
   1014 
   1015 // Valid -125 to 125
   1016 extern float __attribute__((overloadable)) native_exp2(float v) {
   1017     int32_t iv = (int)v;
   1018     int32_t x = iv + (iv >> 31); // ~floor(v)
   1019     float r = (v - x);
   1020 
   1021     float fo;
   1022     SET_FLOAT_WORD(fo, (x + 127) << 23);
   1023 
   1024     r *= 0.694f; // ~ log(e) / log(2)
   1025     float r2 = r*r;
   1026     float adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
   1027     return fo * adj;
   1028 }
   1029 
   1030 extern float2 __attribute__((overloadable)) native_exp2(float2 v) {
   1031     int2 iv = convert_int2(v);
   1032     int2 x = iv + (iv >> (int2)31);//floor(v);
   1033     float2 r = (v - convert_float2(x));
   1034 
   1035     x += 127;
   1036 
   1037     float2 fo = (float2)(x << (int2)23);
   1038 
   1039     r *= 0.694f; // ~ log(e) / log(2)
   1040     float2 r2 = r*r;
   1041     float2 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
   1042     return fo * adj;
   1043 }
   1044 
   1045 extern float4 __attribute__((overloadable)) native_exp2(float4 v) {
   1046     int4 iv = convert_int4(v);
   1047     int4 x = iv + (iv >> (int4)31);//floor(v);
   1048     float4 r = (v - convert_float4(x));
   1049 
   1050     x += 127;
   1051 
   1052     float4 fo = (float4)(x << (int4)23);
   1053 
   1054     r *= 0.694f; // ~ log(e) / log(2)
   1055     float4 r2 = r*r;
   1056     float4 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
   1057     return fo * adj;
   1058 }
   1059 
   1060 extern float3 __attribute__((overloadable)) native_exp2(float3 v) {
   1061     float4 t = 1.f;
   1062     t.xyz = v;
   1063     return native_exp2(t).xyz;
   1064 }
   1065 
   1066 
   1067 extern float __attribute__((overloadable)) native_exp(float v) {
   1068     return native_exp2(v * 1.442695041f);
   1069 }
   1070 extern float2 __attribute__((overloadable)) native_exp(float2 v) {
   1071     return native_exp2(v * 1.442695041f);
   1072 }
   1073 extern float3 __attribute__((overloadable)) native_exp(float3 v) {
   1074     return native_exp2(v * 1.442695041f);
   1075 }
   1076 extern float4 __attribute__((overloadable)) native_exp(float4 v) {
   1077     return native_exp2(v * 1.442695041f);
   1078 }
   1079 
   1080 extern float __attribute__((overloadable)) native_exp10(float v) {
   1081     return native_exp2(v * 3.321928095f);
   1082 }
   1083 extern float2 __attribute__((overloadable)) native_exp10(float2 v) {
   1084     return native_exp2(v * 3.321928095f);
   1085 }
   1086 extern float3 __attribute__((overloadable)) native_exp10(float3 v) {
   1087     return native_exp2(v * 3.321928095f);
   1088 }
   1089 extern float4 __attribute__((overloadable)) native_exp10(float4 v) {
   1090     return native_exp2(v * 3.321928095f);
   1091 }
   1092 
   1093 extern float __attribute__((overloadable)) native_log2(float v) {
   1094     int32_t ibits;
   1095     GET_FLOAT_WORD(ibits, v);
   1096 
   1097     int32_t e = (ibits >> 23) & 0xff;
   1098 
   1099     ibits &= 0x7fffff;
   1100     ibits |= 127 << 23;
   1101 
   1102     float ir;
   1103     SET_FLOAT_WORD(ir, ibits);
   1104 
   1105     ir -= 1.5f;
   1106     float ir2 = ir*ir;
   1107     float adj2 = 0.405465108f + // -0.00009f +
   1108                  (0.666666667f * ir) -
   1109                  (0.222222222f * ir2) +
   1110                  (0.098765432f * ir*ir2) -
   1111                  (0.049382716f * ir2*ir2) +
   1112                  (0.026337449f * ir*ir2*ir2) -
   1113                  (0.014631916f * ir2*ir2*ir2);
   1114     adj2 *= (1.f / 0.693147181f);
   1115 
   1116     return (float)(e - 127) + adj2;
   1117 }
   1118 extern float2 __attribute__((overloadable)) native_log2(float2 v) {
   1119     float2 v2 = {native_log2(v.x), native_log2(v.y)};
   1120     return v2;
   1121 }
   1122 extern float3 __attribute__((overloadable)) native_log2(float3 v) {
   1123     float3 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z)};
   1124     return v2;
   1125 }
   1126 extern float4 __attribute__((overloadable)) native_log2(float4 v) {
   1127     float4 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z), native_log2(v.w)};
   1128     return v2;
   1129 }
   1130 
   1131 extern float __attribute__((overloadable)) native_log(float v) {
   1132     return native_log2(v) * (1.f / 1.442695041f);
   1133 }
   1134 extern float2 __attribute__((overloadable)) native_log(float2 v) {
   1135     return native_log2(v) * (1.f / 1.442695041f);
   1136 }
   1137 extern float3 __attribute__((overloadable)) native_log(float3 v) {
   1138     return native_log2(v) * (1.f / 1.442695041f);
   1139 }
   1140 extern float4 __attribute__((overloadable)) native_log(float4 v) {
   1141     return native_log2(v) * (1.f / 1.442695041f);
   1142 }
   1143 
   1144 extern float __attribute__((overloadable)) native_log10(float v) {
   1145     return native_log2(v) * (1.f / 3.321928095f);
   1146 }
   1147 extern float2 __attribute__((overloadable)) native_log10(float2 v) {
   1148     return native_log2(v) * (1.f / 3.321928095f);
   1149 }
   1150 extern float3 __attribute__((overloadable)) native_log10(float3 v) {
   1151     return native_log2(v) * (1.f / 3.321928095f);
   1152 }
   1153 extern float4 __attribute__((overloadable)) native_log10(float4 v) {
   1154     return native_log2(v) * (1.f / 3.321928095f);
   1155 }
   1156 
   1157 
   1158 extern float __attribute__((overloadable)) native_powr(float v, float y) {
   1159     float v2 = native_log2(v);
   1160     v2 = fmax(v2, -125.f);
   1161     return native_exp2(v2 * y);
   1162 }
   1163 extern float2 __attribute__((overloadable)) native_powr(float2 v, float2 y) {
   1164     float2 v2 = native_log2(v);
   1165     v2 = fmax(v2, -125.f);
   1166     return native_exp2(v2 * y);
   1167 }
   1168 extern float3 __attribute__((overloadable)) native_powr(float3 v, float3 y) {
   1169     float3 v2 = native_log2(v);
   1170     v2 = fmax(v2, -125.f);
   1171     return native_exp2(v2 * y);
   1172 }
   1173 extern float4 __attribute__((overloadable)) native_powr(float4 v, float4 y) {
   1174     float4 v2 = native_log2(v);
   1175     v2 = fmax(v2, -125.f);
   1176     return native_exp2(v2 * y);
   1177 }
   1178 
   1179 
   1180 #undef FN_FUNC_FN
   1181 #undef IN_FUNC_FN
   1182 #undef FN_FUNC_FN_FN
   1183 #undef FN_FUNC_FN_F
   1184 #undef FN_FUNC_FN_IN
   1185 #undef FN_FUNC_FN_I
   1186 #undef FN_FUNC_FN_PFN
   1187 #undef FN_FUNC_FN_PIN
   1188 #undef FN_FUNC_FN_FN_FN
   1189 #undef FN_FUNC_FN_FN_PIN
   1190 #undef XN_FUNC_YN
   1191 #undef UIN_FUNC_IN
   1192 #undef IN_FUNC_IN
   1193 #undef XN_FUNC_XN_XN_BODY
   1194 #undef IN_FUNC_IN_IN_BODY
   1195