Home | History | Annotate | Download | only in runtime
      1 #include "rs_types.rsh"
      2 
      3 extern float2 __attribute__((overloadable)) convert_float2(int2 c);
      4 extern float3 __attribute__((overloadable)) convert_float3(int3 c);
      5 extern float4 __attribute__((overloadable)) convert_float4(int4 c);
      6 
      7 extern int2 __attribute__((overloadable)) convert_int2(float2 c);
      8 extern int3 __attribute__((overloadable)) convert_int3(float3 c);
      9 extern int4 __attribute__((overloadable)) convert_int4(float4 c);
     10 
     11 
     12 extern float __attribute__((overloadable)) fmin(float v, float v2);
     13 extern float2 __attribute__((overloadable)) fmin(float2 v, float v2);
     14 extern float3 __attribute__((overloadable)) fmin(float3 v, float v2);
     15 extern float4 __attribute__((overloadable)) fmin(float4 v, float v2);
     16 
     17 extern float __attribute__((overloadable)) fmax(float v, float v2);
     18 extern float2 __attribute__((overloadable)) fmax(float2 v, float v2);
     19 extern float3 __attribute__((overloadable)) fmax(float3 v, float v2);
     20 extern float4 __attribute__((overloadable)) fmax(float4 v, float v2);
     21 
     22 // Float ops, 6.11.2
     23 
     24 #define FN_FUNC_FN(fnc)                                         \
     25 extern float2 __attribute__((overloadable)) fnc(float2 v) { \
     26     float2 r;                                                   \
     27     r.x = fnc(v.x);                                             \
     28     r.y = fnc(v.y);                                             \
     29     return r;                                                   \
     30 }                                                               \
     31 extern float3 __attribute__((overloadable)) fnc(float3 v) { \
     32     float3 r;                                                   \
     33     r.x = fnc(v.x);                                             \
     34     r.y = fnc(v.y);                                             \
     35     r.z = fnc(v.z);                                             \
     36     return r;                                                   \
     37 }                                                               \
     38 extern float4 __attribute__((overloadable)) fnc(float4 v) { \
     39     float4 r;                                                   \
     40     r.x = fnc(v.x);                                             \
     41     r.y = fnc(v.y);                                             \
     42     r.z = fnc(v.z);                                             \
     43     r.w = fnc(v.w);                                             \
     44     return r;                                                   \
     45 }
     46 
     47 #define IN_FUNC_FN(fnc)                                         \
     48 extern int2 __attribute__((overloadable)) fnc(float2 v) {   \
     49     int2 r;                                                     \
     50     r.x = fnc(v.x);                                             \
     51     r.y = fnc(v.y);                                             \
     52     return r;                                                   \
     53 }                                                               \
     54 extern int3 __attribute__((overloadable)) fnc(float3 v) {   \
     55     int3 r;                                                     \
     56     r.x = fnc(v.x);                                             \
     57     r.y = fnc(v.y);                                             \
     58     r.z = fnc(v.z);                                             \
     59     return r;                                                   \
     60 }                                                               \
     61 extern int4 __attribute__((overloadable)) fnc(float4 v) {   \
     62     int4 r;                                                     \
     63     r.x = fnc(v.x);                                             \
     64     r.y = fnc(v.y);                                             \
     65     r.z = fnc(v.z);                                             \
     66     r.w = fnc(v.w);                                             \
     67     return r;                                                   \
     68 }
     69 
     70 #define FN_FUNC_FN_FN(fnc)                                                  \
     71 extern float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2) { \
     72     float2 r;                                                               \
     73     r.x = fnc(v1.x, v2.x);                                                  \
     74     r.y = fnc(v1.y, v2.y);                                                  \
     75     return r;                                                               \
     76 }                                                                           \
     77 extern float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2) { \
     78     float3 r;                                                               \
     79     r.x = fnc(v1.x, v2.x);                                                  \
     80     r.y = fnc(v1.y, v2.y);                                                  \
     81     r.z = fnc(v1.z, v2.z);                                                  \
     82     return r;                                                               \
     83 }                                                                           \
     84 extern float4 __attribute__((overloadable)) fnc(float4 v1, float4 v2) { \
     85     float4 r;                                                               \
     86     r.x = fnc(v1.x, v2.x);                                                  \
     87     r.y = fnc(v1.y, v2.y);                                                  \
     88     r.z = fnc(v1.z, v2.z);                                                  \
     89     r.w = fnc(v1.w, v2.w);                                                  \
     90     return r;                                                               \
     91 }
     92 
     93 #define FN_FUNC_FN_F(fnc)                                                   \
     94 extern float2 __attribute__((overloadable)) fnc(float2 v1, float v2) {  \
     95     float2 r;                                                               \
     96     r.x = fnc(v1.x, v2);                                                    \
     97     r.y = fnc(v1.y, v2);                                                    \
     98     return r;                                                               \
     99 }                                                                           \
    100 extern float3 __attribute__((overloadable)) fnc(float3 v1, float v2) {  \
    101     float3 r;                                                               \
    102     r.x = fnc(v1.x, v2);                                                    \
    103     r.y = fnc(v1.y, v2);                                                    \
    104     r.z = fnc(v1.z, v2);                                                    \
    105     return r;                                                               \
    106 }                                                                           \
    107 extern float4 __attribute__((overloadable)) fnc(float4 v1, float v2) {  \
    108     float4 r;                                                               \
    109     r.x = fnc(v1.x, v2);                                                    \
    110     r.y = fnc(v1.y, v2);                                                    \
    111     r.z = fnc(v1.z, v2);                                                    \
    112     r.w = fnc(v1.w, v2);                                                    \
    113     return r;                                                               \
    114 }
    115 
    116 #define FN_FUNC_FN_IN(fnc)                                                  \
    117 extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 v2) {   \
    118     float2 r;                                                               \
    119     r.x = fnc(v1.x, v2.x);                                                  \
    120     r.y = fnc(v1.y, v2.y);                                                  \
    121     return r;                                                               \
    122 }                                                                           \
    123 extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 v2) {   \
    124     float3 r;                                                               \
    125     r.x = fnc(v1.x, v2.x);                                                  \
    126     r.y = fnc(v1.y, v2.y);                                                  \
    127     r.z = fnc(v1.z, v2.z);                                                  \
    128     return r;                                                               \
    129 }                                                                           \
    130 extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 v2) {   \
    131     float4 r;                                                               \
    132     r.x = fnc(v1.x, v2.x);                                                  \
    133     r.y = fnc(v1.y, v2.y);                                                  \
    134     r.z = fnc(v1.z, v2.z);                                                  \
    135     r.w = fnc(v1.w, v2.w);                                                  \
    136     return r;                                                               \
    137 }
    138 
    139 #define FN_FUNC_FN_I(fnc)                                                   \
    140 extern float2 __attribute__((overloadable)) fnc(float2 v1, int v2) {    \
    141     float2 r;                                                               \
    142     r.x = fnc(v1.x, v2);                                                    \
    143     r.y = fnc(v1.y, v2);                                                    \
    144     return r;                                                               \
    145 }                                                                           \
    146 extern float3 __attribute__((overloadable)) fnc(float3 v1, int v2) {    \
    147     float3 r;                                                               \
    148     r.x = fnc(v1.x, v2);                                                    \
    149     r.y = fnc(v1.y, v2);                                                    \
    150     r.z = fnc(v1.z, v2);                                                    \
    151     return r;                                                               \
    152 }                                                                           \
    153 extern float4 __attribute__((overloadable)) fnc(float4 v1, int v2) {    \
    154     float4 r;                                                               \
    155     r.x = fnc(v1.x, v2);                                                    \
    156     r.y = fnc(v1.y, v2);                                                    \
    157     r.z = fnc(v1.z, v2);                                                    \
    158     r.w = fnc(v1.w, v2);                                                    \
    159     return r;                                                               \
    160 }
    161 
    162 #define FN_FUNC_FN_PFN(fnc)                     \
    163 extern float2 __attribute__((overloadable)) \
    164         fnc(float2 v1, float2 *v2) {            \
    165     float2 r;                                   \
    166     float t[2];                                 \
    167     r.x = fnc(v1.x, &t[0]);                     \
    168     r.y = fnc(v1.y, &t[1]);                     \
    169     v2->x = t[0];                               \
    170     v2->y = t[1];                               \
    171     return r;                                   \
    172 }                                               \
    173 extern float3 __attribute__((overloadable)) \
    174         fnc(float3 v1, float3 *v2) {            \
    175     float3 r;                                   \
    176     float t[3];                                 \
    177     r.x = fnc(v1.x, &t[0]);                     \
    178     r.y = fnc(v1.y, &t[1]);                     \
    179     r.z = fnc(v1.z, &t[2]);                     \
    180     v2->x = t[0];                               \
    181     v2->y = t[1];                               \
    182     v2->z = t[2];                               \
    183     return r;                                   \
    184 }                                               \
    185 extern float4 __attribute__((overloadable)) \
    186         fnc(float4 v1, float4 *v2) {            \
    187     float4 r;                                   \
    188     float t[4];                                 \
    189     r.x = fnc(v1.x, &t[0]);                     \
    190     r.y = fnc(v1.y, &t[1]);                     \
    191     r.z = fnc(v1.z, &t[2]);                     \
    192     r.w = fnc(v1.w, &t[3]);                     \
    193     v2->x = t[0];                               \
    194     v2->y = t[1];                               \
    195     v2->z = t[2];                               \
    196     v2->w = t[3];                               \
    197     return r;                                   \
    198 }
    199 
    200 #define FN_FUNC_FN_PIN(fnc)                                                 \
    201 extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 *v2) {  \
    202     float2 r;                                                               \
    203     int t[2];                                                               \
    204     r.x = fnc(v1.x, &t[0]);                                                 \
    205     r.y = fnc(v1.y, &t[1]);                                                 \
    206     v2->x = t[0];                                                           \
    207     v2->y = t[1];                                                           \
    208     return r;                                                               \
    209 }                                                                           \
    210 extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 *v2) {  \
    211     float3 r;                                                               \
    212     int t[3];                                                               \
    213     r.x = fnc(v1.x, &t[0]);                                                 \
    214     r.y = fnc(v1.y, &t[1]);                                                 \
    215     r.z = fnc(v1.z, &t[2]);                                                 \
    216     v2->x = t[0];                                                           \
    217     v2->y = t[1];                                                           \
    218     v2->z = t[2];                                                           \
    219     return r;                                                               \
    220 }                                                                           \
    221 extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 *v2) {  \
    222     float4 r;                                                               \
    223     int t[4];                                                               \
    224     r.x = fnc(v1.x, &t[0]);                                                 \
    225     r.y = fnc(v1.y, &t[1]);                                                 \
    226     r.z = fnc(v1.z, &t[2]);                                                 \
    227     r.w = fnc(v1.w, &t[3]);                                                 \
    228     v2->x = t[0];                                                           \
    229     v2->y = t[1];                                                           \
    230     v2->z = t[2];                                                           \
    231     v2->w = t[3];                                                           \
    232     return r;                                                               \
    233 }
    234 
    235 #define FN_FUNC_FN_FN_FN(fnc)                   \
    236 extern float2 __attribute__((overloadable)) \
    237         fnc(float2 v1, float2 v2, float2 v3) {  \
    238     float2 r;                                   \
    239     r.x = fnc(v1.x, v2.x, v3.x);                \
    240     r.y = fnc(v1.y, v2.y, v3.y);                \
    241     return r;                                   \
    242 }                                               \
    243 extern float3 __attribute__((overloadable)) \
    244         fnc(float3 v1, float3 v2, float3 v3) {  \
    245     float3 r;                                   \
    246     r.x = fnc(v1.x, v2.x, v3.x);                \
    247     r.y = fnc(v1.y, v2.y, v3.y);                \
    248     r.z = fnc(v1.z, v2.z, v3.z);                \
    249     return r;                                   \
    250 }                                               \
    251 extern float4 __attribute__((overloadable)) \
    252         fnc(float4 v1, float4 v2, float4 v3) {  \
    253     float4 r;                                   \
    254     r.x = fnc(v1.x, v2.x, v3.x);                \
    255     r.y = fnc(v1.y, v2.y, v3.y);                \
    256     r.z = fnc(v1.z, v2.z, v3.z);                \
    257     r.w = fnc(v1.w, v2.w, v3.w);                \
    258     return r;                                   \
    259 }
    260 
    261 #define FN_FUNC_FN_FN_PIN(fnc)                  \
    262 extern float2 __attribute__((overloadable)) \
    263         fnc(float2 v1, float2 v2, int2 *v3) {   \
    264     float2 r;                                   \
    265     int t[2];                                   \
    266     r.x = fnc(v1.x, v2.x, &t[0]);               \
    267     r.y = fnc(v1.y, v2.y, &t[1]);               \
    268     v3->x = t[0];                               \
    269     v3->y = t[1];                               \
    270     return r;                                   \
    271 }                                               \
    272 extern float3 __attribute__((overloadable)) \
    273         fnc(float3 v1, float3 v2, int3 *v3) {   \
    274     float3 r;                                   \
    275     int t[3];                                   \
    276     r.x = fnc(v1.x, v2.x, &t[0]);               \
    277     r.y = fnc(v1.y, v2.y, &t[1]);               \
    278     r.z = fnc(v1.z, v2.z, &t[2]);               \
    279     v3->x = t[0];                               \
    280     v3->y = t[1];                               \
    281     v3->z = t[2];                               \
    282     return r;                                   \
    283 }                                               \
    284 extern float4 __attribute__((overloadable)) \
    285         fnc(float4 v1, float4 v2, int4 *v3) {   \
    286     float4 r;                                   \
    287     int t[4];                                   \
    288     r.x = fnc(v1.x, v2.x, &t[0]);               \
    289     r.y = fnc(v1.y, v2.y, &t[1]);               \
    290     r.z = fnc(v1.z, v2.z, &t[2]);               \
    291     r.w = fnc(v1.w, v2.w, &t[3]);               \
    292     v3->x = t[0];                               \
    293     v3->y = t[1];                               \
    294     v3->z = t[2];                               \
    295     v3->w = t[3];                               \
    296     return r;                                   \
    297 }
    298 
    299 static const int iposinf = 0x7f800000;
    300 static const int ineginf = 0xff800000;
    301 
    302 static const float posinf() {
    303     float f = *((float*)&iposinf);
    304     return f;
    305 }
    306 
    307 static const float neginf() {
    308     float f = *((float*)&ineginf);
    309     return f;
    310 }
    311 
    312 static bool isinf(float f) {
    313     int i = *((int*)(void*)&f);
    314     return (i == iposinf) || (i == ineginf);
    315 }
    316 
    317 static bool isnan(float f) {
    318     int i = *((int*)(void*)&f);
    319     return (((i & 0x7f800000) == 0x7f800000) && (i & 0x007fffff));
    320 }
    321 
    322 static bool isposzero(float f) {
    323     int i = *((int*)(void*)&f);
    324     return (i == 0x00000000);
    325 }
    326 
    327 static bool isnegzero(float f) {
    328     int i = *((int*)(void*)&f);
    329     return (i == 0x80000000);
    330 }
    331 
    332 static bool iszero(float f) {
    333     return isposzero(f) || isnegzero(f);
    334 }
    335 
    336 
    337 extern float __attribute__((overloadable)) acos(float);
    338 FN_FUNC_FN(acos)
    339 
    340 extern float __attribute__((overloadable)) acosh(float);
    341 FN_FUNC_FN(acosh)
    342 
    343 
    344 extern float __attribute__((overloadable)) acospi(float v) {
    345     return acos(v) / M_PI;
    346 }
    347 FN_FUNC_FN(acospi)
    348 
    349 extern float __attribute__((overloadable)) asin(float);
    350 FN_FUNC_FN(asin)
    351 
    352 extern float __attribute__((overloadable)) asinh(float);
    353 FN_FUNC_FN(asinh)
    354 
    355 extern float __attribute__((overloadable)) asinpi(float v) {
    356     return asin(v) / M_PI;
    357 }
    358 FN_FUNC_FN(asinpi)
    359 
    360 extern float __attribute__((overloadable)) atan(float);
    361 FN_FUNC_FN(atan)
    362 
    363 extern float __attribute__((overloadable)) atan2(float, float);
    364 FN_FUNC_FN_FN(atan2)
    365 
    366 extern float __attribute__((overloadable)) atanh(float);
    367 FN_FUNC_FN(atanh)
    368 
    369 extern float __attribute__((overloadable)) atanpi(float v) {
    370     return atan(v) / M_PI;
    371 }
    372 FN_FUNC_FN(atanpi)
    373 
    374 
    375 extern float __attribute__((overloadable)) atan2pi(float y, float x) {
    376     return atan2(y, x) / M_PI;
    377 }
    378 FN_FUNC_FN_FN(atan2pi)
    379 
    380 extern float __attribute__((overloadable)) cbrt(float);
    381 FN_FUNC_FN(cbrt)
    382 
    383 extern float __attribute__((overloadable)) ceil(float);
    384 FN_FUNC_FN(ceil)
    385 
    386 extern float __attribute__((overloadable)) copysign(float, float);
    387 FN_FUNC_FN_FN(copysign)
    388 
    389 extern float __attribute__((overloadable)) cos(float);
    390 FN_FUNC_FN(cos)
    391 
    392 extern float __attribute__((overloadable)) cosh(float);
    393 FN_FUNC_FN(cosh)
    394 
    395 extern float __attribute__((overloadable)) cospi(float v) {
    396     return cos(v * M_PI);
    397 }
    398 FN_FUNC_FN(cospi)
    399 
    400 extern float __attribute__((overloadable)) erfc(float);
    401 FN_FUNC_FN(erfc)
    402 
    403 extern float __attribute__((overloadable)) erf(float);
    404 FN_FUNC_FN(erf)
    405 
    406 extern float __attribute__((overloadable)) exp(float);
    407 FN_FUNC_FN(exp)
    408 
    409 extern float __attribute__((overloadable)) exp2(float);
    410 FN_FUNC_FN(exp2)
    411 
    412 extern float __attribute__((overloadable)) pow(float, float);
    413 
    414 extern float __attribute__((overloadable)) exp10(float v) {
    415     return exp2(v * 3.321928095f);
    416 }
    417 FN_FUNC_FN(exp10)
    418 
    419 extern float __attribute__((overloadable)) expm1(float);
    420 FN_FUNC_FN(expm1)
    421 
    422 extern float __attribute__((overloadable)) fabs(float v) {
    423     int i = *((int*)(void*)&v) & 0x7fffffff;
    424     return  *((float*)(void*)&i);
    425 }
    426 FN_FUNC_FN(fabs)
    427 
    428 extern float __attribute__((overloadable)) fdim(float, float);
    429 FN_FUNC_FN_FN(fdim)
    430 
    431 extern float __attribute__((overloadable)) floor(float);
    432 FN_FUNC_FN(floor)
    433 
    434 extern float __attribute__((overloadable)) fma(float, float, float);
    435 FN_FUNC_FN_FN_FN(fma)
    436 
    437 extern float __attribute__((overloadable)) fmin(float, float);
    438 
    439 extern float __attribute__((overloadable)) fmod(float, float);
    440 FN_FUNC_FN_FN(fmod)
    441 
    442 extern float __attribute__((overloadable)) fract(float v, float *iptr) {
    443     int i = (int)floor(v);
    444     if (iptr) {
    445         iptr[0] = i;
    446     }
    447     return fmin(v - i, 0x1.fffffep-1f);
    448 }
    449 FN_FUNC_FN_PFN(fract)
    450 
    451 extern float __attribute__((overloadable)) frexp(float, int *);
    452 FN_FUNC_FN_PIN(frexp)
    453 
    454 extern float __attribute__((overloadable)) hypot(float, float);
    455 FN_FUNC_FN_FN(hypot)
    456 
    457 extern int __attribute__((overloadable)) ilogb(float);
    458 IN_FUNC_FN(ilogb)
    459 
    460 extern float __attribute__((overloadable)) ldexp(float, int);
    461 FN_FUNC_FN_IN(ldexp)
    462 FN_FUNC_FN_I(ldexp)
    463 
    464 extern float __attribute__((overloadable)) lgamma(float);
    465 FN_FUNC_FN(lgamma)
    466 extern float __attribute__((overloadable)) lgamma(float, int*);
    467 FN_FUNC_FN_PIN(lgamma)
    468 
    469 extern float __attribute__((overloadable)) log(float);
    470 FN_FUNC_FN(log)
    471 
    472 extern float __attribute__((overloadable)) log10(float);
    473 FN_FUNC_FN(log10)
    474 
    475 
    476 extern float __attribute__((overloadable)) log2(float v) {
    477     return log10(v) * 3.321928095f;
    478 }
    479 FN_FUNC_FN(log2)
    480 
    481 extern float __attribute__((overloadable)) log1p(float);
    482 FN_FUNC_FN(log1p)
    483 
    484 extern float __attribute__((overloadable)) logb(float);
    485 FN_FUNC_FN(logb)
    486 
    487 extern float __attribute__((overloadable)) mad(float a, float b, float c) {
    488     return a * b + c;
    489 }
    490 extern float2 __attribute__((overloadable)) mad(float2 a, float2 b, float2 c) {
    491     return a * b + c;
    492 }
    493 extern float3 __attribute__((overloadable)) mad(float3 a, float3 b, float3 c) {
    494     return a * b + c;
    495 }
    496 extern float4 __attribute__((overloadable)) mad(float4 a, float4 b, float4 c) {
    497     return a * b + c;
    498 }
    499 
    500 extern float __attribute__((overloadable)) modf(float, float *);
    501 FN_FUNC_FN_PFN(modf);
    502 
    503 extern float __attribute__((overloadable)) nan(uint v) {
    504     float f[1];
    505     uint32_t *ip = (uint32_t *)f;
    506     *ip = v | 0x7fc00000;
    507     return f[0];
    508 }
    509 
    510 extern float __attribute__((overloadable)) nextafter(float, float);
    511 FN_FUNC_FN_FN(nextafter)
    512 
    513 FN_FUNC_FN_FN(pow)
    514 
    515 extern float __attribute__((overloadable)) pown(float v, int p) {
    516     return pow(v, (float)p);
    517 }
    518 extern float2 __attribute__((overloadable)) pown(float2 v, int2 p) {
    519     float2 f2 = convert_float2(p);
    520     return pow(v, f2);
    521 }
    522 extern float3 __attribute__((overloadable)) pown(float3 v, int3 p) {
    523     float3 f3 = convert_float3(p);
    524     return pow(v, f3);
    525 }
    526 extern float4 __attribute__((overloadable)) pown(float4 v, int4 p) {
    527     float4 f4 = convert_float4(p);
    528     return pow(v, f4);
    529 }
    530 
    531 extern float __attribute__((overloadable)) powr(float v, float p) {
    532     return pow(v, p);
    533 }
    534 extern float2 __attribute__((overloadable)) powr(float2 v, float2 p) {
    535     return pow(v, p);
    536 }
    537 extern float3 __attribute__((overloadable)) powr(float3 v, float3 p) {
    538     return pow(v, p);
    539 }
    540 extern float4 __attribute__((overloadable)) powr(float4 v, float4 p) {
    541     return pow(v, p);
    542 }
    543 
    544 extern float __attribute__((overloadable)) remainder(float, float);
    545 FN_FUNC_FN_FN(remainder)
    546 
    547 extern float __attribute__((overloadable)) remquo(float, float, int *);
    548 FN_FUNC_FN_FN_PIN(remquo)
    549 
    550 extern float __attribute__((overloadable)) rint(float);
    551 FN_FUNC_FN(rint)
    552 
    553 extern float __attribute__((overloadable)) rootn(float v, int r) {
    554     if (r == 0) {
    555         return nan(0);
    556     }
    557 
    558     if (iszero(v)) {
    559         if (r < 0) {
    560             if (r & 1) {
    561                 return copysign(posinf(), v);
    562             } else {
    563                 return posinf();
    564             }
    565         } else {
    566             if (r & 1) {
    567                 return copysign(0.f, v);
    568             } else {
    569                 return 0.f;
    570             }
    571         }
    572     }
    573 
    574     if (!isinf(v) && !isnan(v) && (v < 0.f)) {
    575         if (r & 1) {
    576             return (-1.f * pow(-1.f * v, 1.f / r));
    577         } else {
    578             return nan(0);
    579         }
    580     }
    581 
    582     return pow(v, 1.f / r);
    583 }
    584 FN_FUNC_FN_IN(rootn);
    585 
    586 extern float __attribute__((overloadable)) round(float);
    587 FN_FUNC_FN(round)
    588 
    589 
    590 extern float __attribute__((overloadable)) sqrt(float);
    591 extern float __attribute__((overloadable)) rsqrt(float v) {
    592     return 1.f / sqrt(v);
    593 }
    594 
    595 #if !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
    596 FN_FUNC_FN(sqrt)
    597 #endif // !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
    598 
    599 FN_FUNC_FN(rsqrt)
    600 
    601 extern float __attribute__((overloadable)) sin(float);
    602 FN_FUNC_FN(sin)
    603 
    604 extern float __attribute__((overloadable)) sincos(float v, float *cosptr) {
    605     *cosptr = cos(v);
    606     return sin(v);
    607 }
    608 extern float2 __attribute__((overloadable)) sincos(float2 v, float2 *cosptr) {
    609     *cosptr = cos(v);
    610     return sin(v);
    611 }
    612 extern float3 __attribute__((overloadable)) sincos(float3 v, float3 *cosptr) {
    613     *cosptr = cos(v);
    614     return sin(v);
    615 }
    616 extern float4 __attribute__((overloadable)) sincos(float4 v, float4 *cosptr) {
    617     *cosptr = cos(v);
    618     return sin(v);
    619 }
    620 
    621 extern float __attribute__((overloadable)) sinh(float);
    622 FN_FUNC_FN(sinh)
    623 
    624 extern float __attribute__((overloadable)) sinpi(float v) {
    625     return sin(v * M_PI);
    626 }
    627 FN_FUNC_FN(sinpi)
    628 
    629 extern float __attribute__((overloadable)) tan(float);
    630 FN_FUNC_FN(tan)
    631 
    632 extern float __attribute__((overloadable)) tanh(float);
    633 FN_FUNC_FN(tanh)
    634 
    635 extern float __attribute__((overloadable)) tanpi(float v) {
    636     return tan(v * M_PI);
    637 }
    638 FN_FUNC_FN(tanpi)
    639 
    640 
    641 extern float __attribute__((overloadable)) tgamma(float);
    642 FN_FUNC_FN(tgamma)
    643 
    644 extern float __attribute__((overloadable)) trunc(float);
    645 FN_FUNC_FN(trunc)
    646 
    647 // Int ops (partial), 6.11.3
    648 
    649 #define XN_FUNC_YN(typeout, fnc, typein)                                \
    650 extern typeout __attribute__((overloadable)) fnc(typein);               \
    651 extern typeout##2 __attribute__((overloadable)) fnc(typein##2 v) {  \
    652     typeout##2 r;                                                       \
    653     r.x = fnc(v.x);                                                     \
    654     r.y = fnc(v.y);                                                     \
    655     return r;                                                           \
    656 }                                                                       \
    657 extern typeout##3 __attribute__((overloadable)) fnc(typein##3 v) {  \
    658     typeout##3 r;                                                       \
    659     r.x = fnc(v.x);                                                     \
    660     r.y = fnc(v.y);                                                     \
    661     r.z = fnc(v.z);                                                     \
    662     return r;                                                           \
    663 }                                                                       \
    664 extern typeout##4 __attribute__((overloadable)) fnc(typein##4 v) {  \
    665     typeout##4 r;                                                       \
    666     r.x = fnc(v.x);                                                     \
    667     r.y = fnc(v.y);                                                     \
    668     r.z = fnc(v.z);                                                     \
    669     r.w = fnc(v.w);                                                     \
    670     return r;                                                           \
    671 }
    672 
    673 
    674 #define UIN_FUNC_IN(fnc)          \
    675 XN_FUNC_YN(uchar, fnc, char)      \
    676 XN_FUNC_YN(ushort, fnc, short)    \
    677 XN_FUNC_YN(uint, fnc, int)
    678 
    679 #define IN_FUNC_IN(fnc)           \
    680 XN_FUNC_YN(uchar, fnc, uchar)     \
    681 XN_FUNC_YN(char, fnc, char)       \
    682 XN_FUNC_YN(ushort, fnc, ushort)   \
    683 XN_FUNC_YN(short, fnc, short)     \
    684 XN_FUNC_YN(uint, fnc, uint)       \
    685 XN_FUNC_YN(int, fnc, int)
    686 
    687 
    688 #define XN_FUNC_XN_XN_BODY(type, fnc, body)         \
    689 extern type __attribute__((overloadable))       \
    690         fnc(type v1, type v2) {                     \
    691     return body;                                    \
    692 }                                                   \
    693 extern type##2 __attribute__((overloadable))    \
    694         fnc(type##2 v1, type##2 v2) {               \
    695     type##2 r;                                      \
    696     r.x = fnc(v1.x, v2.x);                          \
    697     r.y = fnc(v1.y, v2.y);                          \
    698     return r;                                       \
    699 }                                                   \
    700 extern type##3 __attribute__((overloadable))    \
    701         fnc(type##3 v1, type##3 v2) {               \
    702     type##3 r;                                      \
    703     r.x = fnc(v1.x, v2.x);                          \
    704     r.y = fnc(v1.y, v2.y);                          \
    705     r.z = fnc(v1.z, v2.z);                          \
    706     return r;                                       \
    707 }                                                   \
    708 extern type##4 __attribute__((overloadable))    \
    709         fnc(type##4 v1, type##4 v2) {               \
    710     type##4 r;                                      \
    711     r.x = fnc(v1.x, v2.x);                          \
    712     r.y = fnc(v1.y, v2.y);                          \
    713     r.z = fnc(v1.z, v2.z);                          \
    714     r.w = fnc(v1.w, v2.w);                          \
    715     return r;                                       \
    716 }
    717 
    718 #define IN_FUNC_IN_IN_BODY(fnc, body) \
    719 XN_FUNC_XN_XN_BODY(uchar, fnc, body)  \
    720 XN_FUNC_XN_XN_BODY(char, fnc, body)   \
    721 XN_FUNC_XN_XN_BODY(ushort, fnc, body) \
    722 XN_FUNC_XN_XN_BODY(short, fnc, body)  \
    723 XN_FUNC_XN_XN_BODY(uint, fnc, body)   \
    724 XN_FUNC_XN_XN_BODY(int, fnc, body)    \
    725 XN_FUNC_XN_XN_BODY(float, fnc, body)
    726 
    727 
    728 /**
    729  * abs
    730  */
    731 extern uint32_t __attribute__((overloadable)) abs(int32_t v) {
    732     if (v < 0)
    733         return -v;
    734     return v;
    735 }
    736 extern uint16_t __attribute__((overloadable)) abs(int16_t v) {
    737     if (v < 0)
    738         return -v;
    739     return v;
    740 }
    741 extern uint8_t __attribute__((overloadable)) abs(int8_t v) {
    742     if (v < 0)
    743         return -v;
    744     return v;
    745 }
    746 
    747 /**
    748  * clz
    749  */
    750 extern uint32_t __attribute__((overloadable)) clz(uint32_t v) {
    751     return __builtin_clz(v);
    752 }
    753 extern uint16_t __attribute__((overloadable)) clz(uint16_t v) {
    754     return (uint16_t)__builtin_clz(v);
    755 }
    756 extern uint8_t __attribute__((overloadable)) clz(uint8_t v) {
    757     return (uint8_t)__builtin_clz(v);
    758 }
    759 extern int32_t __attribute__((overloadable)) clz(int32_t v) {
    760     return (int32_t)__builtin_clz((uint32_t)v);
    761 }
    762 extern int16_t __attribute__((overloadable)) clz(int16_t v) {
    763     return (int16_t)__builtin_clz(v);
    764 }
    765 extern int8_t __attribute__((overloadable)) clz(int8_t v) {
    766     return (int8_t)__builtin_clz(v);
    767 }
    768 
    769 
    770 UIN_FUNC_IN(abs)
    771 IN_FUNC_IN(clz)
    772 
    773 
    774 // 6.11.4
    775 
    776 
    777 extern float __attribute__((overloadable)) degrees(float radians) {
    778     return radians * (180.f / M_PI);
    779 }
    780 extern float2 __attribute__((overloadable)) degrees(float2 radians) {
    781     return radians * (180.f / M_PI);
    782 }
    783 extern float3 __attribute__((overloadable)) degrees(float3 radians) {
    784     return radians * (180.f / M_PI);
    785 }
    786 extern float4 __attribute__((overloadable)) degrees(float4 radians) {
    787     return radians * (180.f / M_PI);
    788 }
    789 
    790 extern float __attribute__((overloadable)) mix(float start, float stop, float amount) {
    791     return start + (stop - start) * amount;
    792 }
    793 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float2 amount) {
    794     return start + (stop - start) * amount;
    795 }
    796 extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float3 amount) {
    797     return start + (stop - start) * amount;
    798 }
    799 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float4 amount) {
    800     return start + (stop - start) * amount;
    801 }
    802 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float amount) {
    803     return start + (stop - start) * amount;
    804 }
    805 extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float amount) {
    806     return start + (stop - start) * amount;
    807 }
    808 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float amount) {
    809     return start + (stop - start) * amount;
    810 }
    811 
    812 extern float __attribute__((overloadable)) radians(float degrees) {
    813     return degrees * (M_PI / 180.f);
    814 }
    815 extern float2 __attribute__((overloadable)) radians(float2 degrees) {
    816     return degrees * (M_PI / 180.f);
    817 }
    818 extern float3 __attribute__((overloadable)) radians(float3 degrees) {
    819     return degrees * (M_PI / 180.f);
    820 }
    821 extern float4 __attribute__((overloadable)) radians(float4 degrees) {
    822     return degrees * (M_PI / 180.f);
    823 }
    824 
    825 extern float __attribute__((overloadable)) step(float edge, float v) {
    826     return (v < edge) ? 0.f : 1.f;
    827 }
    828 extern float2 __attribute__((overloadable)) step(float2 edge, float2 v) {
    829     float2 r;
    830     r.x = (v.x < edge.x) ? 0.f : 1.f;
    831     r.y = (v.y < edge.y) ? 0.f : 1.f;
    832     return r;
    833 }
    834 extern float3 __attribute__((overloadable)) step(float3 edge, float3 v) {
    835     float3 r;
    836     r.x = (v.x < edge.x) ? 0.f : 1.f;
    837     r.y = (v.y < edge.y) ? 0.f : 1.f;
    838     r.z = (v.z < edge.z) ? 0.f : 1.f;
    839     return r;
    840 }
    841 extern float4 __attribute__((overloadable)) step(float4 edge, float4 v) {
    842     float4 r;
    843     r.x = (v.x < edge.x) ? 0.f : 1.f;
    844     r.y = (v.y < edge.y) ? 0.f : 1.f;
    845     r.z = (v.z < edge.z) ? 0.f : 1.f;
    846     r.w = (v.w < edge.w) ? 0.f : 1.f;
    847     return r;
    848 }
    849 extern float2 __attribute__((overloadable)) step(float2 edge, float v) {
    850     float2 r;
    851     r.x = (v < edge.x) ? 0.f : 1.f;
    852     r.y = (v < edge.y) ? 0.f : 1.f;
    853     return r;
    854 }
    855 extern float3 __attribute__((overloadable)) step(float3 edge, float v) {
    856     float3 r;
    857     r.x = (v < edge.x) ? 0.f : 1.f;
    858     r.y = (v < edge.y) ? 0.f : 1.f;
    859     r.z = (v < edge.z) ? 0.f : 1.f;
    860     return r;
    861 }
    862 extern float4 __attribute__((overloadable)) step(float4 edge, float v) {
    863     float4 r;
    864     r.x = (v < edge.x) ? 0.f : 1.f;
    865     r.y = (v < edge.y) ? 0.f : 1.f;
    866     r.z = (v < edge.z) ? 0.f : 1.f;
    867     r.w = (v < edge.w) ? 0.f : 1.f;
    868     return r;
    869 }
    870 
    871 extern float __attribute__((overloadable)) smoothstep(float, float, float);
    872 extern float2 __attribute__((overloadable)) smoothstep(float2, float2, float2);
    873 extern float3 __attribute__((overloadable)) smoothstep(float3, float3, float3);
    874 extern float4 __attribute__((overloadable)) smoothstep(float4, float4, float4);
    875 extern float2 __attribute__((overloadable)) smoothstep(float, float, float2);
    876 extern float3 __attribute__((overloadable)) smoothstep(float, float, float3);
    877 extern float4 __attribute__((overloadable)) smoothstep(float, float, float4);
    878 
    879 extern float __attribute__((overloadable)) sign(float v) {
    880     if (v > 0) return 1.f;
    881     if (v < 0) return -1.f;
    882     return v;
    883 }
    884 FN_FUNC_FN(sign)
    885 
    886 
    887 // 6.11.5
    888 extern float3 __attribute__((overloadable)) cross(float3 lhs, float3 rhs) {
    889     float3 r;
    890     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
    891     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
    892     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
    893     return r;
    894 }
    895 
    896 extern float4 __attribute__((overloadable)) cross(float4 lhs, float4 rhs) {
    897     float4 r;
    898     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
    899     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
    900     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
    901     r.w = 0.f;
    902     return r;
    903 }
    904 
    905 #if !defined(ARCH_X86_HAVE_SSE3)
    906 
    907 extern float __attribute__((overloadable)) dot(float lhs, float rhs) {
    908     return lhs * rhs;
    909 }
    910 extern float __attribute__((overloadable)) dot(float2 lhs, float2 rhs) {
    911     return lhs.x*rhs.x + lhs.y*rhs.y;
    912 }
    913 extern float __attribute__((overloadable)) dot(float3 lhs, float3 rhs) {
    914     return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z;
    915 }
    916 extern float __attribute__((overloadable)) dot(float4 lhs, float4 rhs) {
    917     return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z + lhs.w*rhs.w;
    918 }
    919 
    920 extern float __attribute__((overloadable)) length(float v) {
    921     return fabs(v);
    922 }
    923 extern float __attribute__((overloadable)) length(float2 v) {
    924     return sqrt(v.x*v.x + v.y*v.y);
    925 }
    926 extern float __attribute__((overloadable)) length(float3 v) {
    927     return sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
    928 }
    929 extern float __attribute__((overloadable)) length(float4 v) {
    930     return sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
    931 }
    932 
    933 #else
    934 
    935 extern float __attribute__((overloadable)) length(float v);
    936 extern float __attribute__((overloadable)) length(float2 v);
    937 extern float __attribute__((overloadable)) length(float3 v);
    938 extern float __attribute__((overloadable)) length(float4 v);
    939 
    940 #endif
    941 
    942 extern float __attribute__((overloadable)) distance(float lhs, float rhs) {
    943     return length(lhs - rhs);
    944 }
    945 extern float __attribute__((overloadable)) distance(float2 lhs, float2 rhs) {
    946     return length(lhs - rhs);
    947 }
    948 extern float __attribute__((overloadable)) distance(float3 lhs, float3 rhs) {
    949     return length(lhs - rhs);
    950 }
    951 extern float __attribute__((overloadable)) distance(float4 lhs, float4 rhs) {
    952     return length(lhs - rhs);
    953 }
    954 
    955 extern float __attribute__((overloadable)) normalize(float v) {
    956     return 1.f;
    957 }
    958 extern float2 __attribute__((overloadable)) normalize(float2 v) {
    959     return v / length(v);
    960 }
    961 extern float3 __attribute__((overloadable)) normalize(float3 v) {
    962     return v / length(v);
    963 }
    964 extern float4 __attribute__((overloadable)) normalize(float4 v) {
    965     return v / length(v);
    966 }
    967 
    968 extern float __attribute__((overloadable)) half_sqrt(float);
    969 
    970 extern float __attribute__((overloadable)) fast_length(float v) {
    971     return fabs(v);
    972 }
    973 extern float __attribute__((overloadable)) fast_length(float2 v) {
    974     return half_sqrt(v.x*v.x + v.y*v.y);
    975 }
    976 extern float __attribute__((overloadable)) fast_length(float3 v) {
    977     return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
    978 }
    979 extern float __attribute__((overloadable)) fast_length(float4 v) {
    980     return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
    981 }
    982 
    983 extern float __attribute__((overloadable)) fast_distance(float lhs, float rhs) {
    984     return fast_length(lhs - rhs);
    985 }
    986 extern float __attribute__((overloadable)) fast_distance(float2 lhs, float2 rhs) {
    987     return fast_length(lhs - rhs);
    988 }
    989 extern float __attribute__((overloadable)) fast_distance(float3 lhs, float3 rhs) {
    990     return fast_length(lhs - rhs);
    991 }
    992 extern float __attribute__((overloadable)) fast_distance(float4 lhs, float4 rhs) {
    993     return fast_length(lhs - rhs);
    994 }
    995 
    996 extern float __attribute__((overloadable)) half_rsqrt(float);
    997 
    998 extern float __attribute__((overloadable)) fast_normalize(float v) {
    999     return 1.f;
   1000 }
   1001 extern float2 __attribute__((overloadable)) fast_normalize(float2 v) {
   1002     return v * half_rsqrt(v.x*v.x + v.y*v.y);
   1003 }
   1004 extern float3 __attribute__((overloadable)) fast_normalize(float3 v) {
   1005     return v * half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z);
   1006 }
   1007 extern float4 __attribute__((overloadable)) fast_normalize(float4 v) {
   1008     return v * half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
   1009 }
   1010 
   1011 extern float __attribute__((overloadable)) half_recip(float);
   1012 
   1013 /*
   1014 extern float __attribute__((overloadable)) approx_atan(float x) {
   1015     if (x == 0.f)
   1016         return 0.f;
   1017     if (x < 0.f)
   1018         return -1.f * approx_atan(-1.f * x);
   1019     if (x > 1.f)
   1020         return M_PI_2 - approx_atan(approx_recip(x));
   1021     return x * approx_recip(1.f + 0.28f * x*x);
   1022 }
   1023 FN_FUNC_FN(approx_atan)
   1024 */
   1025 
   1026 typedef union
   1027 {
   1028   float fv;
   1029   int32_t iv;
   1030 } ieee_float_shape_type;
   1031 
   1032 /* Get a 32 bit int from a float.  */
   1033 
   1034 #define GET_FLOAT_WORD(i,d)                 \
   1035 do {                                \
   1036   ieee_float_shape_type gf_u;                   \
   1037   gf_u.fv = (d);                     \
   1038   (i) = gf_u.iv;                      \
   1039 } while (0)
   1040 
   1041 /* Set a float from a 32 bit int.  */
   1042 
   1043 #define SET_FLOAT_WORD(d,i)                 \
   1044 do {                                \
   1045   ieee_float_shape_type sf_u;                   \
   1046   sf_u.iv = (i);                      \
   1047   (d) = sf_u.fv;                     \
   1048 } while (0)
   1049 
   1050 
   1051 
   1052 // Valid -125 to 125
   1053 extern float __attribute__((overloadable)) native_exp2(float v) {
   1054     int32_t iv = (int)v;
   1055     int32_t x = iv + (iv >> 31); // ~floor(v)
   1056     float r = (v - x);
   1057 
   1058     float fo;
   1059     SET_FLOAT_WORD(fo, (x + 127) << 23);
   1060 
   1061     r *= 0.694f; // ~ log(e) / log(2)
   1062     float r2 = r*r;
   1063     float adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
   1064     return fo * adj;
   1065 }
   1066 
   1067 extern float2 __attribute__((overloadable)) native_exp2(float2 v) {
   1068     int2 iv = convert_int2(v);
   1069     int2 x = iv + (iv >> (int2)31);//floor(v);
   1070     float2 r = (v - convert_float2(x));
   1071 
   1072     x += 127;
   1073 
   1074     float2 fo = (float2)(x << (int2)23);
   1075 
   1076     r *= 0.694f; // ~ log(e) / log(2)
   1077     float2 r2 = r*r;
   1078     float2 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
   1079     return fo * adj;
   1080 }
   1081 
   1082 extern float4 __attribute__((overloadable)) native_exp2(float4 v) {
   1083     int4 iv = convert_int4(v);
   1084     int4 x = iv + (iv >> (int4)31);//floor(v);
   1085     float4 r = (v - convert_float4(x));
   1086 
   1087     x += 127;
   1088 
   1089     float4 fo = (float4)(x << (int4)23);
   1090 
   1091     r *= 0.694f; // ~ log(e) / log(2)
   1092     float4 r2 = r*r;
   1093     float4 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
   1094     return fo * adj;
   1095 }
   1096 
   1097 extern float3 __attribute__((overloadable)) native_exp2(float3 v) {
   1098     float4 t = 1.f;
   1099     t.xyz = v;
   1100     return native_exp2(t).xyz;
   1101 }
   1102 
   1103 
   1104 extern float __attribute__((overloadable)) native_exp(float v) {
   1105     return native_exp2(v * 1.442695041f);
   1106 }
   1107 extern float2 __attribute__((overloadable)) native_exp(float2 v) {
   1108     return native_exp2(v * 1.442695041f);
   1109 }
   1110 extern float3 __attribute__((overloadable)) native_exp(float3 v) {
   1111     return native_exp2(v * 1.442695041f);
   1112 }
   1113 extern float4 __attribute__((overloadable)) native_exp(float4 v) {
   1114     return native_exp2(v * 1.442695041f);
   1115 }
   1116 
   1117 extern float __attribute__((overloadable)) native_exp10(float v) {
   1118     return native_exp2(v * 3.321928095f);
   1119 }
   1120 extern float2 __attribute__((overloadable)) native_exp10(float2 v) {
   1121     return native_exp2(v * 3.321928095f);
   1122 }
   1123 extern float3 __attribute__((overloadable)) native_exp10(float3 v) {
   1124     return native_exp2(v * 3.321928095f);
   1125 }
   1126 extern float4 __attribute__((overloadable)) native_exp10(float4 v) {
   1127     return native_exp2(v * 3.321928095f);
   1128 }
   1129 
   1130 extern float __attribute__((overloadable)) native_log2(float v) {
   1131     int32_t ibits;
   1132     GET_FLOAT_WORD(ibits, v);
   1133 
   1134     int32_t e = (ibits >> 23) & 0xff;
   1135 
   1136     ibits &= 0x7fffff;
   1137     ibits |= 127 << 23;
   1138 
   1139     float ir;
   1140     SET_FLOAT_WORD(ir, ibits);
   1141 
   1142     ir -= 1.5f;
   1143     float ir2 = ir*ir;
   1144     float adj2 = 0.405465108f + // -0.00009f +
   1145                  (0.666666667f * ir) -
   1146                  (0.222222222f * ir2) +
   1147                  (0.098765432f * ir*ir2) -
   1148                  (0.049382716f * ir2*ir2) +
   1149                  (0.026337449f * ir*ir2*ir2) -
   1150                  (0.014631916f * ir2*ir2*ir2);
   1151     adj2 *= (1.f / 0.693147181f);
   1152 
   1153     return (float)(e - 127) + adj2;
   1154 }
   1155 extern float2 __attribute__((overloadable)) native_log2(float2 v) {
   1156     float2 v2 = {native_log2(v.x), native_log2(v.y)};
   1157     return v2;
   1158 }
   1159 extern float3 __attribute__((overloadable)) native_log2(float3 v) {
   1160     float3 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z)};
   1161     return v2;
   1162 }
   1163 extern float4 __attribute__((overloadable)) native_log2(float4 v) {
   1164     float4 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z), native_log2(v.w)};
   1165     return v2;
   1166 }
   1167 
   1168 extern float __attribute__((overloadable)) native_log(float v) {
   1169     return native_log2(v) * (1.f / 1.442695041f);
   1170 }
   1171 extern float2 __attribute__((overloadable)) native_log(float2 v) {
   1172     return native_log2(v) * (1.f / 1.442695041f);
   1173 }
   1174 extern float3 __attribute__((overloadable)) native_log(float3 v) {
   1175     return native_log2(v) * (1.f / 1.442695041f);
   1176 }
   1177 extern float4 __attribute__((overloadable)) native_log(float4 v) {
   1178     return native_log2(v) * (1.f / 1.442695041f);
   1179 }
   1180 
   1181 extern float __attribute__((overloadable)) native_log10(float v) {
   1182     return native_log2(v) * (1.f / 3.321928095f);
   1183 }
   1184 extern float2 __attribute__((overloadable)) native_log10(float2 v) {
   1185     return native_log2(v) * (1.f / 3.321928095f);
   1186 }
   1187 extern float3 __attribute__((overloadable)) native_log10(float3 v) {
   1188     return native_log2(v) * (1.f / 3.321928095f);
   1189 }
   1190 extern float4 __attribute__((overloadable)) native_log10(float4 v) {
   1191     return native_log2(v) * (1.f / 3.321928095f);
   1192 }
   1193 
   1194 
   1195 extern float __attribute__((overloadable)) native_powr(float v, float y) {
   1196     float v2 = native_log2(v);
   1197     v2 = fmax(v2, -125.f);
   1198     return native_exp2(v2 * y);
   1199 }
   1200 extern float2 __attribute__((overloadable)) native_powr(float2 v, float2 y) {
   1201     float2 v2 = native_log2(v);
   1202     v2 = fmax(v2, -125.f);
   1203     return native_exp2(v2 * y);
   1204 }
   1205 extern float3 __attribute__((overloadable)) native_powr(float3 v, float3 y) {
   1206     float3 v2 = native_log2(v);
   1207     v2 = fmax(v2, -125.f);
   1208     return native_exp2(v2 * y);
   1209 }
   1210 extern float4 __attribute__((overloadable)) native_powr(float4 v, float4 y) {
   1211     float4 v2 = native_log2(v);
   1212     v2 = fmax(v2, -125.f);
   1213     return native_exp2(v2 * y);
   1214 }
   1215 
   1216 
   1217 #undef FN_FUNC_FN
   1218 #undef IN_FUNC_FN
   1219 #undef FN_FUNC_FN_FN
   1220 #undef FN_FUNC_FN_F
   1221 #undef FN_FUNC_FN_IN
   1222 #undef FN_FUNC_FN_I
   1223 #undef FN_FUNC_FN_PFN
   1224 #undef FN_FUNC_FN_PIN
   1225 #undef FN_FUNC_FN_FN_FN
   1226 #undef FN_FUNC_FN_FN_PIN
   1227 #undef XN_FUNC_YN
   1228 #undef UIN_FUNC_IN
   1229 #undef IN_FUNC_IN
   1230 #undef XN_FUNC_XN_XN_BODY
   1231 #undef IN_FUNC_IN_IN_BODY
   1232