Home | History | Annotate | Download | only in runtime
      1 #include "rs_types.rsh"
      2 
      3 extern float2 __attribute__((overloadable)) convert_float2(int2 c);
      4 extern float3 __attribute__((overloadable)) convert_float3(int3 c);
      5 extern float4 __attribute__((overloadable)) convert_float4(int4 c);
      6 
      7 extern int2 __attribute__((overloadable)) convert_int2(float2 c);
      8 extern int3 __attribute__((overloadable)) convert_int3(float3 c);
      9 extern int4 __attribute__((overloadable)) convert_int4(float4 c);
     10 
     11 
     12 extern float __attribute__((overloadable)) fmin(float v, float v2);
     13 extern float2 __attribute__((overloadable)) fmin(float2 v, float v2);
     14 extern float3 __attribute__((overloadable)) fmin(float3 v, float v2);
     15 extern float4 __attribute__((overloadable)) fmin(float4 v, float v2);
     16 
     17 extern float __attribute__((overloadable)) fmax(float v, float v2);
     18 extern float2 __attribute__((overloadable)) fmax(float2 v, float v2);
     19 extern float3 __attribute__((overloadable)) fmax(float3 v, float v2);
     20 extern float4 __attribute__((overloadable)) fmax(float4 v, float v2);
     21 
     22 // Float ops, 6.11.2
     23 
     24 #define FN_FUNC_FN(fnc)                                         \
     25 extern float2 __attribute__((overloadable)) fnc(float2 v) { \
     26     float2 r;                                                   \
     27     r.x = fnc(v.x);                                             \
     28     r.y = fnc(v.y);                                             \
     29     return r;                                                   \
     30 }                                                               \
     31 extern float3 __attribute__((overloadable)) fnc(float3 v) { \
     32     float3 r;                                                   \
     33     r.x = fnc(v.x);                                             \
     34     r.y = fnc(v.y);                                             \
     35     r.z = fnc(v.z);                                             \
     36     return r;                                                   \
     37 }                                                               \
     38 extern float4 __attribute__((overloadable)) fnc(float4 v) { \
     39     float4 r;                                                   \
     40     r.x = fnc(v.x);                                             \
     41     r.y = fnc(v.y);                                             \
     42     r.z = fnc(v.z);                                             \
     43     r.w = fnc(v.w);                                             \
     44     return r;                                                   \
     45 }
     46 
     47 #define IN_FUNC_FN(fnc)                                         \
     48 extern int2 __attribute__((overloadable)) fnc(float2 v) {   \
     49     int2 r;                                                     \
     50     r.x = fnc(v.x);                                             \
     51     r.y = fnc(v.y);                                             \
     52     return r;                                                   \
     53 }                                                               \
     54 extern int3 __attribute__((overloadable)) fnc(float3 v) {   \
     55     int3 r;                                                     \
     56     r.x = fnc(v.x);                                             \
     57     r.y = fnc(v.y);                                             \
     58     r.z = fnc(v.z);                                             \
     59     return r;                                                   \
     60 }                                                               \
     61 extern int4 __attribute__((overloadable)) fnc(float4 v) {   \
     62     int4 r;                                                     \
     63     r.x = fnc(v.x);                                             \
     64     r.y = fnc(v.y);                                             \
     65     r.z = fnc(v.z);                                             \
     66     r.w = fnc(v.w);                                             \
     67     return r;                                                   \
     68 }
     69 
     70 #define FN_FUNC_FN_FN(fnc)                                                  \
     71 extern float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2) { \
     72     float2 r;                                                               \
     73     r.x = fnc(v1.x, v2.x);                                                  \
     74     r.y = fnc(v1.y, v2.y);                                                  \
     75     return r;                                                               \
     76 }                                                                           \
     77 extern float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2) { \
     78     float3 r;                                                               \
     79     r.x = fnc(v1.x, v2.x);                                                  \
     80     r.y = fnc(v1.y, v2.y);                                                  \
     81     r.z = fnc(v1.z, v2.z);                                                  \
     82     return r;                                                               \
     83 }                                                                           \
     84 extern float4 __attribute__((overloadable)) fnc(float4 v1, float4 v2) { \
     85     float4 r;                                                               \
     86     r.x = fnc(v1.x, v2.x);                                                  \
     87     r.y = fnc(v1.y, v2.y);                                                  \
     88     r.z = fnc(v1.z, v2.z);                                                  \
     89     r.w = fnc(v1.w, v2.w);                                                  \
     90     return r;                                                               \
     91 }
     92 
     93 #define FN_FUNC_FN_F(fnc)                                                   \
     94 extern float2 __attribute__((overloadable)) fnc(float2 v1, float v2) {  \
     95     float2 r;                                                               \
     96     r.x = fnc(v1.x, v2);                                                    \
     97     r.y = fnc(v1.y, v2);                                                    \
     98     return r;                                                               \
     99 }                                                                           \
    100 extern float3 __attribute__((overloadable)) fnc(float3 v1, float v2) {  \
    101     float3 r;                                                               \
    102     r.x = fnc(v1.x, v2);                                                    \
    103     r.y = fnc(v1.y, v2);                                                    \
    104     r.z = fnc(v1.z, v2);                                                    \
    105     return r;                                                               \
    106 }                                                                           \
    107 extern float4 __attribute__((overloadable)) fnc(float4 v1, float v2) {  \
    108     float4 r;                                                               \
    109     r.x = fnc(v1.x, v2);                                                    \
    110     r.y = fnc(v1.y, v2);                                                    \
    111     r.z = fnc(v1.z, v2);                                                    \
    112     r.w = fnc(v1.w, v2);                                                    \
    113     return r;                                                               \
    114 }
    115 
    116 #define FN_FUNC_FN_IN(fnc)                                                  \
    117 extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 v2) {   \
    118     float2 r;                                                               \
    119     r.x = fnc(v1.x, v2.x);                                                  \
    120     r.y = fnc(v1.y, v2.y);                                                  \
    121     return r;                                                               \
    122 }                                                                           \
    123 extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 v2) {   \
    124     float3 r;                                                               \
    125     r.x = fnc(v1.x, v2.x);                                                  \
    126     r.y = fnc(v1.y, v2.y);                                                  \
    127     r.z = fnc(v1.z, v2.z);                                                  \
    128     return r;                                                               \
    129 }                                                                           \
    130 extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 v2) {   \
    131     float4 r;                                                               \
    132     r.x = fnc(v1.x, v2.x);                                                  \
    133     r.y = fnc(v1.y, v2.y);                                                  \
    134     r.z = fnc(v1.z, v2.z);                                                  \
    135     r.w = fnc(v1.w, v2.w);                                                  \
    136     return r;                                                               \
    137 }
    138 
    139 #define FN_FUNC_FN_I(fnc)                                                   \
    140 extern float2 __attribute__((overloadable)) fnc(float2 v1, int v2) {    \
    141     float2 r;                                                               \
    142     r.x = fnc(v1.x, v2);                                                    \
    143     r.y = fnc(v1.y, v2);                                                    \
    144     return r;                                                               \
    145 }                                                                           \
    146 extern float3 __attribute__((overloadable)) fnc(float3 v1, int v2) {    \
    147     float3 r;                                                               \
    148     r.x = fnc(v1.x, v2);                                                    \
    149     r.y = fnc(v1.y, v2);                                                    \
    150     r.z = fnc(v1.z, v2);                                                    \
    151     return r;                                                               \
    152 }                                                                           \
    153 extern float4 __attribute__((overloadable)) fnc(float4 v1, int v2) {    \
    154     float4 r;                                                               \
    155     r.x = fnc(v1.x, v2);                                                    \
    156     r.y = fnc(v1.y, v2);                                                    \
    157     r.z = fnc(v1.z, v2);                                                    \
    158     r.w = fnc(v1.w, v2);                                                    \
    159     return r;                                                               \
    160 }
    161 
    162 #define FN_FUNC_FN_PFN(fnc)                     \
    163 extern float2 __attribute__((overloadable)) \
    164         fnc(float2 v1, float2 *v2) {            \
    165     float2 r;                                   \
    166     float t[2];                                 \
    167     r.x = fnc(v1.x, &t[0]);                     \
    168     r.y = fnc(v1.y, &t[1]);                     \
    169     v2->x = t[0];                               \
    170     v2->y = t[1];                               \
    171     return r;                                   \
    172 }                                               \
    173 extern float3 __attribute__((overloadable)) \
    174         fnc(float3 v1, float3 *v2) {            \
    175     float3 r;                                   \
    176     float t[3];                                 \
    177     r.x = fnc(v1.x, &t[0]);                     \
    178     r.y = fnc(v1.y, &t[1]);                     \
    179     r.z = fnc(v1.z, &t[2]);                     \
    180     v2->x = t[0];                               \
    181     v2->y = t[1];                               \
    182     v2->z = t[2];                               \
    183     return r;                                   \
    184 }                                               \
    185 extern float4 __attribute__((overloadable)) \
    186         fnc(float4 v1, float4 *v2) {            \
    187     float4 r;                                   \
    188     float t[4];                                 \
    189     r.x = fnc(v1.x, &t[0]);                     \
    190     r.y = fnc(v1.y, &t[1]);                     \
    191     r.z = fnc(v1.z, &t[2]);                     \
    192     r.w = fnc(v1.w, &t[3]);                     \
    193     v2->x = t[0];                               \
    194     v2->y = t[1];                               \
    195     v2->z = t[2];                               \
    196     v2->w = t[3];                               \
    197     return r;                                   \
    198 }
    199 
    200 #define FN_FUNC_FN_PIN(fnc)                                                 \
    201 extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 *v2) {  \
    202     float2 r;                                                               \
    203     int t[2];                                                               \
    204     r.x = fnc(v1.x, &t[0]);                                                 \
    205     r.y = fnc(v1.y, &t[1]);                                                 \
    206     v2->x = t[0];                                                           \
    207     v2->y = t[1];                                                           \
    208     return r;                                                               \
    209 }                                                                           \
    210 extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 *v2) {  \
    211     float3 r;                                                               \
    212     int t[3];                                                               \
    213     r.x = fnc(v1.x, &t[0]);                                                 \
    214     r.y = fnc(v1.y, &t[1]);                                                 \
    215     r.z = fnc(v1.z, &t[2]);                                                 \
    216     v2->x = t[0];                                                           \
    217     v2->y = t[1];                                                           \
    218     v2->z = t[2];                                                           \
    219     return r;                                                               \
    220 }                                                                           \
    221 extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 *v2) {  \
    222     float4 r;                                                               \
    223     int t[4];                                                               \
    224     r.x = fnc(v1.x, &t[0]);                                                 \
    225     r.y = fnc(v1.y, &t[1]);                                                 \
    226     r.z = fnc(v1.z, &t[2]);                                                 \
    227     r.w = fnc(v1.w, &t[3]);                                                 \
    228     v2->x = t[0];                                                           \
    229     v2->y = t[1];                                                           \
    230     v2->z = t[2];                                                           \
    231     v2->w = t[3];                                                           \
    232     return r;                                                               \
    233 }
    234 
    235 #define FN_FUNC_FN_FN_FN(fnc)                   \
    236 extern float2 __attribute__((overloadable)) \
    237         fnc(float2 v1, float2 v2, float2 v3) {  \
    238     float2 r;                                   \
    239     r.x = fnc(v1.x, v2.x, v3.x);                \
    240     r.y = fnc(v1.y, v2.y, v3.y);                \
    241     return r;                                   \
    242 }                                               \
    243 extern float3 __attribute__((overloadable)) \
    244         fnc(float3 v1, float3 v2, float3 v3) {  \
    245     float3 r;                                   \
    246     r.x = fnc(v1.x, v2.x, v3.x);                \
    247     r.y = fnc(v1.y, v2.y, v3.y);                \
    248     r.z = fnc(v1.z, v2.z, v3.z);                \
    249     return r;                                   \
    250 }                                               \
    251 extern float4 __attribute__((overloadable)) \
    252         fnc(float4 v1, float4 v2, float4 v3) {  \
    253     float4 r;                                   \
    254     r.x = fnc(v1.x, v2.x, v3.x);                \
    255     r.y = fnc(v1.y, v2.y, v3.y);                \
    256     r.z = fnc(v1.z, v2.z, v3.z);                \
    257     r.w = fnc(v1.w, v2.w, v3.w);                \
    258     return r;                                   \
    259 }
    260 
    261 #define FN_FUNC_FN_FN_PIN(fnc)                  \
    262 extern float2 __attribute__((overloadable)) \
    263         fnc(float2 v1, float2 v2, int2 *v3) {   \
    264     float2 r;                                   \
    265     int t[2];                                   \
    266     r.x = fnc(v1.x, v2.x, &t[0]);               \
    267     r.y = fnc(v1.y, v2.y, &t[1]);               \
    268     v3->x = t[0];                               \
    269     v3->y = t[1];                               \
    270     return r;                                   \
    271 }                                               \
    272 extern float3 __attribute__((overloadable)) \
    273         fnc(float3 v1, float3 v2, int3 *v3) {   \
    274     float3 r;                                   \
    275     int t[3];                                   \
    276     r.x = fnc(v1.x, v2.x, &t[0]);               \
    277     r.y = fnc(v1.y, v2.y, &t[1]);               \
    278     r.z = fnc(v1.z, v2.z, &t[2]);               \
    279     v3->x = t[0];                               \
    280     v3->y = t[1];                               \
    281     v3->z = t[2];                               \
    282     return r;                                   \
    283 }                                               \
    284 extern float4 __attribute__((overloadable)) \
    285         fnc(float4 v1, float4 v2, int4 *v3) {   \
    286     float4 r;                                   \
    287     int t[4];                                   \
    288     r.x = fnc(v1.x, v2.x, &t[0]);               \
    289     r.y = fnc(v1.y, v2.y, &t[1]);               \
    290     r.z = fnc(v1.z, v2.z, &t[2]);               \
    291     r.w = fnc(v1.w, v2.w, &t[3]);               \
    292     v3->x = t[0];                               \
    293     v3->y = t[1];                               \
    294     v3->z = t[2];                               \
    295     v3->w = t[3];                               \
    296     return r;                                   \
    297 }
    298 
    299 static const int iposinf = 0x7f800000;
    300 static const int ineginf = 0xff800000;
    301 
    302 static const float posinf() {
    303     float f = *((float*)&iposinf);
    304     return f;
    305 }
    306 
    307 static const float neginf() {
    308     float f = *((float*)&ineginf);
    309     return f;
    310 }
    311 
    312 static bool isinf(float f) {
    313     int i = *((int*)(void*)&f);
    314     return (i == iposinf) || (i == ineginf);
    315 }
    316 
    317 static bool isnan(float f) {
    318     int i = *((int*)(void*)&f);
    319     return (((i & 0x7f800000) == 0x7f800000) && (i & 0x007fffff));
    320 }
    321 
    322 static bool isposzero(float f) {
    323     int i = *((int*)(void*)&f);
    324     return (i == 0x00000000);
    325 }
    326 
    327 static bool isnegzero(float f) {
    328     int i = *((int*)(void*)&f);
    329     return (i == 0x80000000);
    330 }
    331 
    332 static bool iszero(float f) {
    333     return isposzero(f) || isnegzero(f);
    334 }
    335 
    336 
    337 extern float __attribute__((overloadable)) acos(float);
    338 FN_FUNC_FN(acos)
    339 
    340 extern float __attribute__((overloadable)) acosh(float);
    341 FN_FUNC_FN(acosh)
    342 
    343 
    344 extern float __attribute__((overloadable)) acospi(float v) {
    345     return acos(v) / M_PI;
    346 }
    347 FN_FUNC_FN(acospi)
    348 
    349 extern float __attribute__((overloadable)) asin(float);
    350 FN_FUNC_FN(asin)
    351 
    352 extern float __attribute__((overloadable)) asinh(float);
    353 FN_FUNC_FN(asinh)
    354 
    355 extern float __attribute__((overloadable)) asinpi(float v) {
    356     return asin(v) / M_PI;
    357 }
    358 FN_FUNC_FN(asinpi)
    359 
    360 extern float __attribute__((overloadable)) atan(float);
    361 FN_FUNC_FN(atan)
    362 
    363 extern float __attribute__((overloadable)) atan2(float, float);
    364 FN_FUNC_FN_FN(atan2)
    365 
    366 extern float __attribute__((overloadable)) atanh(float);
    367 FN_FUNC_FN(atanh)
    368 
    369 extern float __attribute__((overloadable)) atanpi(float v) {
    370     return atan(v) / M_PI;
    371 }
    372 FN_FUNC_FN(atanpi)
    373 
    374 
    375 extern float __attribute__((overloadable)) atan2pi(float y, float x) {
    376     return atan2(y, x) / M_PI;
    377 }
    378 FN_FUNC_FN_FN(atan2pi)
    379 
    380 extern float __attribute__((overloadable)) cbrt(float);
    381 FN_FUNC_FN(cbrt)
    382 
    383 extern float __attribute__((overloadable)) ceil(float);
    384 FN_FUNC_FN(ceil)
    385 
    386 extern float __attribute__((overloadable)) copysign(float, float);
    387 FN_FUNC_FN_FN(copysign)
    388 
    389 extern float __attribute__((overloadable)) cos(float);
    390 FN_FUNC_FN(cos)
    391 
    392 extern float __attribute__((overloadable)) cosh(float);
    393 FN_FUNC_FN(cosh)
    394 
    395 extern float __attribute__((overloadable)) cospi(float v) {
    396     return cos(v * M_PI);
    397 }
    398 FN_FUNC_FN(cospi)
    399 
    400 extern float __attribute__((overloadable)) erfc(float);
    401 FN_FUNC_FN(erfc)
    402 
    403 extern float __attribute__((overloadable)) erf(float);
    404 FN_FUNC_FN(erf)
    405 
    406 extern float __attribute__((overloadable)) exp(float);
    407 FN_FUNC_FN(exp)
    408 
    409 extern float __attribute__((overloadable)) exp2(float);
    410 FN_FUNC_FN(exp2)
    411 
    412 extern float __attribute__((overloadable)) pow(float, float);
    413 
    414 extern float __attribute__((overloadable)) exp10(float v) {
    415     return exp2(v * 3.321928095f);
    416 }
    417 FN_FUNC_FN(exp10)
    418 
    419 extern float __attribute__((overloadable)) expm1(float);
    420 FN_FUNC_FN(expm1)
    421 
    422 extern float __attribute__((overloadable)) fabs(float v) {
    423     int i = *((int*)(void*)&v) & 0x7fffffff;
    424     return  *((float*)(void*)&i);
    425 }
    426 FN_FUNC_FN(fabs)
    427 
    428 extern float __attribute__((overloadable)) fdim(float, float);
    429 FN_FUNC_FN_FN(fdim)
    430 
    431 extern float __attribute__((overloadable)) floor(float);
    432 FN_FUNC_FN(floor)
    433 
    434 extern float __attribute__((overloadable)) fma(float, float, float);
    435 FN_FUNC_FN_FN_FN(fma)
    436 
    437 extern float __attribute__((overloadable)) fmin(float, float);
    438 
    439 extern float __attribute__((overloadable)) fmod(float, float);
    440 FN_FUNC_FN_FN(fmod)
    441 
    442 extern float __attribute__((overloadable)) fract(float v, float *iptr) {
    443     int i = (int)floor(v);
    444     if (iptr) {
    445         iptr[0] = i;
    446     }
    447     return fmin(v - i, 0x1.fffffep-1f);
    448 }
    449 FN_FUNC_FN_PFN(fract)
    450 
    451 extern float __attribute__((overloadable)) frexp(float, int *);
    452 FN_FUNC_FN_PIN(frexp)
    453 
    454 extern float __attribute__((overloadable)) hypot(float, float);
    455 FN_FUNC_FN_FN(hypot)
    456 
    457 extern int __attribute__((overloadable)) ilogb(float);
    458 IN_FUNC_FN(ilogb)
    459 
    460 extern float __attribute__((overloadable)) ldexp(float, int);
    461 FN_FUNC_FN_IN(ldexp)
    462 FN_FUNC_FN_I(ldexp)
    463 
    464 extern float __attribute__((overloadable)) lgamma(float);
    465 FN_FUNC_FN(lgamma)
    466 extern float __attribute__((overloadable)) lgamma(float, int*);
    467 FN_FUNC_FN_PIN(lgamma)
    468 
    469 extern float __attribute__((overloadable)) log(float);
    470 FN_FUNC_FN(log)
    471 
    472 extern float __attribute__((overloadable)) log10(float);
    473 FN_FUNC_FN(log10)
    474 
    475 
    476 extern float __attribute__((overloadable)) log2(float v) {
    477     return log10(v) * 3.321928095f;
    478 }
    479 FN_FUNC_FN(log2)
    480 
    481 extern float __attribute__((overloadable)) log1p(float);
    482 FN_FUNC_FN(log1p)
    483 
    484 extern float __attribute__((overloadable)) logb(float);
    485 FN_FUNC_FN(logb)
    486 
    487 extern float __attribute__((overloadable)) mad(float a, float b, float c) {
    488     return a * b + c;
    489 }
    490 extern float2 __attribute__((overloadable)) mad(float2 a, float2 b, float2 c) {
    491     return a * b + c;
    492 }
    493 extern float3 __attribute__((overloadable)) mad(float3 a, float3 b, float3 c) {
    494     return a * b + c;
    495 }
    496 extern float4 __attribute__((overloadable)) mad(float4 a, float4 b, float4 c) {
    497     return a * b + c;
    498 }
    499 
    500 extern float __attribute__((overloadable)) modf(float, float *);
    501 FN_FUNC_FN_PFN(modf);
    502 
    503 extern float __attribute__((overloadable)) nan(uint v) {
    504     float f[1];
    505     uint32_t *ip = (uint32_t *)f;
    506     *ip = v | 0x7fc00000;
    507     return f[0];
    508 }
    509 
    510 extern float __attribute__((overloadable)) nextafter(float, float);
    511 FN_FUNC_FN_FN(nextafter)
    512 
    513 FN_FUNC_FN_FN(pow)
    514 
    515 extern float __attribute__((overloadable)) pown(float v, int p) {
    516     /* The mantissa of a float has fewer bits than an int (24 effective vs. 31).
    517      * For very large ints, we'll lose whether the exponent is even or odd, making
    518      * the selection of a correct sign incorrect.  We correct this.  Use copysign
    519      * to handle the negative zero case.
    520      */
    521     float sign = (p & 0x1) ? copysign(1.f, v) : 1.f;
    522     float f = pow(v, (float)p);
    523     return copysign(f, sign);
    524 }
    525 FN_FUNC_FN_IN(pown)
    526 
    527 extern float __attribute__((overloadable)) powr(float v, float p) {
    528     return pow(v, p);
    529 }
    530 extern float2 __attribute__((overloadable)) powr(float2 v, float2 p) {
    531     return pow(v, p);
    532 }
    533 extern float3 __attribute__((overloadable)) powr(float3 v, float3 p) {
    534     return pow(v, p);
    535 }
    536 extern float4 __attribute__((overloadable)) powr(float4 v, float4 p) {
    537     return pow(v, p);
    538 }
    539 
    540 extern float __attribute__((overloadable)) remainder(float, float);
    541 FN_FUNC_FN_FN(remainder)
    542 
    543 extern float __attribute__((overloadable)) remquo(float, float, int *);
    544 FN_FUNC_FN_FN_PIN(remquo)
    545 
    546 extern float __attribute__((overloadable)) rint(float);
    547 FN_FUNC_FN(rint)
    548 
    549 extern float __attribute__((overloadable)) rootn(float v, int r) {
    550     if (r == 0) {
    551         return posinf(0);
    552     }
    553 
    554     if (iszero(v)) {
    555         if (r < 0) {
    556             if (r & 1) {
    557                 return copysign(posinf(), v);
    558             } else {
    559                 return posinf();
    560             }
    561         } else {
    562             if (r & 1) {
    563                 return copysign(0.f, v);
    564             } else {
    565                 return 0.f;
    566             }
    567         }
    568     }
    569 
    570     if (!isinf(v) && !isnan(v) && (v < 0.f)) {
    571         if (r & 1) {
    572             return (-1.f * pow(-1.f * v, 1.f / r));
    573         } else {
    574             return nan(0);
    575         }
    576     }
    577 
    578     return pow(v, 1.f / r);
    579 }
    580 FN_FUNC_FN_IN(rootn);
    581 
    582 extern float __attribute__((overloadable)) round(float);
    583 FN_FUNC_FN(round)
    584 
    585 
    586 extern float __attribute__((overloadable)) sqrt(float);
    587 extern float __attribute__((overloadable)) rsqrt(float v) {
    588     return 1.f / sqrt(v);
    589 }
    590 
    591 #if !defined(__i386__) && !defined(__x86_64__)
    592 FN_FUNC_FN(sqrt)
    593 #else
    594 extern float2 __attribute__((overloadable)) sqrt(float2);
    595 extern float3 __attribute__((overloadable)) sqrt(float3);
    596 extern float4 __attribute__((overloadable)) sqrt(float4);
    597 #endif // !defined(__i386__) && !defined(__x86_64__)
    598 
    599 FN_FUNC_FN(rsqrt)
    600 
    601 extern float __attribute__((overloadable)) sin(float);
    602 FN_FUNC_FN(sin)
    603 
    604 extern float __attribute__((overloadable)) sincos(float v, float *cosptr) {
    605     *cosptr = cos(v);
    606     return sin(v);
    607 }
    608 extern float2 __attribute__((overloadable)) sincos(float2 v, float2 *cosptr) {
    609     *cosptr = cos(v);
    610     return sin(v);
    611 }
    612 extern float3 __attribute__((overloadable)) sincos(float3 v, float3 *cosptr) {
    613     *cosptr = cos(v);
    614     return sin(v);
    615 }
    616 extern float4 __attribute__((overloadable)) sincos(float4 v, float4 *cosptr) {
    617     *cosptr = cos(v);
    618     return sin(v);
    619 }
    620 
    621 extern float __attribute__((overloadable)) sinh(float);
    622 FN_FUNC_FN(sinh)
    623 
    624 extern float __attribute__((overloadable)) sinpi(float v) {
    625     return sin(v * M_PI);
    626 }
    627 FN_FUNC_FN(sinpi)
    628 
    629 extern float __attribute__((overloadable)) tan(float);
    630 FN_FUNC_FN(tan)
    631 
    632 extern float __attribute__((overloadable)) tanh(float);
    633 FN_FUNC_FN(tanh)
    634 
    635 extern float __attribute__((overloadable)) tanpi(float v) {
    636     return tan(v * M_PI);
    637 }
    638 FN_FUNC_FN(tanpi)
    639 
    640 
    641 extern float __attribute__((overloadable)) tgamma(float);
    642 FN_FUNC_FN(tgamma)
    643 
    644 extern float __attribute__((overloadable)) trunc(float);
    645 FN_FUNC_FN(trunc)
    646 
    647 // Int ops (partial), 6.11.3
    648 
    /*
     * XN_FUNC_YN(typeout, fnc, typein)
     * Declares the scalar overload `typeout fnc(typein)` and defines the 2-, 3-
     * and 4-component overloads, each of which calls fnc on every component of
     * its argument and collects the results.
     */
    #define XN_FUNC_YN(typeout, fnc, typein)                                \
    extern typeout __attribute__((overloadable)) fnc(typein);               \
    extern typeout##2 __attribute__((overloadable)) fnc(typein##2 v) {      \
        typeout##2 out;                                                     \
        out.x = fnc(v.x);                                                   \
        out.y = fnc(v.y);                                                   \
        return out;                                                         \
    }                                                                       \
    extern typeout##3 __attribute__((overloadable)) fnc(typein##3 v) {      \
        typeout##3 out;                                                     \
        out.x = fnc(v.x);                                                   \
        out.y = fnc(v.y);                                                   \
        out.z = fnc(v.z);                                                   \
        return out;                                                         \
    }                                                                       \
    extern typeout##4 __attribute__((overloadable)) fnc(typein##4 v) {      \
        typeout##4 out;                                                     \
        out.x = fnc(v.x);                                                   \
        out.y = fnc(v.y);                                                   \
        out.z = fnc(v.z);                                                   \
        out.w = fnc(v.w);                                                   \
        return out;                                                         \
    }
    672 
    673 
    /* UIN_FUNC_IN(fnc): signed integer input, unsigned result of the same width. */
    #define UIN_FUNC_IN(fnc)            \
        XN_FUNC_YN(uint, fnc, int)      \
        XN_FUNC_YN(ushort, fnc, short)  \
        XN_FUNC_YN(uchar, fnc, char)
    678 
    /* IN_FUNC_IN(fnc): result type equals input type, for every 8/16/32-bit integer type. */
    #define IN_FUNC_IN(fnc)             \
        XN_FUNC_YN(char, fnc, char)     \
        XN_FUNC_YN(uchar, fnc, uchar)   \
        XN_FUNC_YN(short, fnc, short)   \
        XN_FUNC_YN(ushort, fnc, ushort) \
        XN_FUNC_YN(int, fnc, int)       \
        XN_FUNC_YN(uint, fnc, uint)
    686 
    687 
    /*
     * XN_FUNC_XN_XN_BODY(type, fnc, body)
     * Defines a two-argument function for `type` and its 2/3/4-component vectors.
     * The scalar overload returns `body` (an expression over v1 and v2); the
     * vector overloads call fnc on each pair of matching components.
     */
    #define XN_FUNC_XN_XN_BODY(type, fnc, body)                             \
    extern type __attribute__((overloadable)) fnc(type v1, type v2) {       \
        return body;                                                        \
    }                                                                       \
    extern type##2 __attribute__((overloadable))                            \
    fnc(type##2 v1, type##2 v2) {                                           \
        type##2 out;                                                        \
        out.x = fnc(v1.x, v2.x);                                            \
        out.y = fnc(v1.y, v2.y);                                            \
        return out;                                                         \
    }                                                                       \
    extern type##3 __attribute__((overloadable))                            \
    fnc(type##3 v1, type##3 v2) {                                           \
        type##3 out;                                                        \
        out.x = fnc(v1.x, v2.x);                                            \
        out.y = fnc(v1.y, v2.y);                                            \
        out.z = fnc(v1.z, v2.z);                                            \
        return out;                                                         \
    }                                                                       \
    extern type##4 __attribute__((overloadable))                            \
    fnc(type##4 v1, type##4 v2) {                                           \
        type##4 out;                                                        \
        out.x = fnc(v1.x, v2.x);                                            \
        out.y = fnc(v1.y, v2.y);                                            \
        out.z = fnc(v1.z, v2.z);                                            \
        out.w = fnc(v1.w, v2.w);                                            \
        return out;                                                         \
    }
    717 
    /* IN_FUNC_IN_IN_BODY(fnc, body): XN_FUNC_XN_XN_BODY for each integer type and for float. */
    #define IN_FUNC_IN_IN_BODY(fnc, body)       \
        XN_FUNC_XN_XN_BODY(char, fnc, body)     \
        XN_FUNC_XN_XN_BODY(uchar, fnc, body)    \
        XN_FUNC_XN_XN_BODY(short, fnc, body)    \
        XN_FUNC_XN_XN_BODY(ushort, fnc, body)   \
        XN_FUNC_XN_XN_BODY(int, fnc, body)      \
        XN_FUNC_XN_XN_BODY(uint, fnc, body)     \
        XN_FUNC_XN_XN_BODY(float, fnc, body)
    726 
    727 
    728 /**
    729  * abs
    730  */
    731 extern uint32_t __attribute__((overloadable)) abs(int32_t v) {
    732     if (v < 0)
    733         return -v;
    734     return v;
    735 }
    /* 16-bit abs: the operand is promoted to int before negation, so -32768 maps to 32768. */
    extern uint16_t __attribute__((overloadable)) abs(int16_t v) {
        return (v < 0) ? -v : v;
    }
    /* 8-bit abs: the operand is promoted to int before negation, so -128 maps to 128. */
    extern uint8_t __attribute__((overloadable)) abs(int8_t v) {
        return (v < 0) ? -v : v;
    }
    746 
    /**
     * clz
     * __builtin_clz only accepts a 32-bit unsigned int, so every input will be
     * expanded to 32 bits. For our smaller data types, we need to subtract off
     * these unused top bits (that will always be composed of zeros).
     *
     * __builtin_clz is undefined for an argument of 0, so zero is handled
     * explicitly and reports the full bit width of the type (32 here).
     */
    extern uint32_t __attribute__((overloadable)) clz(uint32_t v) {
        if (v == 0) {
            return 32;
        }
        return __builtin_clz(v);
    }
    /*
     * Leading zero count of a 16-bit unsigned value. The value is widened to
     * 32 bits for __builtin_clz, so the 16 extra top zeros are subtracted.
     * Zero is handled explicitly (result 16) because __builtin_clz(0) is undefined.
     */
    extern uint16_t __attribute__((overloadable)) clz(uint16_t v) {
        if (v == 0) {
            return 16;
        }
        return __builtin_clz(v) - 16;
    }
    /*
     * Leading zero count of an 8-bit unsigned value. The value is widened to
     * 32 bits for __builtin_clz, so the 24 extra top zeros are subtracted.
     * Zero is handled explicitly (result 8) because __builtin_clz(0) is undefined.
     */
    extern uint8_t __attribute__((overloadable)) clz(uint8_t v) {
        if (v == 0) {
            return 8;
        }
        return __builtin_clz(v) - 24;
    }
    /*
     * Leading zero count of a 32-bit signed value, counted on its bit pattern
     * (any negative input has the top bit set and yields 0).
     * Zero is handled explicitly (result 32) because __builtin_clz(0) is undefined.
     */
    extern int32_t __attribute__((overloadable)) clz(int32_t v) {
        if (v == 0) {
            return 32;
        }
        return __builtin_clz((uint32_t)v);
    }
    /*
     * Leading zero count of a 16-bit signed value, counted on its low 16 bits
     * (the mask discards the sign extension introduced by widening).
     * Zero is handled explicitly (result 16) because __builtin_clz(0) is undefined.
     */
    extern int16_t __attribute__((overloadable)) clz(int16_t v) {
        const uint32_t low16 = ((uint32_t)v) & 0x0000ffff;
        if (low16 == 0) {
            return 16;
        }
        return __builtin_clz(low16) - 16;
    }
    /*
     * Leading zero count of an 8-bit signed value, counted on its low 8 bits
     * (the mask discards the sign extension introduced by widening).
     * Zero is handled explicitly (result 8) because __builtin_clz(0) is undefined.
     */
    extern int8_t __attribute__((overloadable)) clz(int8_t v) {
        const uint32_t low8 = ((uint32_t)v) & 0x000000ff;
        if (low8 == 0) {
            return 8;
        }
        return __builtin_clz(low8) - 24;
    }
    771 
    772 
    773 UIN_FUNC_IN(abs)
    774 IN_FUNC_IN(clz)
    775 
    776 
    777 // 6.11.4
    778 
    779 
    780 extern float __attribute__((overloadable)) degrees(float radians) {
    781     return radians * (180.f / M_PI);
    782 }
    783 extern float2 __attribute__((overloadable)) degrees(float2 radians) {
    784     return radians * (180.f / M_PI);
    785 }
    786 extern float3 __attribute__((overloadable)) degrees(float3 radians) {
    787     return radians * (180.f / M_PI);
    788 }
    789 extern float4 __attribute__((overloadable)) degrees(float4 radians) {
    790     return radians * (180.f / M_PI);
    791 }
    792 
    793 extern float __attribute__((overloadable)) mix(float start, float stop, float amount) {
    794     return start + (stop - start) * amount;
    795 }
    796 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float2 amount) {
    797     return start + (stop - start) * amount;
    798 }
    799 extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float3 amount) {
    800     return start + (stop - start) * amount;
    801 }
    802 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float4 amount) {
    803     return start + (stop - start) * amount;
    804 }
    805 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float amount) {
    806     return start + (stop - start) * amount;
    807 }
    808 extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float amount) {
    809     return start + (stop - start) * amount;
    810 }
    811 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float amount) {
    812     return start + (stop - start) * amount;
    813 }
    814 
    815 extern float __attribute__((overloadable)) radians(float degrees) {
    816     return degrees * (M_PI / 180.f);
    817 }
    818 extern float2 __attribute__((overloadable)) radians(float2 degrees) {
    819     return degrees * (M_PI / 180.f);
    820 }
    821 extern float3 __attribute__((overloadable)) radians(float3 degrees) {
    822     return degrees * (M_PI / 180.f);
    823 }
    824 extern float4 __attribute__((overloadable)) radians(float4 degrees) {
    825     return degrees * (M_PI / 180.f);
    826 }
    827 
    /* step: 0 when v is below edge, 1 otherwise. */
    extern float __attribute__((overloadable)) step(float edge, float v) {
        if (v < edge) {
            return 0.f;
        }
        return 1.f;
    }
    831 extern float2 __attribute__((overloadable)) step(float2 edge, float2 v) {
    832     float2 r;
    833     r.x = (v.x < edge.x) ? 0.f : 1.f;
    834     r.y = (v.y < edge.y) ? 0.f : 1.f;
    835     return r;
    836 }
    837 extern float3 __attribute__((overloadable)) step(float3 edge, float3 v) {
    838     float3 r;
    839     r.x = (v.x < edge.x) ? 0.f : 1.f;
    840     r.y = (v.y < edge.y) ? 0.f : 1.f;
    841     r.z = (v.z < edge.z) ? 0.f : 1.f;
    842     return r;
    843 }
    844 extern float4 __attribute__((overloadable)) step(float4 edge, float4 v) {
    845     float4 r;
    846     r.x = (v.x < edge.x) ? 0.f : 1.f;
    847     r.y = (v.y < edge.y) ? 0.f : 1.f;
    848     r.z = (v.z < edge.z) ? 0.f : 1.f;
    849     r.w = (v.w < edge.w) ? 0.f : 1.f;
    850     return r;
    851 }
    852 extern float2 __attribute__((overloadable)) step(float2 edge, float v) {
    853     float2 r;
    854     r.x = (v < edge.x) ? 0.f : 1.f;
    855     r.y = (v < edge.y) ? 0.f : 1.f;
    856     return r;
    857 }
    858 extern float3 __attribute__((overloadable)) step(float3 edge, float v) {
    859     float3 r;
    860     r.x = (v < edge.x) ? 0.f : 1.f;
    861     r.y = (v < edge.y) ? 0.f : 1.f;
    862     r.z = (v < edge.z) ? 0.f : 1.f;
    863     return r;
    864 }
    865 extern float4 __attribute__((overloadable)) step(float4 edge, float v) {
    866     float4 r;
    867     r.x = (v < edge.x) ? 0.f : 1.f;
    868     r.y = (v < edge.y) ? 0.f : 1.f;
    869     r.z = (v < edge.z) ? 0.f : 1.f;
    870     r.w = (v < edge.w) ? 0.f : 1.f;
    871     return r;
    872 }
    873 extern float2 __attribute__((overloadable)) step(float edge, float2 v) {
    874     float2 r;
    875     r.x = (v.x < edge) ? 0.f : 1.f;
    876     r.y = (v.y < edge) ? 0.f : 1.f;
    877     return r;
    878 }
    879 extern float3 __attribute__((overloadable)) step(float edge, float3 v) {
    880     float3 r;
    881     r.x = (v.x < edge) ? 0.f : 1.f;
    882     r.y = (v.y < edge) ? 0.f : 1.f;
    883     r.z = (v.z < edge) ? 0.f : 1.f;
    884     return r;
    885 }
    886 extern float4 __attribute__((overloadable)) step(float edge, float4 v) {
    887     float4 r;
    888     r.x = (v.x < edge) ? 0.f : 1.f;
    889     r.y = (v.y < edge) ? 0.f : 1.f;
    890     r.z = (v.z < edge) ? 0.f : 1.f;
    891     r.w = (v.w < edge) ? 0.f : 1.f;
    892     return r;
    893 }
    894 
    895 extern float __attribute__((overloadable)) smoothstep(float, float, float);
    896 extern float2 __attribute__((overloadable)) smoothstep(float2, float2, float2);
    897 extern float3 __attribute__((overloadable)) smoothstep(float3, float3, float3);
    898 extern float4 __attribute__((overloadable)) smoothstep(float4, float4, float4);
    899 extern float2 __attribute__((overloadable)) smoothstep(float, float, float2);
    900 extern float3 __attribute__((overloadable)) smoothstep(float, float, float3);
    901 extern float4 __attribute__((overloadable)) smoothstep(float, float, float4);
    902 
    903 extern float __attribute__((overloadable)) sign(float v) {
    904     if (v > 0) return 1.f;
    905     if (v < 0) return -1.f;
    906     return v;
    907 }
    908 FN_FUNC_FN(sign)
    909 
    910 
    911 // 6.11.5
    912 extern float3 __attribute__((overloadable)) cross(float3 lhs, float3 rhs) {
    913     float3 r;
    914     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
    915     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
    916     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
    917     return r;
    918 }
    919 
    920 extern float4 __attribute__((overloadable)) cross(float4 lhs, float4 rhs) {
    921     float4 r;
    922     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
    923     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
    924     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
    925     r.w = 0.f;
    926     return r;
    927 }
    928 
    929 #if !defined(__i386__) && !defined(__x86_64__)
    930 
    931 extern float __attribute__((overloadable)) dot(float lhs, float rhs) {
    932     return lhs * rhs;
    933 }
    934 extern float __attribute__((overloadable)) dot(float2 lhs, float2 rhs) {
    935     return lhs.x*rhs.x + lhs.y*rhs.y;
    936 }
    937 extern float __attribute__((overloadable)) dot(float3 lhs, float3 rhs) {
    938     return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z;
    939 }
    940 extern float __attribute__((overloadable)) dot(float4 lhs, float4 rhs) {
    941     return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z + lhs.w*rhs.w;
    942 }
    943 
    944 extern float __attribute__((overloadable)) length(float v) {
    945     return fabs(v);
    946 }
    947 extern float __attribute__((overloadable)) length(float2 v) {
    948     return sqrt(v.x*v.x + v.y*v.y);
    949 }
    950 extern float __attribute__((overloadable)) length(float3 v) {
    951     return sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
    952 }
    953 extern float __attribute__((overloadable)) length(float4 v) {
    954     return sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
    955 }
    956 
    957 #else
    958 
    959 extern float __attribute__((overloadable)) length(float v);
    960 extern float __attribute__((overloadable)) length(float2 v);
    961 extern float __attribute__((overloadable)) length(float3 v);
    962 extern float __attribute__((overloadable)) length(float4 v);
    963 
    964 #endif // !defined(__i386__) && !defined(__x86_64__)
    965 
    966 extern float __attribute__((overloadable)) distance(float lhs, float rhs) {
    967     return length(lhs - rhs);
    968 }
    969 extern float __attribute__((overloadable)) distance(float2 lhs, float2 rhs) {
    970     return length(lhs - rhs);
    971 }
    972 extern float __attribute__((overloadable)) distance(float3 lhs, float3 rhs) {
    973     return length(lhs - rhs);
    974 }
    975 extern float __attribute__((overloadable)) distance(float4 lhs, float4 rhs) {
    976     return length(lhs - rhs);
    977 }
    978 
    /* For the normalization functions, vectors of length 0 should simply be
     * returned (i.e. all the components of that vector are 0).
     *
     * Scalar case: 0 for a zero input, -1 for a negative input, 1 otherwise.
     */
    extern float __attribute__((overloadable)) normalize(float v) {
        if (v == 0.0f) {
            return 0.0f;
        }
        return (v < 0.0f) ? -1.0f : 1.0f;
    }
    991 extern float2 __attribute__((overloadable)) normalize(float2 v) {
    992     float l = length(v);
    993     return l == 0.0f ? v : v / l;
    994 }
    995 extern float3 __attribute__((overloadable)) normalize(float3 v) {
    996     float l = length(v);
    997     return l == 0.0f ? v : v / l;
    998 }
    999 extern float4 __attribute__((overloadable)) normalize(float4 v) {
   1000     float l = length(v);
   1001     return l == 0.0f ? v : v / l;
   1002 }
   1003 
   1004 extern float __attribute__((overloadable)) half_sqrt(float v) {
   1005     return sqrt(v);
   1006 }
   1007 FN_FUNC_FN(half_sqrt)
   1008 
   1009 extern float __attribute__((overloadable)) fast_length(float v) {
   1010     return fabs(v);
   1011 }
   1012 extern float __attribute__((overloadable)) fast_length(float2 v) {
   1013     return half_sqrt(v.x*v.x + v.y*v.y);
   1014 }
   1015 extern float __attribute__((overloadable)) fast_length(float3 v) {
   1016     return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
   1017 }
   1018 extern float __attribute__((overloadable)) fast_length(float4 v) {
   1019     return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
   1020 }
   1021 
   1022 extern float __attribute__((overloadable)) fast_distance(float lhs, float rhs) {
   1023     return fast_length(lhs - rhs);
   1024 }
   1025 extern float __attribute__((overloadable)) fast_distance(float2 lhs, float2 rhs) {
   1026     return fast_length(lhs - rhs);
   1027 }
   1028 extern float __attribute__((overloadable)) fast_distance(float3 lhs, float3 rhs) {
   1029     return fast_length(lhs - rhs);
   1030 }
   1031 extern float __attribute__((overloadable)) fast_distance(float4 lhs, float4 rhs) {
   1032     return fast_length(lhs - rhs);
   1033 }
   1034 
   /* half_rsqrt is only declared here; it is used by the vector fast_normalize overloads. */
   extern float __attribute__((overloadable)) half_rsqrt(float v);

   /* For the normalization functions, vectors of length 0 should simply be
    * returned (i.e. all the components of that vector are 0).
    *
    * Scalar case: 0 for a zero input, -1 for a negative input, 1 otherwise.
    */
   extern float __attribute__((overloadable)) fast_normalize(float v) {
       if (v == 0.0f) {
           return 0.0f;
       }
       return (v < 0.0f) ? -1.0f : 1.0f;
   }
   1049 // If the length is 0, then rlength should be NaN.
   1050 extern float2 __attribute__((overloadable)) fast_normalize(float2 v) {
   1051     float rlength = half_rsqrt(v.x*v.x + v.y*v.y);
   1052     return (rlength == rlength) ? v * rlength : v;
   1053 }
   1054 extern float3 __attribute__((overloadable)) fast_normalize(float3 v) {
   1055     float rlength = half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z);
   1056     return (rlength == rlength) ? v * rlength : v;
   1057 }
   1058 extern float4 __attribute__((overloadable)) fast_normalize(float4 v) {
   1059     float rlength = half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
   1060     return (rlength == rlength) ? v * rlength : v;
   1061 }
   1062 
   /* half_recip: reciprocal of v, computed as a plain float division. */
   extern float __attribute__((overloadable)) half_recip(float v) {
       const float one = 1.f;
       return one / v;
   }
   1066 
   1067 /*
   1068 extern float __attribute__((overloadable)) approx_atan(float x) {
   1069     if (x == 0.f)
   1070         return 0.f;
   1071     if (x < 0.f)
   1072         return -1.f * approx_atan(-1.f * x);
   1073     if (x > 1.f)
   1074         return M_PI_2 - approx_atan(approx_recip(x));
   1075     return x * approx_recip(1.f + 0.28f * x*x);
   1076 }
   1077 FN_FUNC_FN(approx_atan)
   1078 */
   1079 
   /* Overlay of a float and a 32-bit integer, used to read one as the other. */
   typedef union {
       float fv;
       int32_t iv;
   } ieee_float_shape_type;

   /* GET_FLOAT_WORD(i, d): store the bit pattern of float d into the integer lvalue i. */
   #define GET_FLOAT_WORD(i,d)                     \
   do {                                            \
       ieee_float_shape_type get_overlay;          \
       get_overlay.fv = (d);                       \
       (i) = get_overlay.iv;                       \
   } while (0)

   /* SET_FLOAT_WORD(d, i): store into the float lvalue d the value whose bit pattern is i. */
   #define SET_FLOAT_WORD(d,i)                     \
   do {                                            \
       ieee_float_shape_type set_overlay;          \
       set_overlay.iv = (i);                       \
       (d) = set_overlay.fv;                       \
   } while (0)
   1103 
   1104 
   1105 
   1106 // Valid -125 to 125
   1107 extern float __attribute__((overloadable)) native_exp2(float v) {
   1108     int32_t iv = (int)v;
   1109     int32_t x = iv + (iv >> 31); // ~floor(v)
   1110     float r = (v - x);
   1111 
   1112     float fo;
   1113     SET_FLOAT_WORD(fo, (x + 127) << 23);
   1114 
   1115     r *= 0.694f; // ~ log(e) / log(2)
   1116     float r2 = r*r;
   1117     float adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
   1118     return fo * adj;
   1119 }
   1120 
   1121 extern float2 __attribute__((overloadable)) native_exp2(float2 v) {
   1122     int2 iv = convert_int2(v);
   1123     int2 x = iv + (iv >> (int2)31);//floor(v);
   1124     float2 r = (v - convert_float2(x));
   1125 
   1126     x += 127;
   1127 
   1128     float2 fo = (float2)(x << (int2)23);
   1129 
   1130     r *= 0.694f; // ~ log(e) / log(2)
   1131     float2 r2 = r*r;
   1132     float2 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
   1133     return fo * adj;
   1134 }
   1135 
   1136 extern float4 __attribute__((overloadable)) native_exp2(float4 v) {
   1137     int4 iv = convert_int4(v);
   1138     int4 x = iv + (iv >> (int4)31);//floor(v);
   1139     float4 r = (v - convert_float4(x));
   1140 
   1141     x += 127;
   1142 
   1143     float4 fo = (float4)(x << (int4)23);
   1144 
   1145     r *= 0.694f; // ~ log(e) / log(2)
   1146     float4 r2 = r*r;
   1147     float4 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
   1148     return fo * adj;
   1149 }
   1150 
   1151 extern float3 __attribute__((overloadable)) native_exp2(float3 v) {
   1152     float4 t = 1.f;
   1153     t.xyz = v;
   1154     return native_exp2(t).xyz;
   1155 }
   1156 
   1157 
   1158 extern float __attribute__((overloadable)) native_exp(float v) {
   1159     return native_exp2(v * 1.442695041f);
   1160 }
   1161 extern float2 __attribute__((overloadable)) native_exp(float2 v) {
   1162     return native_exp2(v * 1.442695041f);
   1163 }
   1164 extern float3 __attribute__((overloadable)) native_exp(float3 v) {
   1165     return native_exp2(v * 1.442695041f);
   1166 }
   1167 extern float4 __attribute__((overloadable)) native_exp(float4 v) {
   1168     return native_exp2(v * 1.442695041f);
   1169 }
   1170 
   1171 extern float __attribute__((overloadable)) native_exp10(float v) {
   1172     return native_exp2(v * 3.321928095f);
   1173 }
   1174 extern float2 __attribute__((overloadable)) native_exp10(float2 v) {
   1175     return native_exp2(v * 3.321928095f);
   1176 }
   1177 extern float3 __attribute__((overloadable)) native_exp10(float3 v) {
   1178     return native_exp2(v * 3.321928095f);
   1179 }
   1180 extern float4 __attribute__((overloadable)) native_exp10(float4 v) {
   1181     return native_exp2(v * 3.321928095f);
   1182 }
   1183 
   1184 extern float __attribute__((overloadable)) native_log2(float v) {
   1185     int32_t ibits;
   1186     GET_FLOAT_WORD(ibits, v);
   1187 
   1188     int32_t e = (ibits >> 23) & 0xff;
   1189 
   1190     ibits &= 0x7fffff;
   1191     ibits |= 127 << 23;
   1192 
   1193     float ir;
   1194     SET_FLOAT_WORD(ir, ibits);
   1195     ir -= 1.5f;
   1196     float ir2 = ir*ir;
   1197     float adj2 = (0.405465108f / 0.693147181f) +
   1198                  ((0.666666667f / 0.693147181f) * ir) -
   1199                  ((0.222222222f / 0.693147181f) * ir2) +
   1200                  ((0.098765432f / 0.693147181f) * ir*ir2) -
   1201                  ((0.049382716f / 0.693147181f) * ir2*ir2) +
   1202                  ((0.026337449f / 0.693147181f) * ir*ir2*ir2) -
   1203                  ((0.014631916f / 0.693147181f) * ir2*ir2*ir2);
   1204     return (float)(e - 127) + adj2;
   1205 }
   1206 extern float2 __attribute__((overloadable)) native_log2(float2 v) {
   1207     float2 v2 = {native_log2(v.x), native_log2(v.y)};
   1208     return v2;
   1209 }
   1210 extern float3 __attribute__((overloadable)) native_log2(float3 v) {
   1211     float3 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z)};
   1212     return v2;
   1213 }
   1214 extern float4 __attribute__((overloadable)) native_log2(float4 v) {
   1215     float4 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z), native_log2(v.w)};
   1216     return v2;
   1217 }
   1218 
   1219 extern float __attribute__((overloadable)) native_log(float v) {
   1220     return native_log2(v) * (1.f / 1.442695041f);
   1221 }
   1222 extern float2 __attribute__((overloadable)) native_log(float2 v) {
   1223     return native_log2(v) * (1.f / 1.442695041f);
   1224 }
   1225 extern float3 __attribute__((overloadable)) native_log(float3 v) {
   1226     return native_log2(v) * (1.f / 1.442695041f);
   1227 }
   1228 extern float4 __attribute__((overloadable)) native_log(float4 v) {
   1229     return native_log2(v) * (1.f / 1.442695041f);
   1230 }
   1231 
   1232 extern float __attribute__((overloadable)) native_log10(float v) {
   1233     return native_log2(v) * (1.f / 3.321928095f);
   1234 }
   1235 extern float2 __attribute__((overloadable)) native_log10(float2 v) {
   1236     return native_log2(v) * (1.f / 3.321928095f);
   1237 }
   1238 extern float3 __attribute__((overloadable)) native_log10(float3 v) {
   1239     return native_log2(v) * (1.f / 3.321928095f);
   1240 }
   1241 extern float4 __attribute__((overloadable)) native_log10(float4 v) {
   1242     return native_log2(v) * (1.f / 3.321928095f);
   1243 }
   1244 
   1245 
   1246 extern float __attribute__((overloadable)) native_powr(float v, float y) {
   1247     float v2 = native_log2(v);
   1248     v2 = fmax(v2 * y, -125.f);
   1249     return native_exp2(v2);
   1250 }
   1251 extern float2 __attribute__((overloadable)) native_powr(float2 v, float2 y) {
   1252     float2 v2 = native_log2(v);
   1253     v2 = fmax(v2 * y, -125.f);
   1254     return native_exp2(v2);
   1255 }
   1256 extern float3 __attribute__((overloadable)) native_powr(float3 v, float3 y) {
   1257     float3 v2 = native_log2(v);
   1258     v2 = fmax(v2 * y, -125.f);
   1259     return native_exp2(v2);
   1260 }
   1261 extern float4 __attribute__((overloadable)) native_powr(float4 v, float4 y) {
   1262     float4 v2 = native_log2(v);
   1263     v2 = fmax(v2 * y, -125.f);
   1264     return native_exp2(v2);
   1265 }
   1266 
   1267 extern double __attribute__((overloadable)) min(double v1, double v2) {
   1268     return v1 < v2 ? v1 : v2;
   1269 }
   1270 
   1271 extern double2 __attribute__((overloadable)) min(double2 v1, double2 v2) {
   1272     double2 r;
   1273     r.x = v1.x < v2.x ? v1.x : v2.x;
   1274     r.y = v1.y < v2.y ? v1.y : v2.y;
   1275     return r;
   1276 }
   1277 
   1278 extern double3 __attribute__((overloadable)) min(double3 v1, double3 v2) {
   1279     double3 r;
   1280     r.x = v1.x < v2.x ? v1.x : v2.x;
   1281     r.y = v1.y < v2.y ? v1.y : v2.y;
   1282     r.z = v1.z < v2.z ? v1.z : v2.z;
   1283     return r;
   1284 }
   1285 
   1286 extern double4 __attribute__((overloadable)) min(double4 v1, double4 v2) {
   1287     double4 r;
   1288     r.x = v1.x < v2.x ? v1.x : v2.x;
   1289     r.y = v1.y < v2.y ? v1.y : v2.y;
   1290     r.z = v1.z < v2.z ? v1.z : v2.z;
   1291     r.w = v1.w < v2.w ? v1.w : v2.w;
   1292     return r;
   1293 }
   1294 
   1295 extern long __attribute__((overloadable)) min(long v1, long v2) {
   1296     return v1 < v2 ? v1 : v2;
   1297 }
   1298 extern long2 __attribute__((overloadable)) min(long2 v1, long2 v2) {
   1299     long2 r;
   1300     r.x = v1.x < v2.x ? v1.x : v2.x;
   1301     r.y = v1.y < v2.y ? v1.y : v2.y;
   1302     return r;
   1303 }
   1304 extern long3 __attribute__((overloadable)) min(long3 v1, long3 v2) {
   1305     long3 r;
   1306     r.x = v1.x < v2.x ? v1.x : v2.x;
   1307     r.y = v1.y < v2.y ? v1.y : v2.y;
   1308     r.z = v1.z < v2.z ? v1.z : v2.z;
   1309     return r;
   1310 }
   1311 extern long4 __attribute__((overloadable)) min(long4 v1, long4 v2) {
   1312     long4 r;
   1313     r.x = v1.x < v2.x ? v1.x : v2.x;
   1314     r.y = v1.y < v2.y ? v1.y : v2.y;
   1315     r.z = v1.z < v2.z ? v1.z : v2.z;
   1316     r.w = v1.w < v2.w ? v1.w : v2.w;
   1317     return r;
   1318 }
   1319 
   1320 extern ulong __attribute__((overloadable)) min(ulong v1, ulong v2) {
   1321     return v1 < v2 ? v1 : v2;
   1322 }
   1323 extern ulong2 __attribute__((overloadable)) min(ulong2 v1, ulong2 v2) {
   1324     ulong2 r;
   1325     r.x = v1.x < v2.x ? v1.x : v2.x;
   1326     r.y = v1.y < v2.y ? v1.y : v2.y;
   1327     return r;
   1328 }
   1329 extern ulong3 __attribute__((overloadable)) min(ulong3 v1, ulong3 v2) {
   1330     ulong3 r;
   1331     r.x = v1.x < v2.x ? v1.x : v2.x;
   1332     r.y = v1.y < v2.y ? v1.y : v2.y;
   1333     r.z = v1.z < v2.z ? v1.z : v2.z;
   1334     return r;
   1335 }
   1336 extern ulong4 __attribute__((overloadable)) min(ulong4 v1, ulong4 v2) {
   1337     ulong4 r;
   1338     r.x = v1.x < v2.x ? v1.x : v2.x;
   1339     r.y = v1.y < v2.y ? v1.y : v2.y;
   1340     r.z = v1.z < v2.z ? v1.z : v2.z;
   1341     r.w = v1.w < v2.w ? v1.w : v2.w;
   1342     return r;
   1343 }
   1344 
   1345 extern double __attribute__((overloadable)) max(double v1, double v2) {
   1346     return v1 > v2 ? v1 : v2;
   1347 }
   1348 
   1349 extern double2 __attribute__((overloadable)) max(double2 v1, double2 v2) {
   1350     double2 r;
   1351     r.x = v1.x > v2.x ? v1.x : v2.x;
   1352     r.y = v1.y > v2.y ? v1.y : v2.y;
   1353     return r;
   1354 }
   1355 
   1356 extern double3 __attribute__((overloadable)) max(double3 v1, double3 v2) {
   1357     double3 r;
   1358     r.x = v1.x > v2.x ? v1.x : v2.x;
   1359     r.y = v1.y > v2.y ? v1.y : v2.y;
   1360     r.z = v1.z > v2.z ? v1.z : v2.z;
   1361     return r;
   1362 }
   1363 
   1364 extern double4 __attribute__((overloadable)) max(double4 v1, double4 v2) {
   1365     double4 r;
   1366     r.x = v1.x > v2.x ? v1.x : v2.x;
   1367     r.y = v1.y > v2.y ? v1.y : v2.y;
   1368     r.z = v1.z > v2.z ? v1.z : v2.z;
   1369     r.w = v1.w > v2.w ? v1.w : v2.w;
   1370     return r;
   1371 }
   1372 
   1373 extern long __attribute__((overloadable)) max(long v1, long v2) {
   1374     return v1 > v2 ? v1 : v2;
   1375 }
   1376 extern long2 __attribute__((overloadable)) max(long2 v1, long2 v2) {
   1377     long2 r;
   1378     r.x = v1.x > v2.x ? v1.x : v2.x;
   1379     r.y = v1.y > v2.y ? v1.y : v2.y;
   1380     return r;
   1381 }
   1382 extern long3 __attribute__((overloadable)) max(long3 v1, long3 v2) {
   1383     long3 r;
   1384     r.x = v1.x > v2.x ? v1.x : v2.x;
   1385     r.y = v1.y > v2.y ? v1.y : v2.y;
   1386     r.z = v1.z > v2.z ? v1.z : v2.z;
   1387     return r;
   1388 }
   1389 extern long4 __attribute__((overloadable)) max(long4 v1, long4 v2) {
   1390     long4 r;
   1391     r.x = v1.x > v2.x ? v1.x : v2.x;
   1392     r.y = v1.y > v2.y ? v1.y : v2.y;
   1393     r.z = v1.z > v2.z ? v1.z : v2.z;
   1394     r.w = v1.w > v2.w ? v1.w : v2.w;
   1395     return r;
   1396 }
   1397 
   1398 extern ulong __attribute__((overloadable)) max(ulong v1, ulong v2) {
   1399     return v1 > v2 ? v1 : v2;
   1400 }
   1401 extern ulong2 __attribute__((overloadable)) max(ulong2 v1, ulong2 v2) {
   1402     ulong2 r;
   1403     r.x = v1.x > v2.x ? v1.x : v2.x;
   1404     r.y = v1.y > v2.y ? v1.y : v2.y;
   1405     return r;
   1406 }
   1407 extern ulong3 __attribute__((overloadable)) max(ulong3 v1, ulong3 v2) {
   1408     ulong3 r;
   1409     r.x = v1.x > v2.x ? v1.x : v2.x;
   1410     r.y = v1.y > v2.y ? v1.y : v2.y;
   1411     r.z = v1.z > v2.z ? v1.z : v2.z;
   1412     return r;
   1413 }
   1414 extern ulong4 __attribute__((overloadable)) max(ulong4 v1, ulong4 v2) {
   1415     ulong4 r;
   1416     r.x = v1.x > v2.x ? v1.x : v2.x;
   1417     r.y = v1.y > v2.y ? v1.y : v2.y;
   1418     r.z = v1.z > v2.z ? v1.z : v2.z;
   1419     r.w = v1.w > v2.w ? v1.w : v2.w;
   1420     return r;
   1421 }
   1422 
   /*
    * THUNK_NATIVE_* macros: define native_<fn> for float and float2/3/4 as a
    * direct forward to <fn>. The suffix names the argument shape:
    *   _F    one float-type argument
    *   _F_F  two float-type arguments
    *   _F_FP a float-type argument and a pointer to the same type
    *   _F_I  a float-type argument and an int-type argument of matching width
    */
   #define THUNK_NATIVE_F(fn)                                                  \
       float __attribute__((overloadable)) native_##fn(float v) {              \
           return fn(v);                                                       \
       }                                                                       \
       float2 __attribute__((overloadable)) native_##fn(float2 v) {            \
           return fn(v);                                                       \
       }                                                                       \
       float3 __attribute__((overloadable)) native_##fn(float3 v) {            \
           return fn(v);                                                       \
       }                                                                       \
       float4 __attribute__((overloadable)) native_##fn(float4 v) {            \
           return fn(v);                                                       \
       }

   #define THUNK_NATIVE_F_F(fn)                                                \
       float __attribute__((overloadable)) native_##fn(float v1, float v2) {   \
           return fn(v1, v2);                                                  \
       }                                                                       \
       float2 __attribute__((overloadable)) native_##fn(float2 v1, float2 v2) {\
           return fn(v1, v2);                                                  \
       }                                                                       \
       float3 __attribute__((overloadable)) native_##fn(float3 v1, float3 v2) {\
           return fn(v1, v2);                                                  \
       }                                                                       \
       float4 __attribute__((overloadable)) native_##fn(float4 v1, float4 v2) {\
           return fn(v1, v2);                                                  \
       }

   #define THUNK_NATIVE_F_FP(fn)                                               \
       float __attribute__((overloadable)) native_##fn(float v1, float *v2) {  \
           return fn(v1, v2);                                                  \
       }                                                                       \
       float2 __attribute__((overloadable)) native_##fn(float2 v1, float2 *v2) { \
           return fn(v1, v2);                                                  \
       }                                                                       \
       float3 __attribute__((overloadable)) native_##fn(float3 v1, float3 *v2) { \
           return fn(v1, v2);                                                  \
       }                                                                       \
       float4 __attribute__((overloadable)) native_##fn(float4 v1, float4 *v2) { \
           return fn(v1, v2);                                                  \
       }

   #define THUNK_NATIVE_F_I(fn)                                                \
       float __attribute__((overloadable)) native_##fn(float v1, int v2) {     \
           return fn(v1, v2);                                                  \
       }                                                                       \
       float2 __attribute__((overloadable)) native_##fn(float2 v1, int2 v2) {  \
           return fn(v1, v2);                                                  \
       }                                                                       \
       float3 __attribute__((overloadable)) native_##fn(float3 v1, int3 v2) {  \
           return fn(v1, v2);                                                  \
       }                                                                       \
       float4 __attribute__((overloadable)) native_##fn(float4 v1, int4 v2) {  \
           return fn(v1, v2);                                                  \
       }
   1446 
   1447 THUNK_NATIVE_F(acos)
   1448 THUNK_NATIVE_F(acosh)
   1449 THUNK_NATIVE_F(acospi)
   1450 THUNK_NATIVE_F(asin)
   1451 THUNK_NATIVE_F(asinh)
   1452 THUNK_NATIVE_F(asinpi)
   1453 THUNK_NATIVE_F(atan)
   1454 THUNK_NATIVE_F_F(atan2)
   1455 THUNK_NATIVE_F(atanh)
   1456 THUNK_NATIVE_F(atanpi)
   1457 THUNK_NATIVE_F_F(atan2pi)
   1458 THUNK_NATIVE_F(cbrt)
   1459 THUNK_NATIVE_F(cos)
   1460 THUNK_NATIVE_F(cosh)
   1461 THUNK_NATIVE_F(cospi)
   1462 THUNK_NATIVE_F(expm1)
   1463 THUNK_NATIVE_F_F(hypot)
   1464 THUNK_NATIVE_F(log1p)
   1465 THUNK_NATIVE_F_I(rootn)
   1466 THUNK_NATIVE_F(rsqrt)
   1467 THUNK_NATIVE_F(sqrt)
   1468 THUNK_NATIVE_F(sin)
   1469 THUNK_NATIVE_F_FP(sincos)
   1470 THUNK_NATIVE_F(sinh)
   1471 THUNK_NATIVE_F(sinpi)
   1472 THUNK_NATIVE_F(tan)
   1473 THUNK_NATIVE_F(tanh)
   1474 THUNK_NATIVE_F(tanpi)
   1475 
   1476 #undef THUNK_NATIVE_F
   1477 #undef THUNK_NATIVE_F_F
   1478 #undef THUNK_NATIVE_F_I
   1479 #undef THUNK_NATIVE_F_FP
   1480 
   1481 float __attribute__((overloadable)) native_normalize(float v) { return fast_normalize(v);}
   1482 float2 __attribute__((overloadable)) native_normalize(float2 v) { return fast_normalize(v);}
   1483 float3 __attribute__((overloadable)) native_normalize(float3 v) { return fast_normalize(v);}
   1484 float4 __attribute__((overloadable)) native_normalize(float4 v) { return fast_normalize(v);}
   1485 
   1486 float __attribute__((overloadable)) native_distance(float v1, float v2) { return fast_distance(v1, v2);}
   1487 float __attribute__((overloadable)) native_distance(float2 v1, float2 v2) { return fast_distance(v1, v2);}
   1488 float __attribute__((overloadable)) native_distance(float3 v1, float3 v2) { return fast_distance(v1, v2);}
   1489 float __attribute__((overloadable)) native_distance(float4 v1, float4 v2) { return fast_distance(v1, v2);}
   1490 
   1491 float __attribute__((overloadable)) native_length(float v) { return fast_length(v);}
   1492 float __attribute__((overloadable)) native_length(float2 v) { return fast_length(v);}
   1493 float __attribute__((overloadable)) native_length(float3 v) { return fast_length(v);}
   1494 float __attribute__((overloadable)) native_length(float4 v) { return fast_length(v);}
   1495 
   1496 float __attribute__((overloadable)) native_divide(float v1, float v2) { return v1 / v2;}
   1497 float2 __attribute__((overloadable)) native_divide(float2 v1, float2 v2) { return v1 / v2;}
   1498 float3 __attribute__((overloadable)) native_divide(float3 v1, float3 v2) { return v1 / v2;}
   1499 float4 __attribute__((overloadable)) native_divide(float4 v1, float4 v2) { return v1 / v2;}
   1500 
   1501 float __attribute__((overloadable)) native_recip(float v) { return 1.f / v;}
   1502 float2 __attribute__((overloadable)) native_recip(float2 v) { return ((float2)1.f) / v;}
   1503 float3 __attribute__((overloadable)) native_recip(float3 v) { return ((float3)1.f) / v;}
   1504 float4 __attribute__((overloadable)) native_recip(float4 v) { return ((float4)1.f) / v;}
   1505 
   1506 
   1507 
   1508 
   1509 
   1510 #undef FN_FUNC_FN
   1511 #undef IN_FUNC_FN
   1512 #undef FN_FUNC_FN_FN
   1513 #undef FN_FUNC_FN_F
   1514 #undef FN_FUNC_FN_IN
   1515 #undef FN_FUNC_FN_I
   1516 #undef FN_FUNC_FN_PFN
   1517 #undef FN_FUNC_FN_PIN
   1518 #undef FN_FUNC_FN_FN_FN
   1519 #undef FN_FUNC_FN_FN_PIN
   1520 #undef XN_FUNC_YN
   1521 #undef UIN_FUNC_IN
   1522 #undef IN_FUNC_IN
   1523 #undef XN_FUNC_XN_XN_BODY
   1524 #undef IN_FUNC_IN_IN_BODY
   1525