Home | History | Annotate | Download | only in runtime
      1 #include "rs_types.rsh"
      2 
      3 extern float2 __attribute__((overloadable)) convert_float2(int2 c);
      4 extern float3 __attribute__((overloadable)) convert_float3(int3 c);
      5 extern float4 __attribute__((overloadable)) convert_float4(int4 c);
      6 
      7 // Float ops, 6.11.2
      8 
      9 #define FN_FUNC_FN(fnc)                                         \
     10 extern float2 __attribute__((overloadable)) fnc(float2 v) { \
     11     float2 r;                                                   \
     12     r.x = fnc(v.x);                                             \
     13     r.y = fnc(v.y);                                             \
     14     return r;                                                   \
     15 }                                                               \
     16 extern float3 __attribute__((overloadable)) fnc(float3 v) { \
     17     float3 r;                                                   \
     18     r.x = fnc(v.x);                                             \
     19     r.y = fnc(v.y);                                             \
     20     r.z = fnc(v.z);                                             \
     21     return r;                                                   \
     22 }                                                               \
     23 extern float4 __attribute__((overloadable)) fnc(float4 v) { \
     24     float4 r;                                                   \
     25     r.x = fnc(v.x);                                             \
     26     r.y = fnc(v.y);                                             \
     27     r.z = fnc(v.z);                                             \
     28     r.w = fnc(v.w);                                             \
     29     return r;                                                   \
     30 }
     31 
     32 #define IN_FUNC_FN(fnc)                                         \
     33 extern int2 __attribute__((overloadable)) fnc(float2 v) {   \
     34     int2 r;                                                     \
     35     r.x = fnc(v.x);                                             \
     36     r.y = fnc(v.y);                                             \
     37     return r;                                                   \
     38 }                                                               \
     39 extern int3 __attribute__((overloadable)) fnc(float3 v) {   \
     40     int3 r;                                                     \
     41     r.x = fnc(v.x);                                             \
     42     r.y = fnc(v.y);                                             \
     43     r.z = fnc(v.z);                                             \
     44     return r;                                                   \
     45 }                                                               \
     46 extern int4 __attribute__((overloadable)) fnc(float4 v) {   \
     47     int4 r;                                                     \
     48     r.x = fnc(v.x);                                             \
     49     r.y = fnc(v.y);                                             \
     50     r.z = fnc(v.z);                                             \
     51     r.w = fnc(v.w);                                             \
     52     return r;                                                   \
     53 }
     54 
     55 #define FN_FUNC_FN_FN(fnc)                                                  \
     56 extern float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2) { \
     57     float2 r;                                                               \
     58     r.x = fnc(v1.x, v2.x);                                                  \
     59     r.y = fnc(v1.y, v2.y);                                                  \
     60     return r;                                                               \
     61 }                                                                           \
     62 extern float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2) { \
     63     float3 r;                                                               \
     64     r.x = fnc(v1.x, v2.x);                                                  \
     65     r.y = fnc(v1.y, v2.y);                                                  \
     66     r.z = fnc(v1.z, v2.z);                                                  \
     67     return r;                                                               \
     68 }                                                                           \
     69 extern float4 __attribute__((overloadable)) fnc(float4 v1, float4 v2) { \
     70     float4 r;                                                               \
     71     r.x = fnc(v1.x, v2.x);                                                  \
     72     r.y = fnc(v1.y, v2.y);                                                  \
     73     r.z = fnc(v1.z, v2.z);                                                  \
     74     r.w = fnc(v1.w, v2.w);                                                  \
     75     return r;                                                               \
     76 }
     77 
     78 #define FN_FUNC_FN_F(fnc)                                                   \
     79 extern float2 __attribute__((overloadable)) fnc(float2 v1, float v2) {  \
     80     float2 r;                                                               \
     81     r.x = fnc(v1.x, v2);                                                    \
     82     r.y = fnc(v1.y, v2);                                                    \
     83     return r;                                                               \
     84 }                                                                           \
     85 extern float3 __attribute__((overloadable)) fnc(float3 v1, float v2) {  \
     86     float3 r;                                                               \
     87     r.x = fnc(v1.x, v2);                                                    \
     88     r.y = fnc(v1.y, v2);                                                    \
     89     r.z = fnc(v1.z, v2);                                                    \
     90     return r;                                                               \
     91 }                                                                           \
     92 extern float4 __attribute__((overloadable)) fnc(float4 v1, float v2) {  \
     93     float4 r;                                                               \
     94     r.x = fnc(v1.x, v2);                                                    \
     95     r.y = fnc(v1.y, v2);                                                    \
     96     r.z = fnc(v1.z, v2);                                                    \
     97     r.w = fnc(v1.w, v2);                                                    \
     98     return r;                                                               \
     99 }
    100 
    101 #define FN_FUNC_FN_IN(fnc)                                                  \
    102 extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 v2) {   \
    103     float2 r;                                                               \
    104     r.x = fnc(v1.x, v2.x);                                                  \
    105     r.y = fnc(v1.y, v2.y);                                                  \
    106     return r;                                                               \
    107 }                                                                           \
    108 extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 v2) {   \
    109     float3 r;                                                               \
    110     r.x = fnc(v1.x, v2.x);                                                  \
    111     r.y = fnc(v1.y, v2.y);                                                  \
    112     r.z = fnc(v1.z, v2.z);                                                  \
    113     return r;                                                               \
    114 }                                                                           \
    115 extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 v2) {   \
    116     float4 r;                                                               \
    117     r.x = fnc(v1.x, v2.x);                                                  \
    118     r.y = fnc(v1.y, v2.y);                                                  \
    119     r.z = fnc(v1.z, v2.z);                                                  \
    120     r.w = fnc(v1.w, v2.w);                                                  \
    121     return r;                                                               \
    122 }
    123 
    124 #define FN_FUNC_FN_I(fnc)                                                   \
    125 extern float2 __attribute__((overloadable)) fnc(float2 v1, int v2) {    \
    126     float2 r;                                                               \
    127     r.x = fnc(v1.x, v2);                                                    \
    128     r.y = fnc(v1.y, v2);                                                    \
    129     return r;                                                               \
    130 }                                                                           \
    131 extern float3 __attribute__((overloadable)) fnc(float3 v1, int v2) {    \
    132     float3 r;                                                               \
    133     r.x = fnc(v1.x, v2);                                                    \
    134     r.y = fnc(v1.y, v2);                                                    \
    135     r.z = fnc(v1.z, v2);                                                    \
    136     return r;                                                               \
    137 }                                                                           \
    138 extern float4 __attribute__((overloadable)) fnc(float4 v1, int v2) {    \
    139     float4 r;                                                               \
    140     r.x = fnc(v1.x, v2);                                                    \
    141     r.y = fnc(v1.y, v2);                                                    \
    142     r.z = fnc(v1.z, v2);                                                    \
    143     r.w = fnc(v1.w, v2);                                                    \
    144     return r;                                                               \
    145 }
    146 
    147 #define FN_FUNC_FN_PFN(fnc)                     \
    148 extern float2 __attribute__((overloadable)) \
    149         fnc(float2 v1, float2 *v2) {            \
    150     float2 r;                                   \
    151     float t[2];                                 \
    152     r.x = fnc(v1.x, &t[0]);                     \
    153     r.y = fnc(v1.y, &t[1]);                     \
    154     v2->x = t[0];                               \
    155     v2->y = t[1];                               \
    156     return r;                                   \
    157 }                                               \
    158 extern float3 __attribute__((overloadable)) \
    159         fnc(float3 v1, float3 *v2) {            \
    160     float3 r;                                   \
    161     float t[3];                                 \
    162     r.x = fnc(v1.x, &t[0]);                     \
    163     r.y = fnc(v1.y, &t[1]);                     \
    164     r.z = fnc(v1.z, &t[2]);                     \
    165     v2->x = t[0];                               \
    166     v2->y = t[1];                               \
    167     v2->z = t[2];                               \
    168     return r;                                   \
    169 }                                               \
    170 extern float4 __attribute__((overloadable)) \
    171         fnc(float4 v1, float4 *v2) {            \
    172     float4 r;                                   \
    173     float t[4];                                 \
    174     r.x = fnc(v1.x, &t[0]);                     \
    175     r.y = fnc(v1.y, &t[1]);                     \
    176     r.z = fnc(v1.z, &t[2]);                     \
    177     r.w = fnc(v1.w, &t[3]);                     \
    178     v2->x = t[0];                               \
    179     v2->y = t[1];                               \
    180     v2->z = t[2];                               \
    181     v2->w = t[3];                               \
    182     return r;                                   \
    183 }
    184 
    185 #define FN_FUNC_FN_PIN(fnc)                                                 \
    186 extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 *v2) {  \
    187     float2 r;                                                               \
    188     int t[2];                                                               \
    189     r.x = fnc(v1.x, &t[0]);                                                 \
    190     r.y = fnc(v1.y, &t[1]);                                                 \
    191     v2->x = t[0];                                                           \
    192     v2->y = t[1];                                                           \
    193     return r;                                                               \
    194 }                                                                           \
    195 extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 *v2) {  \
    196     float3 r;                                                               \
    197     int t[3];                                                               \
    198     r.x = fnc(v1.x, &t[0]);                                                 \
    199     r.y = fnc(v1.y, &t[1]);                                                 \
    200     r.z = fnc(v1.z, &t[2]);                                                 \
    201     v2->x = t[0];                                                           \
    202     v2->y = t[1];                                                           \
    203     v2->z = t[2];                                                           \
    204     return r;                                                               \
    205 }                                                                           \
    206 extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 *v2) {  \
    207     float4 r;                                                               \
    208     int t[4];                                                               \
    209     r.x = fnc(v1.x, &t[0]);                                                 \
    210     r.y = fnc(v1.y, &t[1]);                                                 \
    211     r.z = fnc(v1.z, &t[2]);                                                 \
    212     r.w = fnc(v1.w, &t[3]);                                                 \
    213     v2->x = t[0];                                                           \
    214     v2->y = t[1];                                                           \
    215     v2->z = t[2];                                                           \
    216     v2->w = t[3];                                                           \
    217     return r;                                                               \
    218 }
    219 
    220 #define FN_FUNC_FN_FN_FN(fnc)                   \
    221 extern float2 __attribute__((overloadable)) \
    222         fnc(float2 v1, float2 v2, float2 v3) {  \
    223     float2 r;                                   \
    224     r.x = fnc(v1.x, v2.x, v3.x);                \
    225     r.y = fnc(v1.y, v2.y, v3.y);                \
    226     return r;                                   \
    227 }                                               \
    228 extern float3 __attribute__((overloadable)) \
    229         fnc(float3 v1, float3 v2, float3 v3) {  \
    230     float3 r;                                   \
    231     r.x = fnc(v1.x, v2.x, v3.x);                \
    232     r.y = fnc(v1.y, v2.y, v3.y);                \
    233     r.z = fnc(v1.z, v2.z, v3.z);                \
    234     return r;                                   \
    235 }                                               \
    236 extern float4 __attribute__((overloadable)) \
    237         fnc(float4 v1, float4 v2, float4 v3) {  \
    238     float4 r;                                   \
    239     r.x = fnc(v1.x, v2.x, v3.x);                \
    240     r.y = fnc(v1.y, v2.y, v3.y);                \
    241     r.z = fnc(v1.z, v2.z, v3.z);                \
    242     r.w = fnc(v1.w, v2.w, v3.w);                \
    243     return r;                                   \
    244 }
    245 
    246 #define FN_FUNC_FN_FN_PIN(fnc)                  \
    247 extern float2 __attribute__((overloadable)) \
    248         fnc(float2 v1, float2 v2, int2 *v3) {   \
    249     float2 r;                                   \
    250     int t[2];                                   \
    251     r.x = fnc(v1.x, v2.x, &t[0]);               \
    252     r.y = fnc(v1.y, v2.y, &t[1]);               \
    253     v3->x = t[0];                               \
    254     v3->y = t[1];                               \
    255     return r;                                   \
    256 }                                               \
    257 extern float3 __attribute__((overloadable)) \
    258         fnc(float3 v1, float3 v2, int3 *v3) {   \
    259     float3 r;                                   \
    260     int t[3];                                   \
    261     r.x = fnc(v1.x, v2.x, &t[0]);               \
    262     r.y = fnc(v1.y, v2.y, &t[1]);               \
    263     r.z = fnc(v1.z, v2.z, &t[2]);               \
    264     v3->x = t[0];                               \
    265     v3->y = t[1];                               \
    266     v3->z = t[2];                               \
    267     return r;                                   \
    268 }                                               \
    269 extern float4 __attribute__((overloadable)) \
    270         fnc(float4 v1, float4 v2, int4 *v3) {   \
    271     float4 r;                                   \
    272     int t[4];                                   \
    273     r.x = fnc(v1.x, v2.x, &t[0]);               \
    274     r.y = fnc(v1.y, v2.y, &t[1]);               \
    275     r.z = fnc(v1.z, v2.z, &t[2]);               \
    276     r.w = fnc(v1.w, v2.w, &t[3]);               \
    277     v3->x = t[0];                               \
    278     v3->y = t[1];                               \
    279     v3->z = t[2];                               \
    280     v3->w = t[3];                               \
    281     return r;                                   \
    282 }
    283 
    284 static const int iposinf = 0x7f800000;
    285 static const int ineginf = 0xff800000;
    286 
    287 static const float posinf() {
    288     float f = *((float*)&iposinf);
    289     return f;
    290 }
    291 
    292 static const float neginf() {
    293     float f = *((float*)&ineginf);
    294     return f;
    295 }
    296 
    297 static bool isinf(float f) {
    298     int i = *((int*)(void*)&f);
    299     return (i == iposinf) || (i == ineginf);
    300 }
    301 
    302 static bool isnan(float f) {
    303     int i = *((int*)(void*)&f);
    304     return (((i & 0x7f800000) == 0x7f800000) && (i & 0x007fffff));
    305 }
    306 
    307 static bool isposzero(float f) {
    308     int i = *((int*)(void*)&f);
    309     return (i == 0x00000000);
    310 }
    311 
    312 static bool isnegzero(float f) {
    313     int i = *((int*)(void*)&f);
    314     return (i == 0x80000000);
    315 }
    316 
    317 static bool iszero(float f) {
    318     return isposzero(f) || isnegzero(f);
    319 }
    320 
    321 
    322 extern float __attribute__((overloadable)) acos(float);
    323 FN_FUNC_FN(acos)
    324 
    325 extern float __attribute__((overloadable)) acosh(float);
    326 FN_FUNC_FN(acosh)
    327 
    328 
    329 extern float __attribute__((overloadable)) acospi(float v) {
    330     return acos(v) / M_PI;
    331 }
    332 FN_FUNC_FN(acospi)
    333 
    334 extern float __attribute__((overloadable)) asin(float);
    335 FN_FUNC_FN(asin)
    336 
    337 extern float __attribute__((overloadable)) asinh(float);
    338 FN_FUNC_FN(asinh)
    339 
    340 extern float __attribute__((overloadable)) asinpi(float v) {
    341     return asin(v) / M_PI;
    342 }
    343 FN_FUNC_FN(asinpi)
    344 
    345 extern float __attribute__((overloadable)) atan(float);
    346 FN_FUNC_FN(atan)
    347 
    348 extern float __attribute__((overloadable)) atan2(float, float);
    349 FN_FUNC_FN_FN(atan2)
    350 
    351 extern float __attribute__((overloadable)) atanh(float);
    352 FN_FUNC_FN(atanh)
    353 
    354 extern float __attribute__((overloadable)) atanpi(float v) {
    355     return atan(v) / M_PI;
    356 }
    357 FN_FUNC_FN(atanpi)
    358 
    359 
    360 extern float __attribute__((overloadable)) atan2pi(float y, float x) {
    361     return atan2(y, x) / M_PI;
    362 }
    363 FN_FUNC_FN_FN(atan2pi)
    364 
    365 extern float __attribute__((overloadable)) cbrt(float);
    366 FN_FUNC_FN(cbrt)
    367 
    368 extern float __attribute__((overloadable)) ceil(float);
    369 FN_FUNC_FN(ceil)
    370 
    371 extern float __attribute__((overloadable)) copysign(float, float);
    372 FN_FUNC_FN_FN(copysign)
    373 
    374 extern float __attribute__((overloadable)) cos(float);
    375 FN_FUNC_FN(cos)
    376 
    377 extern float __attribute__((overloadable)) cosh(float);
    378 FN_FUNC_FN(cosh)
    379 
    380 extern float __attribute__((overloadable)) cospi(float v) {
    381     return cos(v * M_PI);
    382 }
    383 FN_FUNC_FN(cospi)
    384 
    385 extern float __attribute__((overloadable)) erfc(float);
    386 FN_FUNC_FN(erfc)
    387 
    388 extern float __attribute__((overloadable)) erf(float);
    389 FN_FUNC_FN(erf)
    390 
    391 extern float __attribute__((overloadable)) exp(float);
    392 FN_FUNC_FN(exp)
    393 
    394 extern float __attribute__((overloadable)) exp2(float);
    395 FN_FUNC_FN(exp2)
    396 
    397 extern float __attribute__((overloadable)) pow(float, float);
    398 
    399 extern float __attribute__((overloadable)) exp10(float v) {
    400     return pow(10.f, v);
    401 }
    402 FN_FUNC_FN(exp10)
    403 
    404 extern float __attribute__((overloadable)) expm1(float);
    405 FN_FUNC_FN(expm1)
    406 
    407 extern float __attribute__((overloadable)) fabs(float);
    408 FN_FUNC_FN(fabs)
    409 
    410 extern float __attribute__((overloadable)) fdim(float, float);
    411 FN_FUNC_FN_FN(fdim)
    412 
    413 extern float __attribute__((overloadable)) floor(float);
    414 FN_FUNC_FN(floor)
    415 
    416 extern float __attribute__((overloadable)) fma(float, float, float);
    417 FN_FUNC_FN_FN_FN(fma)
    418 
    419 extern float __attribute__((overloadable)) fmin(float, float);
    420 
    421 extern float __attribute__((overloadable)) fmod(float, float);
    422 FN_FUNC_FN_FN(fmod)
    423 
    424 extern float __attribute__((overloadable)) fract(float v, float *iptr) {
    425     int i = (int)floor(v);
    426     iptr[0] = i;
    427     return fmin(v - i, 0x1.fffffep-1f);
    428 }
    429 FN_FUNC_FN_PFN(fract)
    430 
    431 extern float __attribute__((overloadable)) frexp(float, int *);
    432 FN_FUNC_FN_PIN(frexp)
    433 
    434 extern float __attribute__((overloadable)) hypot(float, float);
    435 FN_FUNC_FN_FN(hypot)
    436 
    437 extern int __attribute__((overloadable)) ilogb(float);
    438 IN_FUNC_FN(ilogb)
    439 
    440 extern float __attribute__((overloadable)) ldexp(float, int);
    441 FN_FUNC_FN_IN(ldexp)
    442 FN_FUNC_FN_I(ldexp)
    443 
    444 extern float __attribute__((overloadable)) lgamma(float);
    445 FN_FUNC_FN(lgamma)
    446 extern float __attribute__((overloadable)) lgamma(float, int*);
    447 FN_FUNC_FN_PIN(lgamma)
    448 
    449 extern float __attribute__((overloadable)) log(float);
    450 FN_FUNC_FN(log)
    451 
    452 extern float __attribute__((overloadable)) log10(float);
    453 FN_FUNC_FN(log10)
    454 
    455 
    456 extern float __attribute__((overloadable)) log2(float v) {
    457     return log10(v) / log10(2.f);
    458 }
    459 FN_FUNC_FN(log2)
    460 
    461 extern float __attribute__((overloadable)) log1p(float);
    462 FN_FUNC_FN(log1p)
    463 
    464 extern float __attribute__((overloadable)) logb(float);
    465 FN_FUNC_FN(logb)
    466 
    467 extern float __attribute__((overloadable)) mad(float a, float b, float c) {
    468     return a * b + c;
    469 }
    470 extern float2 __attribute__((overloadable)) mad(float2 a, float2 b, float2 c) {
    471     return a * b + c;
    472 }
    473 extern float3 __attribute__((overloadable)) mad(float3 a, float3 b, float3 c) {
    474     return a * b + c;
    475 }
    476 extern float4 __attribute__((overloadable)) mad(float4 a, float4 b, float4 c) {
    477     return a * b + c;
    478 }
    479 
    480 extern float __attribute__((overloadable)) modf(float, float *);
    481 FN_FUNC_FN_PFN(modf);
    482 
    483 extern float __attribute__((overloadable)) nan(uint v) {
    484     float f[1];
    485     uint32_t *ip = (uint32_t *)f;
    486     *ip = v | 0x7fc00000;
    487     return f[0];
    488 }
    489 
    490 extern float __attribute__((overloadable)) nextafter(float, float);
    491 FN_FUNC_FN_FN(nextafter)
    492 
    493 FN_FUNC_FN_FN(pow)
    494 
    495 extern float __attribute__((overloadable)) pown(float v, int p) {
    496     return pow(v, (float)p);
    497 }
    498 extern float2 __attribute__((overloadable)) pown(float2 v, int2 p) {
    499     float2 f2 = convert_float2(p);
    500     return pow(v, f2);
    501 }
    502 extern float3 __attribute__((overloadable)) pown(float3 v, int3 p) {
    503     float3 f3 = convert_float3(p);
    504     return pow(v, f3);
    505 }
    506 extern float4 __attribute__((overloadable)) pown(float4 v, int4 p) {
    507     float4 f4 = convert_float4(p);
    508     return pow(v, f4);
    509 }
    510 
    511 extern float __attribute__((overloadable)) powr(float v, float p) {
    512     return pow(v, p);
    513 }
    514 extern float2 __attribute__((overloadable)) powr(float2 v, float2 p) {
    515     return pow(v, p);
    516 }
    517 extern float3 __attribute__((overloadable)) powr(float3 v, float3 p) {
    518     return pow(v, p);
    519 }
    520 extern float4 __attribute__((overloadable)) powr(float4 v, float4 p) {
    521     return pow(v, p);
    522 }
    523 
    524 extern float __attribute__((overloadable)) remainder(float, float);
    525 FN_FUNC_FN_FN(remainder)
    526 
    527 extern float __attribute__((overloadable)) remquo(float, float, int *);
    528 FN_FUNC_FN_FN_PIN(remquo)
    529 
    530 extern float __attribute__((overloadable)) rint(float);
    531 FN_FUNC_FN(rint)
    532 
    533 extern float __attribute__((overloadable)) rootn(float v, int r) {
    534     if (r == 0) {
    535         return nan(0);
    536     }
    537 
    538     if (iszero(v)) {
    539         if (r < 0) {
    540             if (r & 1) {
    541                 return copysign(posinf(), v);
    542             } else {
    543                 return posinf();
    544             }
    545         } else {
    546             if (r & 1) {
    547                 return copysign(0.f, v);
    548             } else {
    549                 return 0.f;
    550             }
    551         }
    552     }
    553 
    554     if (!isinf(v) && !isnan(v) && (v < 0.f)) {
    555         if (r & 1) {
    556             return (-1.f * pow(-1.f * v, 1.f / r));
    557         } else {
    558             return nan(0);
    559         }
    560     }
    561 
    562     return pow(v, 1.f / r);
    563 }
    564 FN_FUNC_FN_IN(rootn);
    565 
    566 extern float __attribute__((overloadable)) round(float);
    567 FN_FUNC_FN(round)
    568 
    569 
    570 extern float __attribute__((overloadable)) sqrt(float);
    571 extern float __attribute__((overloadable)) rsqrt(float v) {
    572     return 1.f / sqrt(v);
    573 }
    574 FN_FUNC_FN(rsqrt)
    575 
    576 extern float __attribute__((overloadable)) sin(float);
    577 FN_FUNC_FN(sin)
    578 
    579 extern float __attribute__((overloadable)) sincos(float v, float *cosptr) {
    580     *cosptr = cos(v);
    581     return sin(v);
    582 }
    583 extern float2 __attribute__((overloadable)) sincos(float2 v, float2 *cosptr) {
    584     *cosptr = cos(v);
    585     return sin(v);
    586 }
    587 extern float3 __attribute__((overloadable)) sincos(float3 v, float3 *cosptr) {
    588     *cosptr = cos(v);
    589     return sin(v);
    590 }
    591 extern float4 __attribute__((overloadable)) sincos(float4 v, float4 *cosptr) {
    592     *cosptr = cos(v);
    593     return sin(v);
    594 }
    595 
    596 extern float __attribute__((overloadable)) sinh(float);
    597 FN_FUNC_FN(sinh)
    598 
    599 extern float __attribute__((overloadable)) sinpi(float v) {
    600     return sin(v * M_PI);
    601 }
    602 FN_FUNC_FN(sinpi)
    603 
    604 FN_FUNC_FN(sqrt)
    605 
    606 extern float __attribute__((overloadable)) tan(float);
    607 FN_FUNC_FN(tan)
    608 
    609 extern float __attribute__((overloadable)) tanh(float);
    610 FN_FUNC_FN(tanh)
    611 
    612 extern float __attribute__((overloadable)) tanpi(float v) {
    613     return tan(v * M_PI);
    614 }
    615 FN_FUNC_FN(tanpi)
    616 
    617 
    618 extern float __attribute__((overloadable)) tgamma(float);
    619 FN_FUNC_FN(tgamma)
    620 
    621 extern float __attribute__((overloadable)) trunc(float);
    622 FN_FUNC_FN(trunc)
    623 
    624 // Int ops (partial), 6.11.3
    625 
    /*
     * XN_FUNC_YN(typeout, fnc, typein)
     *
     * Declares the scalar overload `typeout fnc(typein)` (no body is emitted
     * for it here) and defines the 2-, 3- and 4-component overloads. Each
     * vector overload calls fnc on every component of its argument and stores
     * the result in the matching component of a typeout vector.
     */
    #define XN_FUNC_YN(typeout, fnc, typein)                                \
    extern typeout __attribute__((overloadable)) fnc(typein);               \
    extern typeout##2 __attribute__((overloadable)) fnc(typein##2 v) {      \
        typeout##2 res;                                                     \
        res.x = fnc(v.x);                                                   \
        res.y = fnc(v.y);                                                   \
        return res;                                                         \
    }                                                                       \
    extern typeout##3 __attribute__((overloadable)) fnc(typein##3 v) {      \
        typeout##3 res;                                                     \
        res.x = fnc(v.x);                                                   \
        res.y = fnc(v.y);                                                   \
        res.z = fnc(v.z);                                                   \
        return res;                                                         \
    }                                                                       \
    extern typeout##4 __attribute__((overloadable)) fnc(typein##4 v) {      \
        typeout##4 res;                                                     \
        res.x = fnc(v.x);                                                   \
        res.y = fnc(v.y);                                                   \
        res.z = fnc(v.z);                                                   \
        res.w = fnc(v.w);                                                   \
        return res;                                                         \
    }
    649 
    650 
    /*
     * UIN_FUNC_IN(fnc)
     *
     * Instantiates XN_FUNC_YN for the signed inputs char, short and int, with
     * the return type being the unsigned counterpart (uchar, ushort, uint).
     */
    #define UIN_FUNC_IN(fnc)            \
        XN_FUNC_YN(uchar,  fnc, char)   \
        XN_FUNC_YN(ushort, fnc, short)  \
        XN_FUNC_YN(uint,   fnc, int)
    655 
    /*
     * IN_FUNC_IN(fnc)
     *
     * Instantiates XN_FUNC_YN once for each of uchar, char, ushort, short,
     * uint and int, with the return type equal to the argument type.
     */
    #define IN_FUNC_IN(fnc)              \
        XN_FUNC_YN(uchar,  fnc, uchar)   \
        XN_FUNC_YN(char,   fnc, char)    \
        XN_FUNC_YN(ushort, fnc, ushort)  \
        XN_FUNC_YN(short,  fnc, short)   \
        XN_FUNC_YN(uint,   fnc, uint)    \
        XN_FUNC_YN(int,    fnc, int)
    663 
    664 
    /*
     * XN_FUNC_XN_XN_BODY(type, fnc, body)
     *
     * Defines a two-argument function fnc for `type` and for its 2-, 3- and
     * 4-component vector forms. The scalar overload returns the expression
     * `body`, which is expanded inside a function whose parameters are named
     * v1 and v2. The vector overloads apply the scalar overload to each pair
     * of matching components.
     */
    #define XN_FUNC_XN_XN_BODY(type, fnc, body)                                 \
    extern type __attribute__((overloadable)) fnc(type v1, type v2) {           \
        return body;                                                            \
    }                                                                           \
    extern type##2 __attribute__((overloadable)) fnc(type##2 v1, type##2 v2) {  \
        type##2 res;                                                            \
        res.x = fnc(v1.x, v2.x);                                                \
        res.y = fnc(v1.y, v2.y);                                                \
        return res;                                                             \
    }                                                                           \
    extern type##3 __attribute__((overloadable)) fnc(type##3 v1, type##3 v2) {  \
        type##3 res;                                                            \
        res.x = fnc(v1.x, v2.x);                                                \
        res.y = fnc(v1.y, v2.y);                                                \
        res.z = fnc(v1.z, v2.z);                                                \
        return res;                                                             \
    }                                                                           \
    extern type##4 __attribute__((overloadable)) fnc(type##4 v1, type##4 v2) {  \
        type##4 res;                                                            \
        res.x = fnc(v1.x, v2.x);                                                \
        res.y = fnc(v1.y, v2.y);                                                \
        res.z = fnc(v1.z, v2.z);                                                \
        res.w = fnc(v1.w, v2.w);                                                \
        return res;                                                             \
    }
    694 
    /*
     * IN_FUNC_IN_IN_BODY(fnc, body)
     *
     * Instantiates XN_FUNC_XN_XN_BODY with the same `body` expression for
     * uchar, char, ushort, short, uint, int and (despite the IN_ prefix of
     * this macro's name) float.
     */
    #define IN_FUNC_IN_IN_BODY(fnc, body)       \
        XN_FUNC_XN_XN_BODY(uchar,  fnc, body)   \
        XN_FUNC_XN_XN_BODY(char,   fnc, body)   \
        XN_FUNC_XN_XN_BODY(ushort, fnc, body)   \
        XN_FUNC_XN_XN_BODY(short,  fnc, body)   \
        XN_FUNC_XN_XN_BODY(uint,   fnc, body)   \
        XN_FUNC_XN_XN_BODY(int,    fnc, body)   \
        XN_FUNC_XN_XN_BODY(float,  fnc, body)
    703 
    704 UIN_FUNC_IN(abs)
    705 IN_FUNC_IN(clz)
    706 
    707 
    708 // 6.11.4
    709 
    710 
    711 extern float __attribute__((overloadable)) degrees(float radians) {
    712     return radians * (180.f / M_PI);
    713 }
    714 extern float2 __attribute__((overloadable)) degrees(float2 radians) {
    715     return radians * (180.f / M_PI);
    716 }
    717 extern float3 __attribute__((overloadable)) degrees(float3 radians) {
    718     return radians * (180.f / M_PI);
    719 }
    720 extern float4 __attribute__((overloadable)) degrees(float4 radians) {
    721     return radians * (180.f / M_PI);
    722 }
    723 
    724 extern float __attribute__((overloadable)) mix(float start, float stop, float amount) {
    725     return start + (stop - start) * amount;
    726 }
    727 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float2 amount) {
    728     return start + (stop - start) * amount;
    729 }
    730 extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float3 amount) {
    731     return start + (stop - start) * amount;
    732 }
    733 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float4 amount) {
    734     return start + (stop - start) * amount;
    735 }
    736 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float amount) {
    737     return start + (stop - start) * amount;
    738 }
    739 extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float amount) {
    740     return start + (stop - start) * amount;
    741 }
    742 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float amount) {
    743     return start + (stop - start) * amount;
    744 }
    745 
    746 extern float __attribute__((overloadable)) radians(float degrees) {
    747     return degrees * (M_PI / 180.f);
    748 }
    749 extern float2 __attribute__((overloadable)) radians(float2 degrees) {
    750     return degrees * (M_PI / 180.f);
    751 }
    752 extern float3 __attribute__((overloadable)) radians(float3 degrees) {
    753     return degrees * (M_PI / 180.f);
    754 }
    755 extern float4 __attribute__((overloadable)) radians(float4 degrees) {
    756     return degrees * (M_PI / 180.f);
    757 }
    758 
    759 extern float __attribute__((overloadable)) step(float edge, float v) {
    760     return (v < edge) ? 0.f : 1.f;
    761 }
    762 extern float2 __attribute__((overloadable)) step(float2 edge, float2 v) {
    763     float2 r;
    764     r.x = (v.x < edge.x) ? 0.f : 1.f;
    765     r.y = (v.y < edge.y) ? 0.f : 1.f;
    766     return r;
    767 }
    768 extern float3 __attribute__((overloadable)) step(float3 edge, float3 v) {
    769     float3 r;
    770     r.x = (v.x < edge.x) ? 0.f : 1.f;
    771     r.y = (v.y < edge.y) ? 0.f : 1.f;
    772     r.z = (v.z < edge.z) ? 0.f : 1.f;
    773     return r;
    774 }
    775 extern float4 __attribute__((overloadable)) step(float4 edge, float4 v) {
    776     float4 r;
    777     r.x = (v.x < edge.x) ? 0.f : 1.f;
    778     r.y = (v.y < edge.y) ? 0.f : 1.f;
    779     r.z = (v.z < edge.z) ? 0.f : 1.f;
    780     r.w = (v.w < edge.w) ? 0.f : 1.f;
    781     return r;
    782 }
    783 extern float2 __attribute__((overloadable)) step(float2 edge, float v) {
    784     float2 r;
    785     r.x = (v < edge.x) ? 0.f : 1.f;
    786     r.y = (v < edge.y) ? 0.f : 1.f;
    787     return r;
    788 }
    789 extern float3 __attribute__((overloadable)) step(float3 edge, float v) {
    790     float3 r;
    791     r.x = (v < edge.x) ? 0.f : 1.f;
    792     r.y = (v < edge.y) ? 0.f : 1.f;
    793     r.z = (v < edge.z) ? 0.f : 1.f;
    794     return r;
    795 }
    796 extern float4 __attribute__((overloadable)) step(float4 edge, float v) {
    797     float4 r;
    798     r.x = (v < edge.x) ? 0.f : 1.f;
    799     r.y = (v < edge.y) ? 0.f : 1.f;
    800     r.z = (v < edge.z) ? 0.f : 1.f;
    801     r.w = (v < edge.w) ? 0.f : 1.f;
    802     return r;
    803 }
    804 
    805 extern float __attribute__((overloadable)) smoothstep(float, float, float);
    806 extern float2 __attribute__((overloadable)) smoothstep(float2, float2, float2);
    807 extern float3 __attribute__((overloadable)) smoothstep(float3, float3, float3);
    808 extern float4 __attribute__((overloadable)) smoothstep(float4, float4, float4);
    809 extern float2 __attribute__((overloadable)) smoothstep(float, float, float2);
    810 extern float3 __attribute__((overloadable)) smoothstep(float, float, float3);
    811 extern float4 __attribute__((overloadable)) smoothstep(float, float, float4);
    812 
    813 extern float __attribute__((overloadable)) sign(float v) {
    814     if (v > 0) return 1.f;
    815     if (v < 0) return -1.f;
    816     return v;
    817 }
    818 FN_FUNC_FN(sign)
    819 
    820 
    821 // 6.11.5
    822 extern float3 __attribute__((overloadable)) cross(float3 lhs, float3 rhs) {
    823     float3 r;
    824     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
    825     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
    826     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
    827     return r;
    828 }
    829 
    830 extern float4 __attribute__((overloadable)) cross(float4 lhs, float4 rhs) {
    831     float4 r;
    832     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
    833     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
    834     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
    835     r.w = 0.f;
    836     return r;
    837 }
    838 
    839 extern float __attribute__((overloadable)) dot(float lhs, float rhs) {
    840     return lhs * rhs;
    841 }
    842 extern float __attribute__((overloadable)) dot(float2 lhs, float2 rhs) {
    843     return lhs.x*rhs.x + lhs.y*rhs.y;
    844 }
    845 extern float __attribute__((overloadable)) dot(float3 lhs, float3 rhs) {
    846     return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z;
    847 }
    848 extern float __attribute__((overloadable)) dot(float4 lhs, float4 rhs) {
    849     return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z + lhs.w*rhs.w;
    850 }
    851 
    852 extern float __attribute__((overloadable)) length(float v) {
    853     return fabs(v);
    854 }
    855 extern float __attribute__((overloadable)) length(float2 v) {
    856     return sqrt(v.x*v.x + v.y*v.y);
    857 }
    858 extern float __attribute__((overloadable)) length(float3 v) {
    859     return sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
    860 }
    861 extern float __attribute__((overloadable)) length(float4 v) {
    862     return sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
    863 }
    864 
    865 extern float __attribute__((overloadable)) distance(float lhs, float rhs) {
    866     return length(lhs - rhs);
    867 }
    868 extern float __attribute__((overloadable)) distance(float2 lhs, float2 rhs) {
    869     return length(lhs - rhs);
    870 }
    871 extern float __attribute__((overloadable)) distance(float3 lhs, float3 rhs) {
    872     return length(lhs - rhs);
    873 }
    874 extern float __attribute__((overloadable)) distance(float4 lhs, float4 rhs) {
    875     return length(lhs - rhs);
    876 }
    877 
    878 extern float __attribute__((overloadable)) normalize(float v) {
    879     return 1.f;
    880 }
    881 extern float2 __attribute__((overloadable)) normalize(float2 v) {
    882     return v / length(v);
    883 }
    884 extern float3 __attribute__((overloadable)) normalize(float3 v) {
    885     return v / length(v);
    886 }
    887 extern float4 __attribute__((overloadable)) normalize(float4 v) {
    888     return v / length(v);
    889 }
    890 
    891 extern float __attribute__((overloadable)) half_sqrt(float);
    892 
    893 extern float __attribute__((overloadable)) fast_length(float v) {
    894     return v;
    895 }
    896 extern float __attribute__((overloadable)) fast_length(float2 v) {
    897     return half_sqrt(v.x*v.x + v.y*v.y);
    898 }
    899 extern float __attribute__((overloadable)) fast_length(float3 v) {
    900     return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
    901 }
    902 extern float __attribute__((overloadable)) fast_length(float4 v) {
    903     return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
    904 }
    905 
    906 extern float __attribute__((overloadable)) fast_distance(float lhs, float rhs) {
    907     return fast_length(lhs - rhs);
    908 }
    909 extern float __attribute__((overloadable)) fast_distance(float2 lhs, float2 rhs) {
    910     return fast_length(lhs - rhs);
    911 }
    912 extern float __attribute__((overloadable)) fast_distance(float3 lhs, float3 rhs) {
    913     return fast_length(lhs - rhs);
    914 }
    915 extern float __attribute__((overloadable)) fast_distance(float4 lhs, float4 rhs) {
    916     return fast_length(lhs - rhs);
    917 }
    918 
    919 extern float __attribute__((overloadable)) half_rsqrt(float);
    920 
    921 extern float __attribute__((overloadable)) fast_normalize(float v) {
    922     return 1.f;
    923 }
    924 extern float2 __attribute__((overloadable)) fast_normalize(float2 v) {
    925     return v * half_rsqrt(v.x*v.x + v.y*v.y);
    926 }
    927 extern float3 __attribute__((overloadable)) fast_normalize(float3 v) {
    928     return v * half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z);
    929 }
    930 extern float4 __attribute__((overloadable)) fast_normalize(float4 v) {
    931     return v * half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
    932 }
    933 
    // half_recip: declaration only; nothing in the visible part of this file
    // calls it.
    extern float __attribute__((overloadable)) half_recip(float v);
    935 
    936 /*
    937 extern float __attribute__((overloadable)) approx_atan(float x) {
    938     if (x == 0.f)
    939         return 0.f;
    940     if (x < 0.f)
    941         return -1.f * approx_atan(-1.f * x);
    942     if (x > 1.f)
    943         return M_PI_2 - approx_atan(approx_recip(x));
    944     return x * approx_recip(1.f + 0.28f * x*x);
    945 }
    946 FN_FUNC_FN(approx_atan)
    947 */
    948 
    // Drop the code-generation helper macros so none of them remains defined
    // past this point.

    // Float-vector expansion helpers.
    #undef FN_FUNC_FN
    #undef IN_FUNC_FN
    #undef FN_FUNC_FN_FN
    #undef FN_FUNC_FN_F
    #undef FN_FUNC_FN_IN
    #undef FN_FUNC_FN_I
    #undef FN_FUNC_FN_PFN
    #undef FN_FUNC_FN_PIN
    #undef FN_FUNC_FN_FN_FN
    #undef FN_FUNC_FN_FN_PIN

    // Generic and integer expansion helpers.
    #undef XN_FUNC_YN
    #undef UIN_FUNC_IN
    #undef IN_FUNC_IN
    #undef XN_FUNC_XN_XN_BODY
    #undef IN_FUNC_IN_IN_BODY
    964