Home | History | Annotate | Download | only in arch
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rs_types.rsh"
     19 
     20 extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
     21 extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
     22 extern uchar4 __attribute__((overloadable)) convert_uchar4(float4);
     23 extern float4 __attribute__((overloadable)) convert_float4(uchar4);
     24 extern float __attribute__((overloadable)) sqrt(float);
     25 
     26 /*
     27  * CLAMP
     28  */
     29 #define _CLAMP(T) \
     30 extern T __attribute__((overloadable)) clamp(T amount, T low, T high) {             \
     31     return amount < low ? low : (amount > high ? high : amount);                    \
     32 }                                                                                   \
     33                                                                                     \
     34 extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T##2 low, T##2 high) { \
     35     T##2 r;                                                                         \
     36     r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);       \
     37     r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);       \
     38     return r;                                                                       \
     39 }                                                                                   \
     40                                                                                     \
     41 extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T##3 low, T##3 high) { \
     42     T##3 r;                                                                         \
     43     r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);       \
     44     r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);       \
     45     r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);       \
     46     return r;                                                                       \
     47 }                                                                                   \
     48                                                                                     \
     49 extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T##4 low, T##4 high) { \
     50     T##4 r;                                                                         \
     51     r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);       \
     52     r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);       \
     53     r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);       \
     54     r.w = amount.w < low.w ? low.w : (amount.w > high.w ? high.w : amount.w);       \
     55     return r;                                                                       \
     56 }                                                                                   \
     57                                                                                     \
     58 extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T low, T high) {       \
     59     T##2 r;                                                                         \
     60     r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);               \
     61     r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);               \
     62     return r;                                                                       \
     63 }                                                                                   \
     64                                                                                     \
     65 extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T low, T high) {       \
     66     T##3 r;                                                                         \
     67     r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);               \
     68     r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);               \
     69     r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);               \
     70     return r;                                                                       \
     71 }                                                                                   \
     72                                                                                     \
     73 extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T low, T high) {       \
     74     T##4 r;                                                                         \
     75     r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);               \
     76     r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);               \
     77     r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);               \
     78     r.w = amount.w < low ? low : (amount.w > high ? high : amount.w);               \
     79     return r;                                                                       \
     80 }
     81 
     82 #if !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
     83 
     84 _CLAMP(float);
     85 
     86 #else
     87 
     88 extern float __attribute__((overloadable)) clamp(float amount, float low, float high);
     89 extern float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high);
     90 extern float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high);
     91 extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
     92 extern float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high);
     93 extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high);
     94 extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high);
     95 
     96 #endif // !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
     97 
     98 _CLAMP(double);
     99 _CLAMP(char);
    100 _CLAMP(uchar);
    101 _CLAMP(short);
    102 _CLAMP(ushort);
    103 _CLAMP(int);
    104 _CLAMP(uint);
    105 _CLAMP(long);
    106 _CLAMP(ulong);
    107 
    108 #undef _CLAMP
    109 
    110 /*
    111  * FMAX
    112  */
    113 
    114 extern float __attribute__((overloadable)) fmax(float v1, float v2) {
    115     return v1 > v2 ? v1 : v2;
    116 }
    117 
    118 extern float2 __attribute__((overloadable)) fmax(float2 v1, float2 v2) {
    119     float2 r;
    120     r.x = v1.x > v2.x ? v1.x : v2.x;
    121     r.y = v1.y > v2.y ? v1.y : v2.y;
    122     return r;
    123 }
    124 
    125 extern float3 __attribute__((overloadable)) fmax(float3 v1, float3 v2) {
    126     float3 r;
    127     r.x = v1.x > v2.x ? v1.x : v2.x;
    128     r.y = v1.y > v2.y ? v1.y : v2.y;
    129     r.z = v1.z > v2.z ? v1.z : v2.z;
    130     return r;
    131 }
    132 
    133 extern float4 __attribute__((overloadable)) fmax(float4 v1, float4 v2) {
    134     float4 r;
    135     r.x = v1.x > v2.x ? v1.x : v2.x;
    136     r.y = v1.y > v2.y ? v1.y : v2.y;
    137     r.z = v1.z > v2.z ? v1.z : v2.z;
    138     r.w = v1.w > v2.w ? v1.w : v2.w;
    139     return r;
    140 }
    141 
    142 extern float2 __attribute__((overloadable)) fmax(float2 v1, float v2) {
    143     float2 r;
    144     r.x = v1.x > v2 ? v1.x : v2;
    145     r.y = v1.y > v2 ? v1.y : v2;
    146     return r;
    147 }
    148 
    149 extern float3 __attribute__((overloadable)) fmax(float3 v1, float v2) {
    150     float3 r;
    151     r.x = v1.x > v2 ? v1.x : v2;
    152     r.y = v1.y > v2 ? v1.y : v2;
    153     r.z = v1.z > v2 ? v1.z : v2;
    154     return r;
    155 }
    156 
    157 extern float4 __attribute__((overloadable)) fmax(float4 v1, float v2) {
    158     float4 r;
    159     r.x = v1.x > v2 ? v1.x : v2;
    160     r.y = v1.y > v2 ? v1.y : v2;
    161     r.z = v1.z > v2 ? v1.z : v2;
    162     r.w = v1.w > v2 ? v1.w : v2;
    163     return r;
    164 }
    165 
    166 extern float __attribute__((overloadable)) fmin(float v1, float v2) {
    167     return v1 < v2 ? v1 : v2;
    168 }
    169 
    170 
    171 /*
    172  * FMIN
    173  */
    174 extern float2 __attribute__((overloadable)) fmin(float2 v1, float2 v2) {
    175     float2 r;
    176     r.x = v1.x < v2.x ? v1.x : v2.x;
    177     r.y = v1.y < v2.y ? v1.y : v2.y;
    178     return r;
    179 }
    180 
    181 extern float3 __attribute__((overloadable)) fmin(float3 v1, float3 v2) {
    182     float3 r;
    183     r.x = v1.x < v2.x ? v1.x : v2.x;
    184     r.y = v1.y < v2.y ? v1.y : v2.y;
    185     r.z = v1.z < v2.z ? v1.z : v2.z;
    186     return r;
    187 }
    188 
    189 extern float4 __attribute__((overloadable)) fmin(float4 v1, float4 v2) {
    190     float4 r;
    191     r.x = v1.x < v2.x ? v1.x : v2.x;
    192     r.y = v1.y < v2.y ? v1.y : v2.y;
    193     r.z = v1.z < v2.z ? v1.z : v2.z;
    194     r.w = v1.w < v2.w ? v1.w : v2.w;
    195     return r;
    196 }
    197 
    198 extern float2 __attribute__((overloadable)) fmin(float2 v1, float v2) {
    199     float2 r;
    200     r.x = v1.x < v2 ? v1.x : v2;
    201     r.y = v1.y < v2 ? v1.y : v2;
    202     return r;
    203 }
    204 
    205 extern float3 __attribute__((overloadable)) fmin(float3 v1, float v2) {
    206     float3 r;
    207     r.x = v1.x < v2 ? v1.x : v2;
    208     r.y = v1.y < v2 ? v1.y : v2;
    209     r.z = v1.z < v2 ? v1.z : v2;
    210     return r;
    211 }
    212 
    213 extern float4 __attribute__((overloadable)) fmin(float4 v1, float v2) {
    214     float4 r;
    215     r.x = v1.x < v2 ? v1.x : v2;
    216     r.y = v1.y < v2 ? v1.y : v2;
    217     r.z = v1.z < v2 ? v1.z : v2;
    218     r.w = v1.w < v2 ? v1.w : v2;
    219     return r;
    220 }
    221 
    222 
    223 /*
    224  * MAX
    225  */
    226 
    227 extern char __attribute__((overloadable)) max(char v1, char v2) {
    228     return v1 > v2 ? v1 : v2;
    229 }
    230 
    231 extern char2 __attribute__((overloadable)) max(char2 v1, char2 v2) {
    232     char2 r;
    233     r.x = v1.x > v2.x ? v1.x : v2.x;
    234     r.y = v1.y > v2.y ? v1.y : v2.y;
    235     return r;
    236 }
    237 
    238 extern char3 __attribute__((overloadable)) max(char3 v1, char3 v2) {
    239     char3 r;
    240     r.x = v1.x > v2.x ? v1.x : v2.x;
    241     r.y = v1.y > v2.y ? v1.y : v2.y;
    242     r.z = v1.z > v2.z ? v1.z : v2.z;
    243     return r;
    244 }
    245 
    246 extern char4 __attribute__((overloadable)) max(char4 v1, char4 v2) {
    247     char4 r;
    248     r.x = v1.x > v2.x ? v1.x : v2.x;
    249     r.y = v1.y > v2.y ? v1.y : v2.y;
    250     r.z = v1.z > v2.z ? v1.z : v2.z;
    251     r.w = v1.w > v2.w ? v1.w : v2.w;
    252     return r;
    253 }
    254 
    255 extern short __attribute__((overloadable)) max(short v1, short v2) {
    256     return v1 > v2 ? v1 : v2;
    257 }
    258 
    259 extern short2 __attribute__((overloadable)) max(short2 v1, short2 v2) {
    260     short2 r;
    261     r.x = v1.x > v2.x ? v1.x : v2.x;
    262     r.y = v1.y > v2.y ? v1.y : v2.y;
    263     return r;
    264 }
    265 
    266 extern short3 __attribute__((overloadable)) max(short3 v1, short3 v2) {
    267     short3 r;
    268     r.x = v1.x > v2.x ? v1.x : v2.x;
    269     r.y = v1.y > v2.y ? v1.y : v2.y;
    270     r.z = v1.z > v2.z ? v1.z : v2.z;
    271     return r;
    272 }
    273 
    274 extern short4 __attribute__((overloadable)) max(short4 v1, short4 v2) {
    275     short4 r;
    276     r.x = v1.x > v2.x ? v1.x : v2.x;
    277     r.y = v1.y > v2.y ? v1.y : v2.y;
    278     r.z = v1.z > v2.z ? v1.z : v2.z;
    279     r.w = v1.w > v2.w ? v1.w : v2.w;
    280     return r;
    281 }
    282 
    283 extern int __attribute__((overloadable)) max(int v1, int v2) {
    284     return v1 > v2 ? v1 : v2;
    285 }
    286 
    287 extern int2 __attribute__((overloadable)) max(int2 v1, int2 v2) {
    288     int2 r;
    289     r.x = v1.x > v2.x ? v1.x : v2.x;
    290     r.y = v1.y > v2.y ? v1.y : v2.y;
    291     return r;
    292 }
    293 
    294 extern int3 __attribute__((overloadable)) max(int3 v1, int3 v2) {
    295     int3 r;
    296     r.x = v1.x > v2.x ? v1.x : v2.x;
    297     r.y = v1.y > v2.y ? v1.y : v2.y;
    298     r.z = v1.z > v2.z ? v1.z : v2.z;
    299     return r;
    300 }
    301 
    302 extern int4 __attribute__((overloadable)) max(int4 v1, int4 v2) {
    303     int4 r;
    304     r.x = v1.x > v2.x ? v1.x : v2.x;
    305     r.y = v1.y > v2.y ? v1.y : v2.y;
    306     r.z = v1.z > v2.z ? v1.z : v2.z;
    307     r.w = v1.w > v2.w ? v1.w : v2.w;
    308     return r;
    309 }
    310 
    311 extern int64_t __attribute__((overloadable)) max(int64_t v1, int64_t v2) {
    312     return v1 > v2 ? v1 : v2;
    313 }
    314 
    315 extern long2 __attribute__((overloadable)) max(long2 v1, long2 v2) {
    316     long2 r;
    317     r.x = v1.x > v2.x ? v1.x : v2.x;
    318     r.y = v1.y > v2.y ? v1.y : v2.y;
    319     return r;
    320 }
    321 
    322 extern long3 __attribute__((overloadable)) max(long3 v1, long3 v2) {
    323     long3 r;
    324     r.x = v1.x > v2.x ? v1.x : v2.x;
    325     r.y = v1.y > v2.y ? v1.y : v2.y;
    326     r.z = v1.z > v2.z ? v1.z : v2.z;
    327     return r;
    328 }
    329 
    330 extern long4 __attribute__((overloadable)) max(long4 v1, long4 v2) {
    331     long4 r;
    332     r.x = v1.x > v2.x ? v1.x : v2.x;
    333     r.y = v1.y > v2.y ? v1.y : v2.y;
    334     r.z = v1.z > v2.z ? v1.z : v2.z;
    335     r.w = v1.w > v2.w ? v1.w : v2.w;
    336     return r;
    337 }
    338 
    339 extern uchar __attribute__((overloadable)) max(uchar v1, uchar v2) {
    340     return v1 > v2 ? v1 : v2;
    341 }
    342 
    343 extern uchar2 __attribute__((overloadable)) max(uchar2 v1, uchar2 v2) {
    344     uchar2 r;
    345     r.x = v1.x > v2.x ? v1.x : v2.x;
    346     r.y = v1.y > v2.y ? v1.y : v2.y;
    347     return r;
    348 }
    349 
    350 extern uchar3 __attribute__((overloadable)) max(uchar3 v1, uchar3 v2) {
    351     uchar3 r;
    352     r.x = v1.x > v2.x ? v1.x : v2.x;
    353     r.y = v1.y > v2.y ? v1.y : v2.y;
    354     r.z = v1.z > v2.z ? v1.z : v2.z;
    355     return r;
    356 }
    357 
    358 extern uchar4 __attribute__((overloadable)) max(uchar4 v1, uchar4 v2) {
    359     uchar4 r;
    360     r.x = v1.x > v2.x ? v1.x : v2.x;
    361     r.y = v1.y > v2.y ? v1.y : v2.y;
    362     r.z = v1.z > v2.z ? v1.z : v2.z;
    363     r.w = v1.w > v2.w ? v1.w : v2.w;
    364     return r;
    365 }
    366 
    367 extern ushort __attribute__((overloadable)) max(ushort v1, ushort v2) {
    368     return v1 > v2 ? v1 : v2;
    369 }
    370 
    371 extern ushort2 __attribute__((overloadable)) max(ushort2 v1, ushort2 v2) {
    372     ushort2 r;
    373     r.x = v1.x > v2.x ? v1.x : v2.x;
    374     r.y = v1.y > v2.y ? v1.y : v2.y;
    375     return r;
    376 }
    377 
    378 extern ushort3 __attribute__((overloadable)) max(ushort3 v1, ushort3 v2) {
    379     ushort3 r;
    380     r.x = v1.x > v2.x ? v1.x : v2.x;
    381     r.y = v1.y > v2.y ? v1.y : v2.y;
    382     r.z = v1.z > v2.z ? v1.z : v2.z;
    383     return r;
    384 }
    385 
    386 extern ushort4 __attribute__((overloadable)) max(ushort4 v1, ushort4 v2) {
    387     ushort4 r;
    388     r.x = v1.x > v2.x ? v1.x : v2.x;
    389     r.y = v1.y > v2.y ? v1.y : v2.y;
    390     r.z = v1.z > v2.z ? v1.z : v2.z;
    391     r.w = v1.w > v2.w ? v1.w : v2.w;
    392     return r;
    393 }
    394 
    395 extern uint __attribute__((overloadable)) max(uint v1, uint v2) {
    396     return v1 > v2 ? v1 : v2;
    397 }
    398 
    399 extern uint2 __attribute__((overloadable)) max(uint2 v1, uint2 v2) {
    400     uint2 r;
    401     r.x = v1.x > v2.x ? v1.x : v2.x;
    402     r.y = v1.y > v2.y ? v1.y : v2.y;
    403     return r;
    404 }
    405 
    406 extern uint3 __attribute__((overloadable)) max(uint3 v1, uint3 v2) {
    407     uint3 r;
    408     r.x = v1.x > v2.x ? v1.x : v2.x;
    409     r.y = v1.y > v2.y ? v1.y : v2.y;
    410     r.z = v1.z > v2.z ? v1.z : v2.z;
    411     return r;
    412 }
    413 
    414 extern uint4 __attribute__((overloadable)) max(uint4 v1, uint4 v2) {
    415     uint4 r;
    416     r.x = v1.x > v2.x ? v1.x : v2.x;
    417     r.y = v1.y > v2.y ? v1.y : v2.y;
    418     r.z = v1.z > v2.z ? v1.z : v2.z;
    419     r.w = v1.w > v2.w ? v1.w : v2.w;
    420     return r;
    421 }
    422 
    423 extern ulong __attribute__((overloadable)) max(ulong v1, ulong v2) {
    424     return v1 > v2 ? v1 : v2;
    425 }
    426 
    427 extern ulong2 __attribute__((overloadable)) max(ulong2 v1, ulong2 v2) {
    428     ulong2 r;
    429     r.x = v1.x > v2.x ? v1.x : v2.x;
    430     r.y = v1.y > v2.y ? v1.y : v2.y;
    431     return r;
    432 }
    433 
    434 extern ulong3 __attribute__((overloadable)) max(ulong3 v1, ulong3 v2) {
    435     ulong3 r;
    436     r.x = v1.x > v2.x ? v1.x : v2.x;
    437     r.y = v1.y > v2.y ? v1.y : v2.y;
    438     r.z = v1.z > v2.z ? v1.z : v2.z;
    439     return r;
    440 }
    441 
    442 extern ulong4 __attribute__((overloadable)) max(ulong4 v1, ulong4 v2) {
    443     ulong4 r;
    444     r.x = v1.x > v2.x ? v1.x : v2.x;
    445     r.y = v1.y > v2.y ? v1.y : v2.y;
    446     r.z = v1.z > v2.z ? v1.z : v2.z;
    447     r.w = v1.w > v2.w ? v1.w : v2.w;
    448     return r;
    449 }
    450 
    451 extern float __attribute__((overloadable)) max(float v1, float v2) {
    452     return fmax(v1, v2);
    453 }
    454 
    455 extern float2 __attribute__((overloadable)) max(float2 v1, float2 v2) {
    456     return fmax(v1, v2);
    457 }
    458 
    459 extern float2 __attribute__((overloadable)) max(float2 v1, float v2) {
    460     return fmax(v1, v2);
    461 }
    462 
    463 extern float3 __attribute__((overloadable)) max(float3 v1, float3 v2) {
    464     return fmax(v1, v2);
    465 }
    466 
    467 extern float3 __attribute__((overloadable)) max(float3 v1, float v2) {
    468     return fmax(v1, v2);
    469 }
    470 
    471 extern float4 __attribute__((overloadable)) max(float4 v1, float4 v2) {
    472     return fmax(v1, v2);
    473 }
    474 
    475 extern float4 __attribute__((overloadable)) max(float4 v1, float v2) {
    476     return fmax(v1, v2);
    477 }
    478 
    479 
    480 /*
    481  * MIN
    482  */
    483 
    484 extern int8_t __attribute__((overloadable)) min(int8_t v1, int8_t v2) {
    485     return v1 < v2 ? v1 : v2;
    486 }
    487 
    488 extern char2 __attribute__((overloadable)) min(char2 v1, char2 v2) {
    489     char2 r;
    490     r.x = v1.x < v2.x ? v1.x : v2.x;
    491     r.y = v1.y < v2.y ? v1.y : v2.y;
    492     return r;
    493 }
    494 
    495 extern char3 __attribute__((overloadable)) min(char3 v1, char3 v2) {
    496     char3 r;
    497     r.x = v1.x < v2.x ? v1.x : v2.x;
    498     r.y = v1.y < v2.y ? v1.y : v2.y;
    499     r.z = v1.z < v2.z ? v1.z : v2.z;
    500     return r;
    501 }
    502 
    503 extern char4 __attribute__((overloadable)) min(char4 v1, char4 v2) {
    504     char4 r;
    505     r.x = v1.x < v2.x ? v1.x : v2.x;
    506     r.y = v1.y < v2.y ? v1.y : v2.y;
    507     r.z = v1.z < v2.z ? v1.z : v2.z;
    508     r.w = v1.w < v2.w ? v1.w : v2.w;
    509     return r;
    510 }
    511 
    512 extern int16_t __attribute__((overloadable)) min(int16_t v1, int16_t v2) {
    513     return v1 < v2 ? v1 : v2;
    514 }
    515 
    516 extern short2 __attribute__((overloadable)) min(short2 v1, short2 v2) {
    517     short2 r;
    518     r.x = v1.x < v2.x ? v1.x : v2.x;
    519     r.y = v1.y < v2.y ? v1.y : v2.y;
    520     return r;
    521 }
    522 
    523 extern short3 __attribute__((overloadable)) min(short3 v1, short3 v2) {
    524     short3 r;
    525     r.x = v1.x < v2.x ? v1.x : v2.x;
    526     r.y = v1.y < v2.y ? v1.y : v2.y;
    527     r.z = v1.z < v2.z ? v1.z : v2.z;
    528     return r;
    529 }
    530 
    531 extern short4 __attribute__((overloadable)) min(short4 v1, short4 v2) {
    532     short4 r;
    533     r.x = v1.x < v2.x ? v1.x : v2.x;
    534     r.y = v1.y < v2.y ? v1.y : v2.y;
    535     r.z = v1.z < v2.z ? v1.z : v2.z;
    536     r.w = v1.w < v2.w ? v1.w : v2.w;
    537     return r;
    538 }
    539 
    540 extern int32_t __attribute__((overloadable)) min(int32_t v1, int32_t v2) {
    541     return v1 < v2 ? v1 : v2;
    542 }
    543 
    544 extern int2 __attribute__((overloadable)) min(int2 v1, int2 v2) {
    545     int2 r;
    546     r.x = v1.x < v2.x ? v1.x : v2.x;
    547     r.y = v1.y < v2.y ? v1.y : v2.y;
    548     return r;
    549 }
    550 
    551 extern int3 __attribute__((overloadable)) min(int3 v1, int3 v2) {
    552     int3 r;
    553     r.x = v1.x < v2.x ? v1.x : v2.x;
    554     r.y = v1.y < v2.y ? v1.y : v2.y;
    555     r.z = v1.z < v2.z ? v1.z : v2.z;
    556     return r;
    557 }
    558 
    559 extern int4 __attribute__((overloadable)) min(int4 v1, int4 v2) {
    560     int4 r;
    561     r.x = v1.x < v2.x ? v1.x : v2.x;
    562     r.y = v1.y < v2.y ? v1.y : v2.y;
    563     r.z = v1.z < v2.z ? v1.z : v2.z;
    564     r.w = v1.w < v2.w ? v1.w : v2.w;
    565     return r;
    566 }
    567 
    568 extern int64_t __attribute__((overloadable)) min(int64_t v1, int64_t v2) {
    569     return v1 < v2 ? v1 : v2;
    570 }
    571 
    572 extern long2 __attribute__((overloadable)) min(long2 v1, long2 v2) {
    573     long2 r;
    574     r.x = v1.x < v2.x ? v1.x : v2.x;
    575     r.y = v1.y < v2.y ? v1.y : v2.y;
    576     return r;
    577 }
    578 
    579 extern long3 __attribute__((overloadable)) min(long3 v1, long3 v2) {
    580     long3 r;
    581     r.x = v1.x < v2.x ? v1.x : v2.x;
    582     r.y = v1.y < v2.y ? v1.y : v2.y;
    583     r.z = v1.z < v2.z ? v1.z : v2.z;
    584     return r;
    585 }
    586 
    587 extern long4 __attribute__((overloadable)) min(long4 v1, long4 v2) {
    588     long4 r;
    589     r.x = v1.x < v2.x ? v1.x : v2.x;
    590     r.y = v1.y < v2.y ? v1.y : v2.y;
    591     r.z = v1.z < v2.z ? v1.z : v2.z;
    592     r.w = v1.w < v2.w ? v1.w : v2.w;
    593     return r;
    594 }
    595 
    596 extern uchar __attribute__((overloadable)) min(uchar v1, uchar v2) {
    597     return v1 < v2 ? v1 : v2;
    598 }
    599 
    600 extern uchar2 __attribute__((overloadable)) min(uchar2 v1, uchar2 v2) {
    601     uchar2 r;
    602     r.x = v1.x < v2.x ? v1.x : v2.x;
    603     r.y = v1.y < v2.y ? v1.y : v2.y;
    604     return r;
    605 }
    606 
    607 extern uchar3 __attribute__((overloadable)) min(uchar3 v1, uchar3 v2) {
    608     uchar3 r;
    609     r.x = v1.x < v2.x ? v1.x : v2.x;
    610     r.y = v1.y < v2.y ? v1.y : v2.y;
    611     r.z = v1.z < v2.z ? v1.z : v2.z;
    612     return r;
    613 }
    614 
    615 extern uchar4 __attribute__((overloadable)) min(uchar4 v1, uchar4 v2) {
    616     uchar4 r;
    617     r.x = v1.x < v2.x ? v1.x : v2.x;
    618     r.y = v1.y < v2.y ? v1.y : v2.y;
    619     r.z = v1.z < v2.z ? v1.z : v2.z;
    620     r.w = v1.w < v2.w ? v1.w : v2.w;
    621     return r;
    622 }
    623 
    624 extern ushort __attribute__((overloadable)) min(ushort v1, ushort v2) {
    625     return v1 < v2 ? v1 : v2;
    626 }
    627 
    628 extern ushort2 __attribute__((overloadable)) min(ushort2 v1, ushort2 v2) {
    629     ushort2 r;
    630     r.x = v1.x < v2.x ? v1.x : v2.x;
    631     r.y = v1.y < v2.y ? v1.y : v2.y;
    632     return r;
    633 }
    634 
    635 extern ushort3 __attribute__((overloadable)) min(ushort3 v1, ushort3 v2) {
    636     ushort3 r;
    637     r.x = v1.x < v2.x ? v1.x : v2.x;
    638     r.y = v1.y < v2.y ? v1.y : v2.y;
    639     r.z = v1.z < v2.z ? v1.z : v2.z;
    640     return r;
    641 }
    642 
    643 extern ushort4 __attribute__((overloadable)) min(ushort4 v1, ushort4 v2) {
    644     ushort4 r;
    645     r.x = v1.x < v2.x ? v1.x : v2.x;
    646     r.y = v1.y < v2.y ? v1.y : v2.y;
    647     r.z = v1.z < v2.z ? v1.z : v2.z;
    648     r.w = v1.w < v2.w ? v1.w : v2.w;
    649     return r;
    650 }
    651 
    652 extern uint __attribute__((overloadable)) min(uint v1, uint v2) {
    653     return v1 < v2 ? v1 : v2;
    654 }
    655 
    656 extern uint2 __attribute__((overloadable)) min(uint2 v1, uint2 v2) {
    657     uint2 r;
    658     r.x = v1.x < v2.x ? v1.x : v2.x;
    659     r.y = v1.y < v2.y ? v1.y : v2.y;
    660     return r;
    661 }
    662 
    663 extern uint3 __attribute__((overloadable)) min(uint3 v1, uint3 v2) {
    664     uint3 r;
    665     r.x = v1.x < v2.x ? v1.x : v2.x;
    666     r.y = v1.y < v2.y ? v1.y : v2.y;
    667     r.z = v1.z < v2.z ? v1.z : v2.z;
    668     return r;
    669 }
    670 
    671 extern uint4 __attribute__((overloadable)) min(uint4 v1, uint4 v2) {
    672     uint4 r;
    673     r.x = v1.x < v2.x ? v1.x : v2.x;
    674     r.y = v1.y < v2.y ? v1.y : v2.y;
    675     r.z = v1.z < v2.z ? v1.z : v2.z;
    676     r.w = v1.w < v2.w ? v1.w : v2.w;
    677     return r;
    678 }
    679 
    680 extern ulong __attribute__((overloadable)) min(ulong v1, ulong v2) {
    681     return v1 < v2 ? v1 : v2;
    682 }
    683 
    684 extern ulong2 __attribute__((overloadable)) min(ulong2 v1, ulong2 v2) {
    685     ulong2 r;
    686     r.x = v1.x < v2.x ? v1.x : v2.x;
    687     r.y = v1.y < v2.y ? v1.y : v2.y;
    688     return r;
    689 }
    690 
    691 extern ulong3 __attribute__((overloadable)) min(ulong3 v1, ulong3 v2) {
    692     ulong3 r;
    693     r.x = v1.x < v2.x ? v1.x : v2.x;
    694     r.y = v1.y < v2.y ? v1.y : v2.y;
    695     r.z = v1.z < v2.z ? v1.z : v2.z;
    696     return r;
    697 }
    698 
    699 extern ulong4 __attribute__((overloadable)) min(ulong4 v1, ulong4 v2) {
    700     ulong4 r;
    701     r.x = v1.x < v2.x ? v1.x : v2.x;
    702     r.y = v1.y < v2.y ? v1.y : v2.y;
    703     r.z = v1.z < v2.z ? v1.z : v2.z;
    704     r.w = v1.w < v2.w ? v1.w : v2.w;
    705     return r;
    706 }
    707 
    708 extern float __attribute__((overloadable)) min(float v1, float v2) {
    709     return fmin(v1, v2);
    710 }
    711 
    712 extern float2 __attribute__((overloadable)) min(float2 v1, float2 v2) {
    713     return fmin(v1, v2);
    714 }
    715 
    716 extern float2 __attribute__((overloadable)) min(float2 v1, float v2) {
    717     return fmin(v1, v2);
    718 }
    719 
    720 extern float3 __attribute__((overloadable)) min(float3 v1, float3 v2) {
    721     return fmin(v1, v2);
    722 }
    723 
    724 extern float3 __attribute__((overloadable)) min(float3 v1, float v2) {
    725     return fmin(v1, v2);
    726 }
    727 
    728 extern float4 __attribute__((overloadable)) min(float4 v1, float4 v2) {
    729     return fmin(v1, v2);
    730 }
    731 
    732 extern float4 __attribute__((overloadable)) min(float4 v1, float v2) {
    733     return fmin(v1, v2);
    734 }
    735 
    736 /*
    737  * YUV
    738  */
    739 
    740 extern uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
    741     short Y = ((short)y) - 16;
    742     short U = ((short)u) - 128;
    743     short V = ((short)v) - 128;
    744 
    745     short4 p;
    746     p.r = (Y * 298 + V * 409 + 128) >> 8;
    747     p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
    748     p.b = (Y * 298 + U * 516 + 128) >> 8;
    749     p.a = 255;
    750     p.r = rsClamp(p.r, (short)0, (short)255);
    751     p.g = rsClamp(p.g, (short)0, (short)255);
    752     p.b = rsClamp(p.b, (short)0, (short)255);
    753 
    754     return convert_uchar4(p);
    755 }
    756 
    757 static float4 yuv_U_values = {0.f, -0.392f * 0.003921569f, +2.02 * 0.003921569f, 0.f};
    758 static float4 yuv_V_values = {1.603f * 0.003921569f, -0.815f * 0.003921569f, 0.f, 0.f};
    759 
    760 extern float4 __attribute__((overloadable)) rsYuvToRGBA_float4(uchar y, uchar u, uchar v) {
    761     float4 color = (float)y * 0.003921569f;
    762     float4 fU = ((float)u) - 128.f;
    763     float4 fV = ((float)v) - 128.f;
    764 
    765     color += fU * yuv_U_values;
    766     color += fV * yuv_V_values;
    767     color = clamp(color, 0.f, 1.f);
    768     return color;
    769 }
    770 
    771 
    772 /*
    773  * half_RECIP
    774  */
    775 
    776 extern float __attribute__((overloadable)) half_recip(float v) {
    777     // FIXME:  actual algorithm for generic approximate reciprocal
    778     return 1.f / v;
    779 }
    780 
    781 extern float2 __attribute__((overloadable)) half_recip(float2 v) {
    782     float2 r;
    783     r.x = half_recip(r.x);
    784     r.y = half_recip(r.y);
    785     return r;
    786 }
    787 
    788 extern float3 __attribute__((overloadable)) half_recip(float3 v) {
    789     float3 r;
    790     r.x = half_recip(r.x);
    791     r.y = half_recip(r.y);
    792     r.z = half_recip(r.z);
    793     return r;
    794 }
    795 
    796 extern float4 __attribute__((overloadable)) half_recip(float4 v) {
    797     float4 r;
    798     r.x = half_recip(r.x);
    799     r.y = half_recip(r.y);
    800     r.z = half_recip(r.z);
    801     r.w = half_recip(r.w);
    802     return r;
    803 }
    804 
    805 
    806 /*
    807  * half_SQRT
    808  */
    809 
    810 extern float __attribute__((overloadable)) half_sqrt(float v) {
    811     return sqrt(v);
    812 }
    813 
    814 extern float2 __attribute__((overloadable)) half_sqrt(float2 v) {
    815     float2 r;
    816     r.x = half_sqrt(v.x);
    817     r.y = half_sqrt(v.y);
    818     return r;
    819 }
    820 
    821 extern float3 __attribute__((overloadable)) half_sqrt(float3 v) {
    822     float3 r;
    823     r.x = half_sqrt(v.x);
    824     r.y = half_sqrt(v.y);
    825     r.z = half_sqrt(v.z);
    826     return r;
    827 }
    828 
    829 extern float4 __attribute__((overloadable)) half_sqrt(float4 v) {
    830     float4 r;
    831     r.x = half_sqrt(v.x);
    832     r.y = half_sqrt(v.y);
    833     r.z = half_sqrt(v.z);
    834     r.w = half_sqrt(v.w);
    835     return r;
    836 }
    837 
    838 
    839 /*
    840  * half_rsqrt
    841  */
    842 
    843 extern float __attribute__((overloadable)) half_rsqrt(float v) {
    844     return 1.f / sqrt(v);
    845 }
    846 
    847 extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
    848     float2 r;
    849     r.x = half_rsqrt(v.x);
    850     r.y = half_rsqrt(v.y);
    851     return r;
    852 }
    853 
    854 extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
    855     float3 r;
    856     r.x = half_rsqrt(v.x);
    857     r.y = half_rsqrt(v.y);
    858     r.z = half_rsqrt(v.z);
    859     return r;
    860 }
    861 
    862 extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
    863     float4 r;
    864     r.x = half_rsqrt(v.x);
    865     r.y = half_rsqrt(v.y);
    866     r.z = half_rsqrt(v.z);
    867     r.w = half_rsqrt(v.w);
    868     return r;
    869 }
    870 
    871 /**
    872  * matrix ops
    873  */
    874 
    875 extern float4 __attribute__((overloadable))
    876 rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
    877     float4 ret;
    878     ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w);
    879     ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w);
    880     ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w);
    881     ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w);
    882     return ret;
    883 }
    884 
    885 extern float4 __attribute__((overloadable))
    886 rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) {
    887     float4 ret;
    888     ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12];
    889     ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13];
    890     ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14];
    891     ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15];
    892     return ret;
    893 }
    894 
    895 extern float4 __attribute__((overloadable))
    896 rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) {
    897     float4 ret;
    898     ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12];
    899     ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13];
    900     ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14];
    901     ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15];
    902     return ret;
    903 }
    904 
    905 extern float3 __attribute__((overloadable))
    906 rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) {
    907     float3 ret;
    908     ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z);
    909     ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z);
    910     ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z);
    911     return ret;
    912 }
    913 
    914 extern float3 __attribute__((overloadable))
    915 rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) {
    916     float3 ret;
    917     ret.x = (m->m[0] * in.x) + (m->m[3] * in.y);
    918     ret.y = (m->m[1] * in.x) + (m->m[4] * in.y);
    919     ret.z = (m->m[2] * in.x) + (m->m[5] * in.y);
    920     return ret;
    921 }
    922 
    923 /**
    924  * Pixel Ops
    925  */
    926 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
    927 {
    928     uchar4 c;
    929     c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
    930     c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
    931     c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
    932     c.w = 255;
    933     return c;
    934 }
    935 
    936 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
    937 {
    938     uchar4 c;
    939     c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
    940     c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
    941     c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
    942     c.w = (uchar)clamp((a * 255.f + 0.5f), 0.f, 255.f);
    943     return c;
    944 }
    945 
    946 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
    947 {
    948     color *= 255.f;
    949     color += 0.5f;
    950     color = clamp(color, 0.f, 255.f);
    951     uchar4 c = {color.x, color.y, color.z, 255};
    952     return c;
    953 }
    954 
    955 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
    956 {
    957     color *= 255.f;
    958     color += 0.5f;
    959     color = clamp(color, 0.f, 255.f);
    960     uchar4 c = {color.x, color.y, color.z, color.w};
    961     return c;
    962 }
    963