Home | History | Annotate | Download | only in arch
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rs_core.rsh"
     19 
     20 extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
     21 extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
     22 extern uchar4 __attribute__((overloadable)) convert_uchar4(float4);
     23 extern float4 __attribute__((overloadable)) convert_float4(uchar4);
     24 extern float __attribute__((overloadable)) sqrt(float);
     25 
     26 /*
     27  * CLAMP
     28  */
     29 #define _CLAMP(T) \
     30 extern T __attribute__((overloadable)) clamp(T amount, T low, T high) {             \
     31     return amount < low ? low : (amount > high ? high : amount);                    \
     32 }                                                                                   \
     33                                                                                     \
     34 extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T##2 low, T##2 high) { \
     35     T##2 r;                                                                         \
     36     r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);       \
     37     r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);       \
     38     return r;                                                                       \
     39 }                                                                                   \
     40                                                                                     \
     41 extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T##3 low, T##3 high) { \
     42     T##3 r;                                                                         \
     43     r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);       \
     44     r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);       \
     45     r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);       \
     46     return r;                                                                       \
     47 }                                                                                   \
     48                                                                                     \
     49 extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T##4 low, T##4 high) { \
     50     T##4 r;                                                                         \
     51     r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);       \
     52     r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);       \
     53     r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);       \
     54     r.w = amount.w < low.w ? low.w : (amount.w > high.w ? high.w : amount.w);       \
     55     return r;                                                                       \
     56 }                                                                                   \
     57                                                                                     \
     58 extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T low, T high) {       \
     59     T##2 r;                                                                         \
     60     r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);               \
     61     r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);               \
     62     return r;                                                                       \
     63 }                                                                                   \
     64                                                                                     \
     65 extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T low, T high) {       \
     66     T##3 r;                                                                         \
     67     r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);               \
     68     r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);               \
     69     r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);               \
     70     return r;                                                                       \
     71 }                                                                                   \
     72                                                                                     \
     73 extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T low, T high) {       \
     74     T##4 r;                                                                         \
     75     r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);               \
     76     r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);               \
     77     r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);               \
     78     r.w = amount.w < low ? low : (amount.w > high ? high : amount.w);               \
     79     return r;                                                                       \
     80 }
     81 
     82 #if !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
     83 // These functions must be defined here if we are not using the SSE
     84 // implementation, which includes when we are built as part of the
     85 // debug runtime (libclcore_debug.bc) or compiling with debug info.
     86 
     87 _CLAMP(float);
     88 
     89 #else
     90 
     91 extern float __attribute__((overloadable)) clamp(float amount, float low, float high);
     92 extern float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high);
     93 extern float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high);
     94 extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
     95 extern float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high);
     96 extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high);
     97 extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high);
     98 
     99 #endif // !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
    100 
    101 _CLAMP(half);
    102 _CLAMP(double);
    103 _CLAMP(char);
    104 _CLAMP(uchar);
    105 _CLAMP(short);
    106 _CLAMP(ushort);
    107 _CLAMP(int);
    108 _CLAMP(uint);
    109 _CLAMP(long);
    110 _CLAMP(ulong);
    111 
    112 #undef _CLAMP
    113 
    114 /*
    115  * FMAX
    116  */
    117 
    118 extern float __attribute__((overloadable)) fmax(float v1, float v2) {
    119     return v1 > v2 ? v1 : v2;
    120 }
    121 
    122 extern float2 __attribute__((overloadable)) fmax(float2 v1, float2 v2) {
    123     float2 r;
    124     r.x = v1.x > v2.x ? v1.x : v2.x;
    125     r.y = v1.y > v2.y ? v1.y : v2.y;
    126     return r;
    127 }
    128 
    129 extern float3 __attribute__((overloadable)) fmax(float3 v1, float3 v2) {
    130     float3 r;
    131     r.x = v1.x > v2.x ? v1.x : v2.x;
    132     r.y = v1.y > v2.y ? v1.y : v2.y;
    133     r.z = v1.z > v2.z ? v1.z : v2.z;
    134     return r;
    135 }
    136 
    137 extern float4 __attribute__((overloadable)) fmax(float4 v1, float4 v2) {
    138     float4 r;
    139     r.x = v1.x > v2.x ? v1.x : v2.x;
    140     r.y = v1.y > v2.y ? v1.y : v2.y;
    141     r.z = v1.z > v2.z ? v1.z : v2.z;
    142     r.w = v1.w > v2.w ? v1.w : v2.w;
    143     return r;
    144 }
    145 
    146 extern float2 __attribute__((overloadable)) fmax(float2 v1, float v2) {
    147     float2 r;
    148     r.x = v1.x > v2 ? v1.x : v2;
    149     r.y = v1.y > v2 ? v1.y : v2;
    150     return r;
    151 }
    152 
    153 extern float3 __attribute__((overloadable)) fmax(float3 v1, float v2) {
    154     float3 r;
    155     r.x = v1.x > v2 ? v1.x : v2;
    156     r.y = v1.y > v2 ? v1.y : v2;
    157     r.z = v1.z > v2 ? v1.z : v2;
    158     return r;
    159 }
    160 
    161 extern float4 __attribute__((overloadable)) fmax(float4 v1, float v2) {
    162     float4 r;
    163     r.x = v1.x > v2 ? v1.x : v2;
    164     r.y = v1.y > v2 ? v1.y : v2;
    165     r.z = v1.z > v2 ? v1.z : v2;
    166     r.w = v1.w > v2 ? v1.w : v2;
    167     return r;
    168 }
    169 
    170 extern float __attribute__((overloadable)) fmin(float v1, float v2) {
    171     return v1 < v2 ? v1 : v2;
    172 }
    173 
    174 
    175 /*
    176  * FMIN
    177  */
    178 extern float2 __attribute__((overloadable)) fmin(float2 v1, float2 v2) {
    179     float2 r;
    180     r.x = v1.x < v2.x ? v1.x : v2.x;
    181     r.y = v1.y < v2.y ? v1.y : v2.y;
    182     return r;
    183 }
    184 
    185 extern float3 __attribute__((overloadable)) fmin(float3 v1, float3 v2) {
    186     float3 r;
    187     r.x = v1.x < v2.x ? v1.x : v2.x;
    188     r.y = v1.y < v2.y ? v1.y : v2.y;
    189     r.z = v1.z < v2.z ? v1.z : v2.z;
    190     return r;
    191 }
    192 
    193 extern float4 __attribute__((overloadable)) fmin(float4 v1, float4 v2) {
    194     float4 r;
    195     r.x = v1.x < v2.x ? v1.x : v2.x;
    196     r.y = v1.y < v2.y ? v1.y : v2.y;
    197     r.z = v1.z < v2.z ? v1.z : v2.z;
    198     r.w = v1.w < v2.w ? v1.w : v2.w;
    199     return r;
    200 }
    201 
    202 extern float2 __attribute__((overloadable)) fmin(float2 v1, float v2) {
    203     float2 r;
    204     r.x = v1.x < v2 ? v1.x : v2;
    205     r.y = v1.y < v2 ? v1.y : v2;
    206     return r;
    207 }
    208 
    209 extern float3 __attribute__((overloadable)) fmin(float3 v1, float v2) {
    210     float3 r;
    211     r.x = v1.x < v2 ? v1.x : v2;
    212     r.y = v1.y < v2 ? v1.y : v2;
    213     r.z = v1.z < v2 ? v1.z : v2;
    214     return r;
    215 }
    216 
    217 extern float4 __attribute__((overloadable)) fmin(float4 v1, float v2) {
    218     float4 r;
    219     r.x = v1.x < v2 ? v1.x : v2;
    220     r.y = v1.y < v2 ? v1.y : v2;
    221     r.z = v1.z < v2 ? v1.z : v2;
    222     r.w = v1.w < v2 ? v1.w : v2;
    223     return r;
    224 }
    225 
    226 
    227 /*
    228  * MAX
    229  */
    230 
    231 extern char __attribute__((overloadable)) max(char v1, char v2) {
    232     return v1 > v2 ? v1 : v2;
    233 }
    234 
    235 extern char2 __attribute__((overloadable)) max(char2 v1, char2 v2) {
    236     char2 r;
    237     r.x = v1.x > v2.x ? v1.x : v2.x;
    238     r.y = v1.y > v2.y ? v1.y : v2.y;
    239     return r;
    240 }
    241 
    242 extern char3 __attribute__((overloadable)) max(char3 v1, char3 v2) {
    243     char3 r;
    244     r.x = v1.x > v2.x ? v1.x : v2.x;
    245     r.y = v1.y > v2.y ? v1.y : v2.y;
    246     r.z = v1.z > v2.z ? v1.z : v2.z;
    247     return r;
    248 }
    249 
    250 extern char4 __attribute__((overloadable)) max(char4 v1, char4 v2) {
    251     char4 r;
    252     r.x = v1.x > v2.x ? v1.x : v2.x;
    253     r.y = v1.y > v2.y ? v1.y : v2.y;
    254     r.z = v1.z > v2.z ? v1.z : v2.z;
    255     r.w = v1.w > v2.w ? v1.w : v2.w;
    256     return r;
    257 }
    258 
    259 extern short __attribute__((overloadable)) max(short v1, short v2) {
    260     return v1 > v2 ? v1 : v2;
    261 }
    262 
    263 extern short2 __attribute__((overloadable)) max(short2 v1, short2 v2) {
    264     short2 r;
    265     r.x = v1.x > v2.x ? v1.x : v2.x;
    266     r.y = v1.y > v2.y ? v1.y : v2.y;
    267     return r;
    268 }
    269 
    270 extern short3 __attribute__((overloadable)) max(short3 v1, short3 v2) {
    271     short3 r;
    272     r.x = v1.x > v2.x ? v1.x : v2.x;
    273     r.y = v1.y > v2.y ? v1.y : v2.y;
    274     r.z = v1.z > v2.z ? v1.z : v2.z;
    275     return r;
    276 }
    277 
    278 extern short4 __attribute__((overloadable)) max(short4 v1, short4 v2) {
    279     short4 r;
    280     r.x = v1.x > v2.x ? v1.x : v2.x;
    281     r.y = v1.y > v2.y ? v1.y : v2.y;
    282     r.z = v1.z > v2.z ? v1.z : v2.z;
    283     r.w = v1.w > v2.w ? v1.w : v2.w;
    284     return r;
    285 }
    286 
    287 extern int __attribute__((overloadable)) max(int v1, int v2) {
    288     return v1 > v2 ? v1 : v2;
    289 }
    290 
    291 extern int2 __attribute__((overloadable)) max(int2 v1, int2 v2) {
    292     int2 r;
    293     r.x = v1.x > v2.x ? v1.x : v2.x;
    294     r.y = v1.y > v2.y ? v1.y : v2.y;
    295     return r;
    296 }
    297 
    298 extern int3 __attribute__((overloadable)) max(int3 v1, int3 v2) {
    299     int3 r;
    300     r.x = v1.x > v2.x ? v1.x : v2.x;
    301     r.y = v1.y > v2.y ? v1.y : v2.y;
    302     r.z = v1.z > v2.z ? v1.z : v2.z;
    303     return r;
    304 }
    305 
    306 extern int4 __attribute__((overloadable)) max(int4 v1, int4 v2) {
    307     int4 r;
    308     r.x = v1.x > v2.x ? v1.x : v2.x;
    309     r.y = v1.y > v2.y ? v1.y : v2.y;
    310     r.z = v1.z > v2.z ? v1.z : v2.z;
    311     r.w = v1.w > v2.w ? v1.w : v2.w;
    312     return r;
    313 }
    314 
    315 extern uchar __attribute__((overloadable)) max(uchar v1, uchar v2) {
    316     return v1 > v2 ? v1 : v2;
    317 }
    318 
    319 extern uchar2 __attribute__((overloadable)) max(uchar2 v1, uchar2 v2) {
    320     uchar2 r;
    321     r.x = v1.x > v2.x ? v1.x : v2.x;
    322     r.y = v1.y > v2.y ? v1.y : v2.y;
    323     return r;
    324 }
    325 
    326 extern uchar3 __attribute__((overloadable)) max(uchar3 v1, uchar3 v2) {
    327     uchar3 r;
    328     r.x = v1.x > v2.x ? v1.x : v2.x;
    329     r.y = v1.y > v2.y ? v1.y : v2.y;
    330     r.z = v1.z > v2.z ? v1.z : v2.z;
    331     return r;
    332 }
    333 
    334 extern uchar4 __attribute__((overloadable)) max(uchar4 v1, uchar4 v2) {
    335     uchar4 r;
    336     r.x = v1.x > v2.x ? v1.x : v2.x;
    337     r.y = v1.y > v2.y ? v1.y : v2.y;
    338     r.z = v1.z > v2.z ? v1.z : v2.z;
    339     r.w = v1.w > v2.w ? v1.w : v2.w;
    340     return r;
    341 }
    342 
    343 extern ushort __attribute__((overloadable)) max(ushort v1, ushort v2) {
    344     return v1 > v2 ? v1 : v2;
    345 }
    346 
    347 extern ushort2 __attribute__((overloadable)) max(ushort2 v1, ushort2 v2) {
    348     ushort2 r;
    349     r.x = v1.x > v2.x ? v1.x : v2.x;
    350     r.y = v1.y > v2.y ? v1.y : v2.y;
    351     return r;
    352 }
    353 
    354 extern ushort3 __attribute__((overloadable)) max(ushort3 v1, ushort3 v2) {
    355     ushort3 r;
    356     r.x = v1.x > v2.x ? v1.x : v2.x;
    357     r.y = v1.y > v2.y ? v1.y : v2.y;
    358     r.z = v1.z > v2.z ? v1.z : v2.z;
    359     return r;
    360 }
    361 
    362 extern ushort4 __attribute__((overloadable)) max(ushort4 v1, ushort4 v2) {
    363     ushort4 r;
    364     r.x = v1.x > v2.x ? v1.x : v2.x;
    365     r.y = v1.y > v2.y ? v1.y : v2.y;
    366     r.z = v1.z > v2.z ? v1.z : v2.z;
    367     r.w = v1.w > v2.w ? v1.w : v2.w;
    368     return r;
    369 }
    370 
    371 extern uint __attribute__((overloadable)) max(uint v1, uint v2) {
    372     return v1 > v2 ? v1 : v2;
    373 }
    374 
    375 extern uint2 __attribute__((overloadable)) max(uint2 v1, uint2 v2) {
    376     uint2 r;
    377     r.x = v1.x > v2.x ? v1.x : v2.x;
    378     r.y = v1.y > v2.y ? v1.y : v2.y;
    379     return r;
    380 }
    381 
    382 extern uint3 __attribute__((overloadable)) max(uint3 v1, uint3 v2) {
    383     uint3 r;
    384     r.x = v1.x > v2.x ? v1.x : v2.x;
    385     r.y = v1.y > v2.y ? v1.y : v2.y;
    386     r.z = v1.z > v2.z ? v1.z : v2.z;
    387     return r;
    388 }
    389 
    390 extern uint4 __attribute__((overloadable)) max(uint4 v1, uint4 v2) {
    391     uint4 r;
    392     r.x = v1.x > v2.x ? v1.x : v2.x;
    393     r.y = v1.y > v2.y ? v1.y : v2.y;
    394     r.z = v1.z > v2.z ? v1.z : v2.z;
    395     r.w = v1.w > v2.w ? v1.w : v2.w;
    396     return r;
    397 }
    398 
    399 extern float __attribute__((overloadable)) max(float v1, float v2) {
    400     return fmax(v1, v2);
    401 }
    402 
    403 extern float2 __attribute__((overloadable)) max(float2 v1, float2 v2) {
    404     return fmax(v1, v2);
    405 }
    406 
    407 extern float2 __attribute__((overloadable)) max(float2 v1, float v2) {
    408     return fmax(v1, v2);
    409 }
    410 
    411 extern float3 __attribute__((overloadable)) max(float3 v1, float3 v2) {
    412     return fmax(v1, v2);
    413 }
    414 
    415 extern float3 __attribute__((overloadable)) max(float3 v1, float v2) {
    416     return fmax(v1, v2);
    417 }
    418 
    419 extern float4 __attribute__((overloadable)) max(float4 v1, float4 v2) {
    420     return fmax(v1, v2);
    421 }
    422 
    423 extern float4 __attribute__((overloadable)) max(float4 v1, float v2) {
    424     return fmax(v1, v2);
    425 }
    426 
    427 
    428 /*
    429  * MIN
    430  */
    431 
    432 extern int8_t __attribute__((overloadable)) min(int8_t v1, int8_t v2) {
    433     return v1 < v2 ? v1 : v2;
    434 }
    435 
    436 extern char2 __attribute__((overloadable)) min(char2 v1, char2 v2) {
    437     char2 r;
    438     r.x = v1.x < v2.x ? v1.x : v2.x;
    439     r.y = v1.y < v2.y ? v1.y : v2.y;
    440     return r;
    441 }
    442 
    443 extern char3 __attribute__((overloadable)) min(char3 v1, char3 v2) {
    444     char3 r;
    445     r.x = v1.x < v2.x ? v1.x : v2.x;
    446     r.y = v1.y < v2.y ? v1.y : v2.y;
    447     r.z = v1.z < v2.z ? v1.z : v2.z;
    448     return r;
    449 }
    450 
    451 extern char4 __attribute__((overloadable)) min(char4 v1, char4 v2) {
    452     char4 r;
    453     r.x = v1.x < v2.x ? v1.x : v2.x;
    454     r.y = v1.y < v2.y ? v1.y : v2.y;
    455     r.z = v1.z < v2.z ? v1.z : v2.z;
    456     r.w = v1.w < v2.w ? v1.w : v2.w;
    457     return r;
    458 }
    459 
    460 extern int16_t __attribute__((overloadable)) min(int16_t v1, int16_t v2) {
    461     return v1 < v2 ? v1 : v2;
    462 }
    463 
    464 extern short2 __attribute__((overloadable)) min(short2 v1, short2 v2) {
    465     short2 r;
    466     r.x = v1.x < v2.x ? v1.x : v2.x;
    467     r.y = v1.y < v2.y ? v1.y : v2.y;
    468     return r;
    469 }
    470 
    471 extern short3 __attribute__((overloadable)) min(short3 v1, short3 v2) {
    472     short3 r;
    473     r.x = v1.x < v2.x ? v1.x : v2.x;
    474     r.y = v1.y < v2.y ? v1.y : v2.y;
    475     r.z = v1.z < v2.z ? v1.z : v2.z;
    476     return r;
    477 }
    478 
    479 extern short4 __attribute__((overloadable)) min(short4 v1, short4 v2) {
    480     short4 r;
    481     r.x = v1.x < v2.x ? v1.x : v2.x;
    482     r.y = v1.y < v2.y ? v1.y : v2.y;
    483     r.z = v1.z < v2.z ? v1.z : v2.z;
    484     r.w = v1.w < v2.w ? v1.w : v2.w;
    485     return r;
    486 }
    487 
    488 extern int32_t __attribute__((overloadable)) min(int32_t v1, int32_t v2) {
    489     return v1 < v2 ? v1 : v2;
    490 }
    491 
    492 extern int2 __attribute__((overloadable)) min(int2 v1, int2 v2) {
    493     int2 r;
    494     r.x = v1.x < v2.x ? v1.x : v2.x;
    495     r.y = v1.y < v2.y ? v1.y : v2.y;
    496     return r;
    497 }
    498 
    499 extern int3 __attribute__((overloadable)) min(int3 v1, int3 v2) {
    500     int3 r;
    501     r.x = v1.x < v2.x ? v1.x : v2.x;
    502     r.y = v1.y < v2.y ? v1.y : v2.y;
    503     r.z = v1.z < v2.z ? v1.z : v2.z;
    504     return r;
    505 }
    506 
    507 extern int4 __attribute__((overloadable)) min(int4 v1, int4 v2) {
    508     int4 r;
    509     r.x = v1.x < v2.x ? v1.x : v2.x;
    510     r.y = v1.y < v2.y ? v1.y : v2.y;
    511     r.z = v1.z < v2.z ? v1.z : v2.z;
    512     r.w = v1.w < v2.w ? v1.w : v2.w;
    513     return r;
    514 }
    515 
    516 extern uchar __attribute__((overloadable)) min(uchar v1, uchar v2) {
    517     return v1 < v2 ? v1 : v2;
    518 }
    519 
    520 extern uchar2 __attribute__((overloadable)) min(uchar2 v1, uchar2 v2) {
    521     uchar2 r;
    522     r.x = v1.x < v2.x ? v1.x : v2.x;
    523     r.y = v1.y < v2.y ? v1.y : v2.y;
    524     return r;
    525 }
    526 
    527 extern uchar3 __attribute__((overloadable)) min(uchar3 v1, uchar3 v2) {
    528     uchar3 r;
    529     r.x = v1.x < v2.x ? v1.x : v2.x;
    530     r.y = v1.y < v2.y ? v1.y : v2.y;
    531     r.z = v1.z < v2.z ? v1.z : v2.z;
    532     return r;
    533 }
    534 
    535 extern uchar4 __attribute__((overloadable)) min(uchar4 v1, uchar4 v2) {
    536     uchar4 r;
    537     r.x = v1.x < v2.x ? v1.x : v2.x;
    538     r.y = v1.y < v2.y ? v1.y : v2.y;
    539     r.z = v1.z < v2.z ? v1.z : v2.z;
    540     r.w = v1.w < v2.w ? v1.w : v2.w;
    541     return r;
    542 }
    543 
    544 extern ushort __attribute__((overloadable)) min(ushort v1, ushort v2) {
    545     return v1 < v2 ? v1 : v2;
    546 }
    547 
    548 extern ushort2 __attribute__((overloadable)) min(ushort2 v1, ushort2 v2) {
    549     ushort2 r;
    550     r.x = v1.x < v2.x ? v1.x : v2.x;
    551     r.y = v1.y < v2.y ? v1.y : v2.y;
    552     return r;
    553 }
    554 
    555 extern ushort3 __attribute__((overloadable)) min(ushort3 v1, ushort3 v2) {
    556     ushort3 r;
    557     r.x = v1.x < v2.x ? v1.x : v2.x;
    558     r.y = v1.y < v2.y ? v1.y : v2.y;
    559     r.z = v1.z < v2.z ? v1.z : v2.z;
    560     return r;
    561 }
    562 
    563 extern ushort4 __attribute__((overloadable)) min(ushort4 v1, ushort4 v2) {
    564     ushort4 r;
    565     r.x = v1.x < v2.x ? v1.x : v2.x;
    566     r.y = v1.y < v2.y ? v1.y : v2.y;
    567     r.z = v1.z < v2.z ? v1.z : v2.z;
    568     r.w = v1.w < v2.w ? v1.w : v2.w;
    569     return r;
    570 }
    571 
    572 extern uint __attribute__((overloadable)) min(uint v1, uint v2) {
    573     return v1 < v2 ? v1 : v2;
    574 }
    575 
    576 extern uint2 __attribute__((overloadable)) min(uint2 v1, uint2 v2) {
    577     uint2 r;
    578     r.x = v1.x < v2.x ? v1.x : v2.x;
    579     r.y = v1.y < v2.y ? v1.y : v2.y;
    580     return r;
    581 }
    582 
    583 extern uint3 __attribute__((overloadable)) min(uint3 v1, uint3 v2) {
    584     uint3 r;
    585     r.x = v1.x < v2.x ? v1.x : v2.x;
    586     r.y = v1.y < v2.y ? v1.y : v2.y;
    587     r.z = v1.z < v2.z ? v1.z : v2.z;
    588     return r;
    589 }
    590 
    591 extern uint4 __attribute__((overloadable)) min(uint4 v1, uint4 v2) {
    592     uint4 r;
    593     r.x = v1.x < v2.x ? v1.x : v2.x;
    594     r.y = v1.y < v2.y ? v1.y : v2.y;
    595     r.z = v1.z < v2.z ? v1.z : v2.z;
    596     r.w = v1.w < v2.w ? v1.w : v2.w;
    597     return r;
    598 }
    599 
    600 extern float __attribute__((overloadable)) min(float v1, float v2) {
    601     return fmin(v1, v2);
    602 }
    603 
    604 extern float2 __attribute__((overloadable)) min(float2 v1, float2 v2) {
    605     return fmin(v1, v2);
    606 }
    607 
    608 extern float2 __attribute__((overloadable)) min(float2 v1, float v2) {
    609     return fmin(v1, v2);
    610 }
    611 
    612 extern float3 __attribute__((overloadable)) min(float3 v1, float3 v2) {
    613     return fmin(v1, v2);
    614 }
    615 
    616 extern float3 __attribute__((overloadable)) min(float3 v1, float v2) {
    617     return fmin(v1, v2);
    618 }
    619 
    620 extern float4 __attribute__((overloadable)) min(float4 v1, float4 v2) {
    621     return fmin(v1, v2);
    622 }
    623 
    624 extern float4 __attribute__((overloadable)) min(float4 v1, float v2) {
    625     return fmin(v1, v2);
    626 }
    627 
    628 /*
    629  * YUV
    630  */
    631 
    632 extern uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
    633     short Y = ((short)y) - 16;
    634     short U = ((short)u) - 128;
    635     short V = ((short)v) - 128;
    636 
    637     short4 p;
    638     p.r = (Y * 298 + V * 409 + 128) >> 8;
    639     p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
    640     p.b = (Y * 298 + U * 516 + 128) >> 8;
    641     p.a = 255;
    642     p.r = rsClamp(p.r, (short)0, (short)255);
    643     p.g = rsClamp(p.g, (short)0, (short)255);
    644     p.b = rsClamp(p.b, (short)0, (short)255);
    645 
    646     return convert_uchar4(p);
    647 }
    648 
    649 /*
 * half_recip
    651  */
    652 
    653 extern float2 __attribute__((overloadable)) half_recip(float2 v) {
    654     return ((float2) 1.f) / v;
    655 }
    656 
    657 extern float3 __attribute__((overloadable)) half_recip(float3 v) {
    658     return ((float3) 1.f) / v;
    659 }
    660 
    661 extern float4 __attribute__((overloadable)) half_recip(float4 v) {
    662     return ((float4) 1.f) / v;
    663 }
    664 
    665 
    666 
    667 /*
    668  * half_rsqrt
    669  */
    670 
    671 extern float __attribute__((overloadable)) half_rsqrt(float v) {
    672     return 1.f / sqrt(v);
    673 }
    674 
    675 extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
    676     float2 r;
    677     r.x = half_rsqrt(v.x);
    678     r.y = half_rsqrt(v.y);
    679     return r;
    680 }
    681 
    682 extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
    683     float3 r;
    684     r.x = half_rsqrt(v.x);
    685     r.y = half_rsqrt(v.y);
    686     r.z = half_rsqrt(v.z);
    687     return r;
    688 }
    689 
    690 extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
    691     float4 r;
    692     r.x = half_rsqrt(v.x);
    693     r.y = half_rsqrt(v.y);
    694     r.z = half_rsqrt(v.z);
    695     r.w = half_rsqrt(v.w);
    696     return r;
    697 }
    698 
    699 /**
    700  * matrix ops
    701  */
    702 
    703 extern float4 __attribute__((overloadable))
    704 rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
    705     float4 ret;
    706     ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w);
    707     ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w);
    708     ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w);
    709     ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w);
    710     return ret;
    711 }
    712 
    713 extern float4 __attribute__((overloadable))
    714 rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) {
    715     float4 ret;
    716     ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12];
    717     ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13];
    718     ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14];
    719     ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15];
    720     return ret;
    721 }
    722 
    723 extern float4 __attribute__((overloadable))
    724 rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) {
    725     float4 ret;
    726     ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12];
    727     ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13];
    728     ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14];
    729     ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15];
    730     return ret;
    731 }
    732 
    733 extern float3 __attribute__((overloadable))
    734 rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) {
    735     float3 ret;
    736     ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z);
    737     ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z);
    738     ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z);
    739     return ret;
    740 }
    741 
    742 extern float3 __attribute__((overloadable))
    743 rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) {
    744     float3 ret;
    745     ret.x = (m->m[0] * in.x) + (m->m[3] * in.y);
    746     ret.y = (m->m[1] * in.x) + (m->m[4] * in.y);
    747     ret.z = (m->m[2] * in.x) + (m->m[5] * in.y);
    748     return ret;
    749 }
    750 
    751 /**
    752  * Pixel Ops
    753  */
    754 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
    755 {
    756     uchar4 c;
    757     c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
    758     c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
    759     c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
    760     c.w = 255;
    761     return c;
    762 }
    763 
    764 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
    765 {
    766     uchar4 c;
    767     c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
    768     c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
    769     c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
    770     c.w = (uchar)clamp((a * 255.f + 0.5f), 0.f, 255.f);
    771     return c;
    772 }
    773 
    774 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
    775 {
    776     color *= 255.f;
    777     color += 0.5f;
    778     color = clamp(color, 0.f, 255.f);
    779     uchar4 c = {color.x, color.y, color.z, 255};
    780     return c;
    781 }
    782 
    783 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
    784 {
    785     color *= 255.f;
    786     color += 0.5f;
    787     color = clamp(color, 0.f, 255.f);
    788     uchar4 c = {color.x, color.y, color.z, color.w};
    789     return c;
    790 }
    791