Home | History | Annotate | Download | only in arch
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rs_core.rsh"
     19 
/*
 * Prototypes for routines that are referenced but not defined in the part of
 * this file shown here.  rsClamp and convert_uchar4(short4) are used by
 * rsYuvToRGBA_uchar4 and sqrt by half_rsqrt; the remaining two conversions
 * are only declared here.
 */
extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
extern uchar4 __attribute__((overloadable)) convert_uchar4(float4);
extern float4 __attribute__((overloadable)) convert_float4(uchar4);
extern float __attribute__((overloadable)) sqrt(float);
     25 
     26 /*
     27  * CLAMP
     28  */
     29 #define _CLAMP(T) \
     30 extern T __attribute__((overloadable)) clamp(T amount, T low, T high) {             \
     31     return amount < low ? low : (amount > high ? high : amount);                    \
     32 }                                                                                   \
     33                                                                                     \
     34 extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T##2 low, T##2 high) { \
     35     T##2 r;                                                                         \
     36     r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);       \
     37     r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);       \
     38     return r;                                                                       \
     39 }                                                                                   \
     40                                                                                     \
     41 extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T##3 low, T##3 high) { \
     42     T##3 r;                                                                         \
     43     r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);       \
     44     r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);       \
     45     r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);       \
     46     return r;                                                                       \
     47 }                                                                                   \
     48                                                                                     \
     49 extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T##4 low, T##4 high) { \
     50     T##4 r;                                                                         \
     51     r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);       \
     52     r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);       \
     53     r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);       \
     54     r.w = amount.w < low.w ? low.w : (amount.w > high.w ? high.w : amount.w);       \
     55     return r;                                                                       \
     56 }                                                                                   \
     57                                                                                     \
     58 extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T low, T high) {       \
     59     T##2 r;                                                                         \
     60     r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);               \
     61     r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);               \
     62     return r;                                                                       \
     63 }                                                                                   \
     64                                                                                     \
     65 extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T low, T high) {       \
     66     T##3 r;                                                                         \
     67     r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);               \
     68     r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);               \
     69     r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);               \
     70     return r;                                                                       \
     71 }                                                                                   \
     72                                                                                     \
     73 extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T low, T high) {       \
     74     T##4 r;                                                                         \
     75     r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);               \
     76     r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);               \
     77     r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);               \
     78     r.w = amount.w < low ? low : (amount.w > high ? high : amount.w);               \
     79     return r;                                                                       \
     80 }
     81 
#if (!defined(__i386__) && !defined(__x86_64__)) || defined(RS_DEBUG_RUNTIME)
// These functions must be defined here if we are not using the SSE
// implementation, which includes when we are built as part of the
// debug runtime (libclcore_debug.bc).

// Emit the portable float clamp() overloads from the _CLAMP template.
_CLAMP(float);

#else

// x86 builds outside the debug runtime: only prototypes for the seven float
// clamp() overloads appear in this branch; no bodies are emitted here
// (presumably they come from the SSE implementation mentioned above).
extern float __attribute__((overloadable)) clamp(float amount, float low, float high);
extern float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high);
extern float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high);
extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
extern float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high);
extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high);
extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high);

#endif // (!defined(__i386__) && !defined(__x86_64__)) || defined(RS_DEBUG_RUNTIME)
    100 
// Instantiate the clamp() overloads for double and for every integer element
// type.  Unlike the float instantiation, these are not guarded by any
// architecture check.
_CLAMP(double);
_CLAMP(char);
_CLAMP(uchar);
_CLAMP(short);
_CLAMP(ushort);
_CLAMP(int);
_CLAMP(uint);
_CLAMP(long);
_CLAMP(ulong);

// The template macro is not needed past this point.
#undef _CLAMP
    112 
    113 /*
    114  * FMAX
    115  */
    116 
    117 extern float __attribute__((overloadable)) fmax(float v1, float v2) {
    118     return v1 > v2 ? v1 : v2;
    119 }
    120 
    121 extern float2 __attribute__((overloadable)) fmax(float2 v1, float2 v2) {
    122     float2 r;
    123     r.x = v1.x > v2.x ? v1.x : v2.x;
    124     r.y = v1.y > v2.y ? v1.y : v2.y;
    125     return r;
    126 }
    127 
    128 extern float3 __attribute__((overloadable)) fmax(float3 v1, float3 v2) {
    129     float3 r;
    130     r.x = v1.x > v2.x ? v1.x : v2.x;
    131     r.y = v1.y > v2.y ? v1.y : v2.y;
    132     r.z = v1.z > v2.z ? v1.z : v2.z;
    133     return r;
    134 }
    135 
    136 extern float4 __attribute__((overloadable)) fmax(float4 v1, float4 v2) {
    137     float4 r;
    138     r.x = v1.x > v2.x ? v1.x : v2.x;
    139     r.y = v1.y > v2.y ? v1.y : v2.y;
    140     r.z = v1.z > v2.z ? v1.z : v2.z;
    141     r.w = v1.w > v2.w ? v1.w : v2.w;
    142     return r;
    143 }
    144 
    145 extern float2 __attribute__((overloadable)) fmax(float2 v1, float v2) {
    146     float2 r;
    147     r.x = v1.x > v2 ? v1.x : v2;
    148     r.y = v1.y > v2 ? v1.y : v2;
    149     return r;
    150 }
    151 
    152 extern float3 __attribute__((overloadable)) fmax(float3 v1, float v2) {
    153     float3 r;
    154     r.x = v1.x > v2 ? v1.x : v2;
    155     r.y = v1.y > v2 ? v1.y : v2;
    156     r.z = v1.z > v2 ? v1.z : v2;
    157     return r;
    158 }
    159 
    160 extern float4 __attribute__((overloadable)) fmax(float4 v1, float v2) {
    161     float4 r;
    162     r.x = v1.x > v2 ? v1.x : v2;
    163     r.y = v1.y > v2 ? v1.y : v2;
    164     r.z = v1.z > v2 ? v1.z : v2;
    165     r.w = v1.w > v2 ? v1.w : v2;
    166     return r;
    167 }
    168 
    169 extern float __attribute__((overloadable)) fmin(float v1, float v2) {
    170     return v1 < v2 ? v1 : v2;
    171 }
    172 
    173 
    174 /*
    175  * FMIN
    176  */
    177 extern float2 __attribute__((overloadable)) fmin(float2 v1, float2 v2) {
    178     float2 r;
    179     r.x = v1.x < v2.x ? v1.x : v2.x;
    180     r.y = v1.y < v2.y ? v1.y : v2.y;
    181     return r;
    182 }
    183 
    184 extern float3 __attribute__((overloadable)) fmin(float3 v1, float3 v2) {
    185     float3 r;
    186     r.x = v1.x < v2.x ? v1.x : v2.x;
    187     r.y = v1.y < v2.y ? v1.y : v2.y;
    188     r.z = v1.z < v2.z ? v1.z : v2.z;
    189     return r;
    190 }
    191 
    192 extern float4 __attribute__((overloadable)) fmin(float4 v1, float4 v2) {
    193     float4 r;
    194     r.x = v1.x < v2.x ? v1.x : v2.x;
    195     r.y = v1.y < v2.y ? v1.y : v2.y;
    196     r.z = v1.z < v2.z ? v1.z : v2.z;
    197     r.w = v1.w < v2.w ? v1.w : v2.w;
    198     return r;
    199 }
    200 
    201 extern float2 __attribute__((overloadable)) fmin(float2 v1, float v2) {
    202     float2 r;
    203     r.x = v1.x < v2 ? v1.x : v2;
    204     r.y = v1.y < v2 ? v1.y : v2;
    205     return r;
    206 }
    207 
    208 extern float3 __attribute__((overloadable)) fmin(float3 v1, float v2) {
    209     float3 r;
    210     r.x = v1.x < v2 ? v1.x : v2;
    211     r.y = v1.y < v2 ? v1.y : v2;
    212     r.z = v1.z < v2 ? v1.z : v2;
    213     return r;
    214 }
    215 
    216 extern float4 __attribute__((overloadable)) fmin(float4 v1, float v2) {
    217     float4 r;
    218     r.x = v1.x < v2 ? v1.x : v2;
    219     r.y = v1.y < v2 ? v1.y : v2;
    220     r.z = v1.z < v2 ? v1.z : v2;
    221     r.w = v1.w < v2 ? v1.w : v2;
    222     return r;
    223 }
    224 
    225 
    226 /*
    227  * MAX
    228  */
    229 
    230 extern char __attribute__((overloadable)) max(char v1, char v2) {
    231     return v1 > v2 ? v1 : v2;
    232 }
    233 
    234 extern char2 __attribute__((overloadable)) max(char2 v1, char2 v2) {
    235     char2 r;
    236     r.x = v1.x > v2.x ? v1.x : v2.x;
    237     r.y = v1.y > v2.y ? v1.y : v2.y;
    238     return r;
    239 }
    240 
    241 extern char3 __attribute__((overloadable)) max(char3 v1, char3 v2) {
    242     char3 r;
    243     r.x = v1.x > v2.x ? v1.x : v2.x;
    244     r.y = v1.y > v2.y ? v1.y : v2.y;
    245     r.z = v1.z > v2.z ? v1.z : v2.z;
    246     return r;
    247 }
    248 
    249 extern char4 __attribute__((overloadable)) max(char4 v1, char4 v2) {
    250     char4 r;
    251     r.x = v1.x > v2.x ? v1.x : v2.x;
    252     r.y = v1.y > v2.y ? v1.y : v2.y;
    253     r.z = v1.z > v2.z ? v1.z : v2.z;
    254     r.w = v1.w > v2.w ? v1.w : v2.w;
    255     return r;
    256 }
    257 
    258 extern short __attribute__((overloadable)) max(short v1, short v2) {
    259     return v1 > v2 ? v1 : v2;
    260 }
    261 
    262 extern short2 __attribute__((overloadable)) max(short2 v1, short2 v2) {
    263     short2 r;
    264     r.x = v1.x > v2.x ? v1.x : v2.x;
    265     r.y = v1.y > v2.y ? v1.y : v2.y;
    266     return r;
    267 }
    268 
    269 extern short3 __attribute__((overloadable)) max(short3 v1, short3 v2) {
    270     short3 r;
    271     r.x = v1.x > v2.x ? v1.x : v2.x;
    272     r.y = v1.y > v2.y ? v1.y : v2.y;
    273     r.z = v1.z > v2.z ? v1.z : v2.z;
    274     return r;
    275 }
    276 
    277 extern short4 __attribute__((overloadable)) max(short4 v1, short4 v2) {
    278     short4 r;
    279     r.x = v1.x > v2.x ? v1.x : v2.x;
    280     r.y = v1.y > v2.y ? v1.y : v2.y;
    281     r.z = v1.z > v2.z ? v1.z : v2.z;
    282     r.w = v1.w > v2.w ? v1.w : v2.w;
    283     return r;
    284 }
    285 
    286 extern int __attribute__((overloadable)) max(int v1, int v2) {
    287     return v1 > v2 ? v1 : v2;
    288 }
    289 
    290 extern int2 __attribute__((overloadable)) max(int2 v1, int2 v2) {
    291     int2 r;
    292     r.x = v1.x > v2.x ? v1.x : v2.x;
    293     r.y = v1.y > v2.y ? v1.y : v2.y;
    294     return r;
    295 }
    296 
    297 extern int3 __attribute__((overloadable)) max(int3 v1, int3 v2) {
    298     int3 r;
    299     r.x = v1.x > v2.x ? v1.x : v2.x;
    300     r.y = v1.y > v2.y ? v1.y : v2.y;
    301     r.z = v1.z > v2.z ? v1.z : v2.z;
    302     return r;
    303 }
    304 
    305 extern int4 __attribute__((overloadable)) max(int4 v1, int4 v2) {
    306     int4 r;
    307     r.x = v1.x > v2.x ? v1.x : v2.x;
    308     r.y = v1.y > v2.y ? v1.y : v2.y;
    309     r.z = v1.z > v2.z ? v1.z : v2.z;
    310     r.w = v1.w > v2.w ? v1.w : v2.w;
    311     return r;
    312 }
    313 
    314 extern uchar __attribute__((overloadable)) max(uchar v1, uchar v2) {
    315     return v1 > v2 ? v1 : v2;
    316 }
    317 
    318 extern uchar2 __attribute__((overloadable)) max(uchar2 v1, uchar2 v2) {
    319     uchar2 r;
    320     r.x = v1.x > v2.x ? v1.x : v2.x;
    321     r.y = v1.y > v2.y ? v1.y : v2.y;
    322     return r;
    323 }
    324 
    325 extern uchar3 __attribute__((overloadable)) max(uchar3 v1, uchar3 v2) {
    326     uchar3 r;
    327     r.x = v1.x > v2.x ? v1.x : v2.x;
    328     r.y = v1.y > v2.y ? v1.y : v2.y;
    329     r.z = v1.z > v2.z ? v1.z : v2.z;
    330     return r;
    331 }
    332 
    333 extern uchar4 __attribute__((overloadable)) max(uchar4 v1, uchar4 v2) {
    334     uchar4 r;
    335     r.x = v1.x > v2.x ? v1.x : v2.x;
    336     r.y = v1.y > v2.y ? v1.y : v2.y;
    337     r.z = v1.z > v2.z ? v1.z : v2.z;
    338     r.w = v1.w > v2.w ? v1.w : v2.w;
    339     return r;
    340 }
    341 
    342 extern ushort __attribute__((overloadable)) max(ushort v1, ushort v2) {
    343     return v1 > v2 ? v1 : v2;
    344 }
    345 
    346 extern ushort2 __attribute__((overloadable)) max(ushort2 v1, ushort2 v2) {
    347     ushort2 r;
    348     r.x = v1.x > v2.x ? v1.x : v2.x;
    349     r.y = v1.y > v2.y ? v1.y : v2.y;
    350     return r;
    351 }
    352 
    353 extern ushort3 __attribute__((overloadable)) max(ushort3 v1, ushort3 v2) {
    354     ushort3 r;
    355     r.x = v1.x > v2.x ? v1.x : v2.x;
    356     r.y = v1.y > v2.y ? v1.y : v2.y;
    357     r.z = v1.z > v2.z ? v1.z : v2.z;
    358     return r;
    359 }
    360 
    361 extern ushort4 __attribute__((overloadable)) max(ushort4 v1, ushort4 v2) {
    362     ushort4 r;
    363     r.x = v1.x > v2.x ? v1.x : v2.x;
    364     r.y = v1.y > v2.y ? v1.y : v2.y;
    365     r.z = v1.z > v2.z ? v1.z : v2.z;
    366     r.w = v1.w > v2.w ? v1.w : v2.w;
    367     return r;
    368 }
    369 
    370 extern uint __attribute__((overloadable)) max(uint v1, uint v2) {
    371     return v1 > v2 ? v1 : v2;
    372 }
    373 
    374 extern uint2 __attribute__((overloadable)) max(uint2 v1, uint2 v2) {
    375     uint2 r;
    376     r.x = v1.x > v2.x ? v1.x : v2.x;
    377     r.y = v1.y > v2.y ? v1.y : v2.y;
    378     return r;
    379 }
    380 
    381 extern uint3 __attribute__((overloadable)) max(uint3 v1, uint3 v2) {
    382     uint3 r;
    383     r.x = v1.x > v2.x ? v1.x : v2.x;
    384     r.y = v1.y > v2.y ? v1.y : v2.y;
    385     r.z = v1.z > v2.z ? v1.z : v2.z;
    386     return r;
    387 }
    388 
    389 extern uint4 __attribute__((overloadable)) max(uint4 v1, uint4 v2) {
    390     uint4 r;
    391     r.x = v1.x > v2.x ? v1.x : v2.x;
    392     r.y = v1.y > v2.y ? v1.y : v2.y;
    393     r.z = v1.z > v2.z ? v1.z : v2.z;
    394     r.w = v1.w > v2.w ? v1.w : v2.w;
    395     return r;
    396 }
    397 
    398 extern float __attribute__((overloadable)) max(float v1, float v2) {
    399     return fmax(v1, v2);
    400 }
    401 
    402 extern float2 __attribute__((overloadable)) max(float2 v1, float2 v2) {
    403     return fmax(v1, v2);
    404 }
    405 
    406 extern float2 __attribute__((overloadable)) max(float2 v1, float v2) {
    407     return fmax(v1, v2);
    408 }
    409 
    410 extern float3 __attribute__((overloadable)) max(float3 v1, float3 v2) {
    411     return fmax(v1, v2);
    412 }
    413 
    414 extern float3 __attribute__((overloadable)) max(float3 v1, float v2) {
    415     return fmax(v1, v2);
    416 }
    417 
    418 extern float4 __attribute__((overloadable)) max(float4 v1, float4 v2) {
    419     return fmax(v1, v2);
    420 }
    421 
    422 extern float4 __attribute__((overloadable)) max(float4 v1, float v2) {
    423     return fmax(v1, v2);
    424 }
    425 
    426 
    427 /*
    428  * MIN
    429  */
    430 
    431 extern int8_t __attribute__((overloadable)) min(int8_t v1, int8_t v2) {
    432     return v1 < v2 ? v1 : v2;
    433 }
    434 
    435 extern char2 __attribute__((overloadable)) min(char2 v1, char2 v2) {
    436     char2 r;
    437     r.x = v1.x < v2.x ? v1.x : v2.x;
    438     r.y = v1.y < v2.y ? v1.y : v2.y;
    439     return r;
    440 }
    441 
    442 extern char3 __attribute__((overloadable)) min(char3 v1, char3 v2) {
    443     char3 r;
    444     r.x = v1.x < v2.x ? v1.x : v2.x;
    445     r.y = v1.y < v2.y ? v1.y : v2.y;
    446     r.z = v1.z < v2.z ? v1.z : v2.z;
    447     return r;
    448 }
    449 
    450 extern char4 __attribute__((overloadable)) min(char4 v1, char4 v2) {
    451     char4 r;
    452     r.x = v1.x < v2.x ? v1.x : v2.x;
    453     r.y = v1.y < v2.y ? v1.y : v2.y;
    454     r.z = v1.z < v2.z ? v1.z : v2.z;
    455     r.w = v1.w < v2.w ? v1.w : v2.w;
    456     return r;
    457 }
    458 
    459 extern int16_t __attribute__((overloadable)) min(int16_t v1, int16_t v2) {
    460     return v1 < v2 ? v1 : v2;
    461 }
    462 
    463 extern short2 __attribute__((overloadable)) min(short2 v1, short2 v2) {
    464     short2 r;
    465     r.x = v1.x < v2.x ? v1.x : v2.x;
    466     r.y = v1.y < v2.y ? v1.y : v2.y;
    467     return r;
    468 }
    469 
    470 extern short3 __attribute__((overloadable)) min(short3 v1, short3 v2) {
    471     short3 r;
    472     r.x = v1.x < v2.x ? v1.x : v2.x;
    473     r.y = v1.y < v2.y ? v1.y : v2.y;
    474     r.z = v1.z < v2.z ? v1.z : v2.z;
    475     return r;
    476 }
    477 
    478 extern short4 __attribute__((overloadable)) min(short4 v1, short4 v2) {
    479     short4 r;
    480     r.x = v1.x < v2.x ? v1.x : v2.x;
    481     r.y = v1.y < v2.y ? v1.y : v2.y;
    482     r.z = v1.z < v2.z ? v1.z : v2.z;
    483     r.w = v1.w < v2.w ? v1.w : v2.w;
    484     return r;
    485 }
    486 
    487 extern int32_t __attribute__((overloadable)) min(int32_t v1, int32_t v2) {
    488     return v1 < v2 ? v1 : v2;
    489 }
    490 
    491 extern int2 __attribute__((overloadable)) min(int2 v1, int2 v2) {
    492     int2 r;
    493     r.x = v1.x < v2.x ? v1.x : v2.x;
    494     r.y = v1.y < v2.y ? v1.y : v2.y;
    495     return r;
    496 }
    497 
    498 extern int3 __attribute__((overloadable)) min(int3 v1, int3 v2) {
    499     int3 r;
    500     r.x = v1.x < v2.x ? v1.x : v2.x;
    501     r.y = v1.y < v2.y ? v1.y : v2.y;
    502     r.z = v1.z < v2.z ? v1.z : v2.z;
    503     return r;
    504 }
    505 
    506 extern int4 __attribute__((overloadable)) min(int4 v1, int4 v2) {
    507     int4 r;
    508     r.x = v1.x < v2.x ? v1.x : v2.x;
    509     r.y = v1.y < v2.y ? v1.y : v2.y;
    510     r.z = v1.z < v2.z ? v1.z : v2.z;
    511     r.w = v1.w < v2.w ? v1.w : v2.w;
    512     return r;
    513 }
    514 
    515 extern uchar __attribute__((overloadable)) min(uchar v1, uchar v2) {
    516     return v1 < v2 ? v1 : v2;
    517 }
    518 
    519 extern uchar2 __attribute__((overloadable)) min(uchar2 v1, uchar2 v2) {
    520     uchar2 r;
    521     r.x = v1.x < v2.x ? v1.x : v2.x;
    522     r.y = v1.y < v2.y ? v1.y : v2.y;
    523     return r;
    524 }
    525 
    526 extern uchar3 __attribute__((overloadable)) min(uchar3 v1, uchar3 v2) {
    527     uchar3 r;
    528     r.x = v1.x < v2.x ? v1.x : v2.x;
    529     r.y = v1.y < v2.y ? v1.y : v2.y;
    530     r.z = v1.z < v2.z ? v1.z : v2.z;
    531     return r;
    532 }
    533 
    534 extern uchar4 __attribute__((overloadable)) min(uchar4 v1, uchar4 v2) {
    535     uchar4 r;
    536     r.x = v1.x < v2.x ? v1.x : v2.x;
    537     r.y = v1.y < v2.y ? v1.y : v2.y;
    538     r.z = v1.z < v2.z ? v1.z : v2.z;
    539     r.w = v1.w < v2.w ? v1.w : v2.w;
    540     return r;
    541 }
    542 
    543 extern ushort __attribute__((overloadable)) min(ushort v1, ushort v2) {
    544     return v1 < v2 ? v1 : v2;
    545 }
    546 
    547 extern ushort2 __attribute__((overloadable)) min(ushort2 v1, ushort2 v2) {
    548     ushort2 r;
    549     r.x = v1.x < v2.x ? v1.x : v2.x;
    550     r.y = v1.y < v2.y ? v1.y : v2.y;
    551     return r;
    552 }
    553 
    554 extern ushort3 __attribute__((overloadable)) min(ushort3 v1, ushort3 v2) {
    555     ushort3 r;
    556     r.x = v1.x < v2.x ? v1.x : v2.x;
    557     r.y = v1.y < v2.y ? v1.y : v2.y;
    558     r.z = v1.z < v2.z ? v1.z : v2.z;
    559     return r;
    560 }
    561 
    562 extern ushort4 __attribute__((overloadable)) min(ushort4 v1, ushort4 v2) {
    563     ushort4 r;
    564     r.x = v1.x < v2.x ? v1.x : v2.x;
    565     r.y = v1.y < v2.y ? v1.y : v2.y;
    566     r.z = v1.z < v2.z ? v1.z : v2.z;
    567     r.w = v1.w < v2.w ? v1.w : v2.w;
    568     return r;
    569 }
    570 
    571 extern uint __attribute__((overloadable)) min(uint v1, uint v2) {
    572     return v1 < v2 ? v1 : v2;
    573 }
    574 
    575 extern uint2 __attribute__((overloadable)) min(uint2 v1, uint2 v2) {
    576     uint2 r;
    577     r.x = v1.x < v2.x ? v1.x : v2.x;
    578     r.y = v1.y < v2.y ? v1.y : v2.y;
    579     return r;
    580 }
    581 
    582 extern uint3 __attribute__((overloadable)) min(uint3 v1, uint3 v2) {
    583     uint3 r;
    584     r.x = v1.x < v2.x ? v1.x : v2.x;
    585     r.y = v1.y < v2.y ? v1.y : v2.y;
    586     r.z = v1.z < v2.z ? v1.z : v2.z;
    587     return r;
    588 }
    589 
    590 extern uint4 __attribute__((overloadable)) min(uint4 v1, uint4 v2) {
    591     uint4 r;
    592     r.x = v1.x < v2.x ? v1.x : v2.x;
    593     r.y = v1.y < v2.y ? v1.y : v2.y;
    594     r.z = v1.z < v2.z ? v1.z : v2.z;
    595     r.w = v1.w < v2.w ? v1.w : v2.w;
    596     return r;
    597 }
    598 
    599 extern float __attribute__((overloadable)) min(float v1, float v2) {
    600     return fmin(v1, v2);
    601 }
    602 
    603 extern float2 __attribute__((overloadable)) min(float2 v1, float2 v2) {
    604     return fmin(v1, v2);
    605 }
    606 
    607 extern float2 __attribute__((overloadable)) min(float2 v1, float v2) {
    608     return fmin(v1, v2);
    609 }
    610 
    611 extern float3 __attribute__((overloadable)) min(float3 v1, float3 v2) {
    612     return fmin(v1, v2);
    613 }
    614 
    615 extern float3 __attribute__((overloadable)) min(float3 v1, float v2) {
    616     return fmin(v1, v2);
    617 }
    618 
    619 extern float4 __attribute__((overloadable)) min(float4 v1, float4 v2) {
    620     return fmin(v1, v2);
    621 }
    622 
    623 extern float4 __attribute__((overloadable)) min(float4 v1, float v2) {
    624     return fmin(v1, v2);
    625 }
    626 
    627 /*
    628  * YUV
    629  */
    630 
    631 extern uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
    632     short Y = ((short)y) - 16;
    633     short U = ((short)u) - 128;
    634     short V = ((short)v) - 128;
    635 
    636     short4 p;
    637     p.r = (Y * 298 + V * 409 + 128) >> 8;
    638     p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
    639     p.b = (Y * 298 + U * 516 + 128) >> 8;
    640     p.a = 255;
    641     p.r = rsClamp(p.r, (short)0, (short)255);
    642     p.g = rsClamp(p.g, (short)0, (short)255);
    643     p.b = rsClamp(p.b, (short)0, (short)255);
    644 
    645     return convert_uchar4(p);
    646 }
    647 
    648 /*
 * half_recip
    650  */
    651 
    652 extern float2 __attribute__((overloadable)) half_recip(float2 v) {
    653     return ((float2) 1.f) / v;
    654 }
    655 
    656 extern float3 __attribute__((overloadable)) half_recip(float3 v) {
    657     return ((float3) 1.f) / v;
    658 }
    659 
    660 extern float4 __attribute__((overloadable)) half_recip(float4 v) {
    661     return ((float4) 1.f) / v;
    662 }
    663 
    664 
    665 
    666 /*
    667  * half_rsqrt
    668  */
    669 
    670 extern float __attribute__((overloadable)) half_rsqrt(float v) {
    671     return 1.f / sqrt(v);
    672 }
    673 
    674 extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
    675     float2 r;
    676     r.x = half_rsqrt(v.x);
    677     r.y = half_rsqrt(v.y);
    678     return r;
    679 }
    680 
    681 extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
    682     float3 r;
    683     r.x = half_rsqrt(v.x);
    684     r.y = half_rsqrt(v.y);
    685     r.z = half_rsqrt(v.z);
    686     return r;
    687 }
    688 
    689 extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
    690     float4 r;
    691     r.x = half_rsqrt(v.x);
    692     r.y = half_rsqrt(v.y);
    693     r.z = half_rsqrt(v.z);
    694     r.w = half_rsqrt(v.w);
    695     return r;
    696 }
    697 
    698 /**
    699  * matrix ops
    700  */
    701 
    702 extern float4 __attribute__((overloadable))
    703 rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
    704     float4 ret;
    705     ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w);
    706     ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w);
    707     ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w);
    708     ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w);
    709     return ret;
    710 }
    711 
    712 extern float4 __attribute__((overloadable))
    713 rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) {
    714     float4 ret;
    715     ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12];
    716     ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13];
    717     ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14];
    718     ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15];
    719     return ret;
    720 }
    721 
    722 extern float4 __attribute__((overloadable))
    723 rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) {
    724     float4 ret;
    725     ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12];
    726     ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13];
    727     ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14];
    728     ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15];
    729     return ret;
    730 }
    731 
    732 extern float3 __attribute__((overloadable))
    733 rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) {
    734     float3 ret;
    735     ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z);
    736     ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z);
    737     ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z);
    738     return ret;
    739 }
    740 
    741 extern float3 __attribute__((overloadable))
    742 rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) {
    743     float3 ret;
    744     ret.x = (m->m[0] * in.x) + (m->m[3] * in.y);
    745     ret.y = (m->m[1] * in.x) + (m->m[4] * in.y);
    746     ret.z = (m->m[2] * in.x) + (m->m[5] * in.y);
    747     return ret;
    748 }
    749 
    750 /**
    751  * Pixel Ops
    752  */
    753 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
    754 {
    755     uchar4 c;
    756     c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
    757     c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
    758     c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
    759     c.w = 255;
    760     return c;
    761 }
    762 
    763 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
    764 {
    765     uchar4 c;
    766     c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
    767     c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
    768     c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
    769     c.w = (uchar)clamp((a * 255.f + 0.5f), 0.f, 255.f);
    770     return c;
    771 }
    772 
    773 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
    774 {
    775     color *= 255.f;
    776     color += 0.5f;
    777     color = clamp(color, 0.f, 255.f);
    778     uchar4 c = {color.x, color.y, color.z, 255};
    779     return c;
    780 }
    781 
    782 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
    783 {
    784     color *= 255.f;
    785     color += 0.5f;
    786     color = clamp(color, 0.f, 255.f);
    787     uchar4 c = {color.x, color.y, color.z, color.w};
    788     return c;
    789 }
    790