Home | History | Annotate | Download | only in arch
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rs_types.rsh"
     19 
     20 extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
     21 extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
     22 extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
     23 extern uchar4 __attribute__((overloadable)) convert_uchar4(float4);
     24 extern float4 __attribute__((overloadable)) convert_float4(uchar4);
     25 extern float __attribute__((overloadable)) sqrt(float);
     26 
     27 /*
     28  * CLAMP
     29  */
     30 extern float __attribute__((overloadable)) clamp(float amount, float low, float high) {
     31     return amount < low ? low : (amount > high ? high : amount);
     32 }
     33 
     34 extern float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high) {
     35     float2 r;
     36     r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);
     37     r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);
     38     return r;
     39 }
     40 
     41 extern float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high) {
     42     float3 r;
     43     r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);
     44     r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);
     45     r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);
     46     return r;
     47 }
     48 
     49 extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high) {
     50     float4 r;
     51     r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);
     52     r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);
     53     r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);
     54     r.w = amount.w < low.w ? low.w : (amount.w > high.w ? high.w : amount.w);
     55     return r;
     56 }
     57 
     58 extern float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high) {
     59     float2 r;
     60     r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);
     61     r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);
     62     return r;
     63 }
     64 
     65 extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high) {
     66     float3 r;
     67     r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);
     68     r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);
     69     r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);
     70     return r;
     71 }
     72 
     73 extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high) {
     74     float4 r;
     75     r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);
     76     r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);
     77     r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);
     78     r.w = amount.w < low ? low : (amount.w > high ? high : amount.w);
     79     return r;
     80 }
     81 
     82 
     83 /*
     84  * FMAX
     85  */
     86 
     87 extern float __attribute__((overloadable)) fmax(float v1, float v2) {
     88     return v1 > v2 ? v1 : v2;
     89 }
     90 
     91 extern float2 __attribute__((overloadable)) fmax(float2 v1, float2 v2) {
     92     float2 r;
     93     r.x = v1.x > v2.x ? v1.x : v2.x;
     94     r.y = v1.y > v2.y ? v1.y : v2.y;
     95     return r;
     96 }
     97 
     98 extern float3 __attribute__((overloadable)) fmax(float3 v1, float3 v2) {
     99     float3 r;
    100     r.x = v1.x > v2.x ? v1.x : v2.x;
    101     r.y = v1.y > v2.y ? v1.y : v2.y;
    102     r.z = v1.z > v2.z ? v1.z : v2.z;
    103     return r;
    104 }
    105 
    106 extern float4 __attribute__((overloadable)) fmax(float4 v1, float4 v2) {
    107     float4 r;
    108     r.x = v1.x > v2.x ? v1.x : v2.x;
    109     r.y = v1.y > v2.y ? v1.y : v2.y;
    110     r.z = v1.z > v2.z ? v1.z : v2.z;
    111     r.w = v1.w > v2.w ? v1.w : v2.w;
    112     return r;
    113 }
    114 
    115 extern float2 __attribute__((overloadable)) fmax(float2 v1, float v2) {
    116     float2 r;
    117     r.x = v1.x > v2 ? v1.x : v2;
    118     r.y = v1.y > v2 ? v1.y : v2;
    119     return r;
    120 }
    121 
    122 extern float3 __attribute__((overloadable)) fmax(float3 v1, float v2) {
    123     float3 r;
    124     r.x = v1.x > v2 ? v1.x : v2;
    125     r.y = v1.y > v2 ? v1.y : v2;
    126     r.z = v1.z > v2 ? v1.z : v2;
    127     return r;
    128 }
    129 
    130 extern float4 __attribute__((overloadable)) fmax(float4 v1, float v2) {
    131     float4 r;
    132     r.x = v1.x > v2 ? v1.x : v2;
    133     r.y = v1.y > v2 ? v1.y : v2;
    134     r.z = v1.z > v2 ? v1.z : v2;
    135     r.w = v1.w > v2 ? v1.w : v2;
    136     return r;
    137 }
    138 
    139 extern float __attribute__((overloadable)) fmin(float v1, float v2) {
    140     return v1 < v2 ? v1 : v2;
    141 }
    142 
    143 
/*
 * FMIN (vector overloads; the scalar overload is defined just above this banner)
 */
    147 extern float2 __attribute__((overloadable)) fmin(float2 v1, float2 v2) {
    148     float2 r;
    149     r.x = v1.x < v2.x ? v1.x : v2.x;
    150     r.y = v1.y < v2.y ? v1.y : v2.y;
    151     return r;
    152 }
    153 
    154 extern float3 __attribute__((overloadable)) fmin(float3 v1, float3 v2) {
    155     float3 r;
    156     r.x = v1.x < v2.x ? v1.x : v2.x;
    157     r.y = v1.y < v2.y ? v1.y : v2.y;
    158     r.z = v1.z < v2.z ? v1.z : v2.z;
    159     return r;
    160 }
    161 
    162 extern float4 __attribute__((overloadable)) fmin(float4 v1, float4 v2) {
    163     float4 r;
    164     r.x = v1.x < v2.x ? v1.x : v2.x;
    165     r.y = v1.y < v2.y ? v1.y : v2.y;
    166     r.z = v1.z < v2.z ? v1.z : v2.z;
    167     r.w = v1.w < v2.w ? v1.w : v2.w;
    168     return r;
    169 }
    170 
    171 extern float2 __attribute__((overloadable)) fmin(float2 v1, float v2) {
    172     float2 r;
    173     r.x = v1.x < v2 ? v1.x : v2;
    174     r.y = v1.y < v2 ? v1.y : v2;
    175     return r;
    176 }
    177 
    178 extern float3 __attribute__((overloadable)) fmin(float3 v1, float v2) {
    179     float3 r;
    180     r.x = v1.x < v2 ? v1.x : v2;
    181     r.y = v1.y < v2 ? v1.y : v2;
    182     r.z = v1.z < v2 ? v1.z : v2;
    183     return r;
    184 }
    185 
    186 extern float4 __attribute__((overloadable)) fmin(float4 v1, float v2) {
    187     float4 r;
    188     r.x = v1.x < v2 ? v1.x : v2;
    189     r.y = v1.y < v2 ? v1.y : v2;
    190     r.z = v1.z < v2 ? v1.z : v2;
    191     r.w = v1.w < v2 ? v1.w : v2;
    192     return r;
    193 }
    194 
    195 
    196 /*
    197  * MAX
    198  */
    199 
    200 extern char __attribute__((overloadable)) max(char v1, char v2) {
    201     return v1 > v2 ? v1 : v2;
    202 }
    203 
    204 extern char2 __attribute__((overloadable)) max(char2 v1, char2 v2) {
    205     char2 r;
    206     r.x = v1.x > v2.x ? v1.x : v2.x;
    207     r.y = v1.y > v2.y ? v1.y : v2.y;
    208     return r;
    209 }
    210 
    211 extern char3 __attribute__((overloadable)) max(char3 v1, char3 v2) {
    212     char3 r;
    213     r.x = v1.x > v2.x ? v1.x : v2.x;
    214     r.y = v1.y > v2.y ? v1.y : v2.y;
    215     r.z = v1.z > v2.z ? v1.z : v2.z;
    216     return r;
    217 }
    218 
    219 extern char4 __attribute__((overloadable)) max(char4 v1, char4 v2) {
    220     char4 r;
    221     r.x = v1.x > v2.x ? v1.x : v2.x;
    222     r.y = v1.y > v2.y ? v1.y : v2.y;
    223     r.z = v1.z > v2.z ? v1.z : v2.z;
    224     r.w = v1.w > v2.w ? v1.w : v2.w;
    225     return r;
    226 }
    227 
    228 extern short __attribute__((overloadable)) max(short v1, short v2) {
    229     return v1 > v2 ? v1 : v2;
    230 }
    231 
    232 extern short2 __attribute__((overloadable)) max(short2 v1, short2 v2) {
    233     short2 r;
    234     r.x = v1.x > v2.x ? v1.x : v2.x;
    235     r.y = v1.y > v2.y ? v1.y : v2.y;
    236     return r;
    237 }
    238 
    239 extern short3 __attribute__((overloadable)) max(short3 v1, short3 v2) {
    240     short3 r;
    241     r.x = v1.x > v2.x ? v1.x : v2.x;
    242     r.y = v1.y > v2.y ? v1.y : v2.y;
    243     r.z = v1.z > v2.z ? v1.z : v2.z;
    244     return r;
    245 }
    246 
    247 extern short4 __attribute__((overloadable)) max(short4 v1, short4 v2) {
    248     short4 r;
    249     r.x = v1.x > v2.x ? v1.x : v2.x;
    250     r.y = v1.y > v2.y ? v1.y : v2.y;
    251     r.z = v1.z > v2.z ? v1.z : v2.z;
    252     r.w = v1.w > v2.w ? v1.w : v2.w;
    253     return r;
    254 }
    255 
    256 extern int __attribute__((overloadable)) max(int v1, int v2) {
    257     return v1 > v2 ? v1 : v2;
    258 }
    259 
    260 extern int2 __attribute__((overloadable)) max(int2 v1, int2 v2) {
    261     int2 r;
    262     r.x = v1.x > v2.x ? v1.x : v2.x;
    263     r.y = v1.y > v2.y ? v1.y : v2.y;
    264     return r;
    265 }
    266 
    267 extern int3 __attribute__((overloadable)) max(int3 v1, int3 v2) {
    268     int3 r;
    269     r.x = v1.x > v2.x ? v1.x : v2.x;
    270     r.y = v1.y > v2.y ? v1.y : v2.y;
    271     r.z = v1.z > v2.z ? v1.z : v2.z;
    272     return r;
    273 }
    274 
    275 extern int4 __attribute__((overloadable)) max(int4 v1, int4 v2) {
    276     int4 r;
    277     r.x = v1.x > v2.x ? v1.x : v2.x;
    278     r.y = v1.y > v2.y ? v1.y : v2.y;
    279     r.z = v1.z > v2.z ? v1.z : v2.z;
    280     r.w = v1.w > v2.w ? v1.w : v2.w;
    281     return r;
    282 }
    283 
    284 extern int64_t __attribute__((overloadable)) max(int64_t v1, int64_t v2) {
    285     return v1 > v2 ? v1 : v2;
    286 }
    287 
    288 extern long2 __attribute__((overloadable)) max(long2 v1, long2 v2) {
    289     long2 r;
    290     r.x = v1.x > v2.x ? v1.x : v2.x;
    291     r.y = v1.y > v2.y ? v1.y : v2.y;
    292     return r;
    293 }
    294 
    295 extern long3 __attribute__((overloadable)) max(long3 v1, long3 v2) {
    296     long3 r;
    297     r.x = v1.x > v2.x ? v1.x : v2.x;
    298     r.y = v1.y > v2.y ? v1.y : v2.y;
    299     r.z = v1.z > v2.z ? v1.z : v2.z;
    300     return r;
    301 }
    302 
    303 extern long4 __attribute__((overloadable)) max(long4 v1, long4 v2) {
    304     long4 r;
    305     r.x = v1.x > v2.x ? v1.x : v2.x;
    306     r.y = v1.y > v2.y ? v1.y : v2.y;
    307     r.z = v1.z > v2.z ? v1.z : v2.z;
    308     r.w = v1.w > v2.w ? v1.w : v2.w;
    309     return r;
    310 }
    311 
    312 extern uchar __attribute__((overloadable)) max(uchar v1, uchar v2) {
    313     return v1 > v2 ? v1 : v2;
    314 }
    315 
    316 extern uchar2 __attribute__((overloadable)) max(uchar2 v1, uchar2 v2) {
    317     uchar2 r;
    318     r.x = v1.x > v2.x ? v1.x : v2.x;
    319     r.y = v1.y > v2.y ? v1.y : v2.y;
    320     return r;
    321 }
    322 
    323 extern uchar3 __attribute__((overloadable)) max(uchar3 v1, uchar3 v2) {
    324     uchar3 r;
    325     r.x = v1.x > v2.x ? v1.x : v2.x;
    326     r.y = v1.y > v2.y ? v1.y : v2.y;
    327     r.z = v1.z > v2.z ? v1.z : v2.z;
    328     return r;
    329 }
    330 
    331 extern uchar4 __attribute__((overloadable)) max(uchar4 v1, uchar4 v2) {
    332     uchar4 r;
    333     r.x = v1.x > v2.x ? v1.x : v2.x;
    334     r.y = v1.y > v2.y ? v1.y : v2.y;
    335     r.z = v1.z > v2.z ? v1.z : v2.z;
    336     r.w = v1.w > v2.w ? v1.w : v2.w;
    337     return r;
    338 }
    339 
    340 extern ushort __attribute__((overloadable)) max(ushort v1, ushort v2) {
    341     return v1 > v2 ? v1 : v2;
    342 }
    343 
    344 extern ushort2 __attribute__((overloadable)) max(ushort2 v1, ushort2 v2) {
    345     ushort2 r;
    346     r.x = v1.x > v2.x ? v1.x : v2.x;
    347     r.y = v1.y > v2.y ? v1.y : v2.y;
    348     return r;
    349 }
    350 
    351 extern ushort3 __attribute__((overloadable)) max(ushort3 v1, ushort3 v2) {
    352     ushort3 r;
    353     r.x = v1.x > v2.x ? v1.x : v2.x;
    354     r.y = v1.y > v2.y ? v1.y : v2.y;
    355     r.z = v1.z > v2.z ? v1.z : v2.z;
    356     return r;
    357 }
    358 
    359 extern ushort4 __attribute__((overloadable)) max(ushort4 v1, ushort4 v2) {
    360     ushort4 r;
    361     r.x = v1.x > v2.x ? v1.x : v2.x;
    362     r.y = v1.y > v2.y ? v1.y : v2.y;
    363     r.z = v1.z > v2.z ? v1.z : v2.z;
    364     r.w = v1.w > v2.w ? v1.w : v2.w;
    365     return r;
    366 }
    367 
    368 extern uint __attribute__((overloadable)) max(uint v1, uint v2) {
    369     return v1 > v2 ? v1 : v2;
    370 }
    371 
    372 extern uint2 __attribute__((overloadable)) max(uint2 v1, uint2 v2) {
    373     uint2 r;
    374     r.x = v1.x > v2.x ? v1.x : v2.x;
    375     r.y = v1.y > v2.y ? v1.y : v2.y;
    376     return r;
    377 }
    378 
    379 extern uint3 __attribute__((overloadable)) max(uint3 v1, uint3 v2) {
    380     uint3 r;
    381     r.x = v1.x > v2.x ? v1.x : v2.x;
    382     r.y = v1.y > v2.y ? v1.y : v2.y;
    383     r.z = v1.z > v2.z ? v1.z : v2.z;
    384     return r;
    385 }
    386 
    387 extern uint4 __attribute__((overloadable)) max(uint4 v1, uint4 v2) {
    388     uint4 r;
    389     r.x = v1.x > v2.x ? v1.x : v2.x;
    390     r.y = v1.y > v2.y ? v1.y : v2.y;
    391     r.z = v1.z > v2.z ? v1.z : v2.z;
    392     r.w = v1.w > v2.w ? v1.w : v2.w;
    393     return r;
    394 }
    395 
    396 extern ulong __attribute__((overloadable)) max(ulong v1, ulong v2) {
    397     return v1 > v2 ? v1 : v2;
    398 }
    399 
    400 extern ulong2 __attribute__((overloadable)) max(ulong2 v1, ulong2 v2) {
    401     ulong2 r;
    402     r.x = v1.x > v2.x ? v1.x : v2.x;
    403     r.y = v1.y > v2.y ? v1.y : v2.y;
    404     return r;
    405 }
    406 
    407 extern ulong3 __attribute__((overloadable)) max(ulong3 v1, ulong3 v2) {
    408     ulong3 r;
    409     r.x = v1.x > v2.x ? v1.x : v2.x;
    410     r.y = v1.y > v2.y ? v1.y : v2.y;
    411     r.z = v1.z > v2.z ? v1.z : v2.z;
    412     return r;
    413 }
    414 
    415 extern ulong4 __attribute__((overloadable)) max(ulong4 v1, ulong4 v2) {
    416     ulong4 r;
    417     r.x = v1.x > v2.x ? v1.x : v2.x;
    418     r.y = v1.y > v2.y ? v1.y : v2.y;
    419     r.z = v1.z > v2.z ? v1.z : v2.z;
    420     r.w = v1.w > v2.w ? v1.w : v2.w;
    421     return r;
    422 }
    423 
    424 extern float __attribute__((overloadable)) max(float v1, float v2) {
    425     return fmax(v1, v2);
    426 }
    427 
    428 extern float2 __attribute__((overloadable)) max(float2 v1, float2 v2) {
    429     return fmax(v1, v2);
    430 }
    431 
    432 extern float2 __attribute__((overloadable)) max(float2 v1, float v2) {
    433     return fmax(v1, v2);
    434 }
    435 
    436 extern float3 __attribute__((overloadable)) max(float3 v1, float3 v2) {
    437     return fmax(v1, v2);
    438 }
    439 
    440 extern float3 __attribute__((overloadable)) max(float3 v1, float v2) {
    441     return fmax(v1, v2);
    442 }
    443 
    444 extern float4 __attribute__((overloadable)) max(float4 v1, float4 v2) {
    445     return fmax(v1, v2);
    446 }
    447 
    448 extern float4 __attribute__((overloadable)) max(float4 v1, float v2) {
    449     return fmax(v1, v2);
    450 }
    451 
    452 
    453 /*
    454  * MIN
    455  */
    456 
    457 extern int8_t __attribute__((overloadable)) min(int8_t v1, int8_t v2) {
    458     return v1 < v2 ? v1 : v2;
    459 }
    460 
    461 extern char2 __attribute__((overloadable)) min(char2 v1, char2 v2) {
    462     char2 r;
    463     r.x = v1.x < v2.x ? v1.x : v2.x;
    464     r.y = v1.y < v2.y ? v1.y : v2.y;
    465     return r;
    466 }
    467 
    468 extern char3 __attribute__((overloadable)) min(char3 v1, char3 v2) {
    469     char3 r;
    470     r.x = v1.x < v2.x ? v1.x : v2.x;
    471     r.y = v1.y < v2.y ? v1.y : v2.y;
    472     r.z = v1.z < v2.z ? v1.z : v2.z;
    473     return r;
    474 }
    475 
    476 extern char4 __attribute__((overloadable)) min(char4 v1, char4 v2) {
    477     char4 r;
    478     r.x = v1.x < v2.x ? v1.x : v2.x;
    479     r.y = v1.y < v2.y ? v1.y : v2.y;
    480     r.z = v1.z < v2.z ? v1.z : v2.z;
    481     r.w = v1.w < v2.w ? v1.w : v2.w;
    482     return r;
    483 }
    484 
    485 extern int16_t __attribute__((overloadable)) min(int16_t v1, int16_t v2) {
    486     return v1 < v2 ? v1 : v2;
    487 }
    488 
    489 extern short2 __attribute__((overloadable)) min(short2 v1, short2 v2) {
    490     short2 r;
    491     r.x = v1.x < v2.x ? v1.x : v2.x;
    492     r.y = v1.y < v2.y ? v1.y : v2.y;
    493     return r;
    494 }
    495 
    496 extern short3 __attribute__((overloadable)) min(short3 v1, short3 v2) {
    497     short3 r;
    498     r.x = v1.x < v2.x ? v1.x : v2.x;
    499     r.y = v1.y < v2.y ? v1.y : v2.y;
    500     r.z = v1.z < v2.z ? v1.z : v2.z;
    501     return r;
    502 }
    503 
    504 extern short4 __attribute__((overloadable)) min(short4 v1, short4 v2) {
    505     short4 r;
    506     r.x = v1.x < v2.x ? v1.x : v2.x;
    507     r.y = v1.y < v2.y ? v1.y : v2.y;
    508     r.z = v1.z < v2.z ? v1.z : v2.z;
    509     r.w = v1.w < v2.w ? v1.w : v2.w;
    510     return r;
    511 }
    512 
    513 extern int32_t __attribute__((overloadable)) min(int32_t v1, int32_t v2) {
    514     return v1 < v2 ? v1 : v2;
    515 }
    516 
    517 extern int2 __attribute__((overloadable)) min(int2 v1, int2 v2) {
    518     int2 r;
    519     r.x = v1.x < v2.x ? v1.x : v2.x;
    520     r.y = v1.y < v2.y ? v1.y : v2.y;
    521     return r;
    522 }
    523 
    524 extern int3 __attribute__((overloadable)) min(int3 v1, int3 v2) {
    525     int3 r;
    526     r.x = v1.x < v2.x ? v1.x : v2.x;
    527     r.y = v1.y < v2.y ? v1.y : v2.y;
    528     r.z = v1.z < v2.z ? v1.z : v2.z;
    529     return r;
    530 }
    531 
    532 extern int4 __attribute__((overloadable)) min(int4 v1, int4 v2) {
    533     int4 r;
    534     r.x = v1.x < v2.x ? v1.x : v2.x;
    535     r.y = v1.y < v2.y ? v1.y : v2.y;
    536     r.z = v1.z < v2.z ? v1.z : v2.z;
    537     r.w = v1.w < v2.w ? v1.w : v2.w;
    538     return r;
    539 }
    540 
    541 extern int64_t __attribute__((overloadable)) min(int64_t v1, int64_t v2) {
    542     return v1 < v2 ? v1 : v2;
    543 }
    544 
    545 extern long2 __attribute__((overloadable)) min(long2 v1, long2 v2) {
    546     long2 r;
    547     r.x = v1.x < v2.x ? v1.x : v2.x;
    548     r.y = v1.y < v2.y ? v1.y : v2.y;
    549     return r;
    550 }
    551 
    552 extern long3 __attribute__((overloadable)) min(long3 v1, long3 v2) {
    553     long3 r;
    554     r.x = v1.x < v2.x ? v1.x : v2.x;
    555     r.y = v1.y < v2.y ? v1.y : v2.y;
    556     r.z = v1.z < v2.z ? v1.z : v2.z;
    557     return r;
    558 }
    559 
    560 extern long4 __attribute__((overloadable)) min(long4 v1, long4 v2) {
    561     long4 r;
    562     r.x = v1.x < v2.x ? v1.x : v2.x;
    563     r.y = v1.y < v2.y ? v1.y : v2.y;
    564     r.z = v1.z < v2.z ? v1.z : v2.z;
    565     r.w = v1.w < v2.w ? v1.w : v2.w;
    566     return r;
    567 }
    568 
    569 extern uchar __attribute__((overloadable)) min(uchar v1, uchar v2) {
    570     return v1 < v2 ? v1 : v2;
    571 }
    572 
    573 extern uchar2 __attribute__((overloadable)) min(uchar2 v1, uchar2 v2) {
    574     uchar2 r;
    575     r.x = v1.x < v2.x ? v1.x : v2.x;
    576     r.y = v1.y < v2.y ? v1.y : v2.y;
    577     return r;
    578 }
    579 
    580 extern uchar3 __attribute__((overloadable)) min(uchar3 v1, uchar3 v2) {
    581     uchar3 r;
    582     r.x = v1.x < v2.x ? v1.x : v2.x;
    583     r.y = v1.y < v2.y ? v1.y : v2.y;
    584     r.z = v1.z < v2.z ? v1.z : v2.z;
    585     return r;
    586 }
    587 
    588 extern uchar4 __attribute__((overloadable)) min(uchar4 v1, uchar4 v2) {
    589     uchar4 r;
    590     r.x = v1.x < v2.x ? v1.x : v2.x;
    591     r.y = v1.y < v2.y ? v1.y : v2.y;
    592     r.z = v1.z < v2.z ? v1.z : v2.z;
    593     r.w = v1.w < v2.w ? v1.w : v2.w;
    594     return r;
    595 }
    596 
    597 extern ushort __attribute__((overloadable)) min(ushort v1, ushort v2) {
    598     return v1 < v2 ? v1 : v2;
    599 }
    600 
    601 extern ushort2 __attribute__((overloadable)) min(ushort2 v1, ushort2 v2) {
    602     ushort2 r;
    603     r.x = v1.x < v2.x ? v1.x : v2.x;
    604     r.y = v1.y < v2.y ? v1.y : v2.y;
    605     return r;
    606 }
    607 
    608 extern ushort3 __attribute__((overloadable)) min(ushort3 v1, ushort3 v2) {
    609     ushort3 r;
    610     r.x = v1.x < v2.x ? v1.x : v2.x;
    611     r.y = v1.y < v2.y ? v1.y : v2.y;
    612     r.z = v1.z < v2.z ? v1.z : v2.z;
    613     return r;
    614 }
    615 
    616 extern ushort4 __attribute__((overloadable)) min(ushort4 v1, ushort4 v2) {
    617     ushort4 r;
    618     r.x = v1.x < v2.x ? v1.x : v2.x;
    619     r.y = v1.y < v2.y ? v1.y : v2.y;
    620     r.z = v1.z < v2.z ? v1.z : v2.z;
    621     r.w = v1.w < v2.w ? v1.w : v2.w;
    622     return r;
    623 }
    624 
    625 extern uint __attribute__((overloadable)) min(uint v1, uint v2) {
    626     return v1 < v2 ? v1 : v2;
    627 }
    628 
    629 extern uint2 __attribute__((overloadable)) min(uint2 v1, uint2 v2) {
    630     uint2 r;
    631     r.x = v1.x < v2.x ? v1.x : v2.x;
    632     r.y = v1.y < v2.y ? v1.y : v2.y;
    633     return r;
    634 }
    635 
    636 extern uint3 __attribute__((overloadable)) min(uint3 v1, uint3 v2) {
    637     uint3 r;
    638     r.x = v1.x < v2.x ? v1.x : v2.x;
    639     r.y = v1.y < v2.y ? v1.y : v2.y;
    640     r.z = v1.z < v2.z ? v1.z : v2.z;
    641     return r;
    642 }
    643 
    644 extern uint4 __attribute__((overloadable)) min(uint4 v1, uint4 v2) {
    645     uint4 r;
    646     r.x = v1.x < v2.x ? v1.x : v2.x;
    647     r.y = v1.y < v2.y ? v1.y : v2.y;
    648     r.z = v1.z < v2.z ? v1.z : v2.z;
    649     r.w = v1.w < v2.w ? v1.w : v2.w;
    650     return r;
    651 }
    652 
    653 extern ulong __attribute__((overloadable)) min(ulong v1, ulong v2) {
    654     return v1 < v2 ? v1 : v2;
    655 }
    656 
    657 extern ulong2 __attribute__((overloadable)) min(ulong2 v1, ulong2 v2) {
    658     ulong2 r;
    659     r.x = v1.x < v2.x ? v1.x : v2.x;
    660     r.y = v1.y < v2.y ? v1.y : v2.y;
    661     return r;
    662 }
    663 
    664 extern ulong3 __attribute__((overloadable)) min(ulong3 v1, ulong3 v2) {
    665     ulong3 r;
    666     r.x = v1.x < v2.x ? v1.x : v2.x;
    667     r.y = v1.y < v2.y ? v1.y : v2.y;
    668     r.z = v1.z < v2.z ? v1.z : v2.z;
    669     return r;
    670 }
    671 
    672 extern ulong4 __attribute__((overloadable)) min(ulong4 v1, ulong4 v2) {
    673     ulong4 r;
    674     r.x = v1.x < v2.x ? v1.x : v2.x;
    675     r.y = v1.y < v2.y ? v1.y : v2.y;
    676     r.z = v1.z < v2.z ? v1.z : v2.z;
    677     r.w = v1.w < v2.w ? v1.w : v2.w;
    678     return r;
    679 }
    680 
    681 extern float __attribute__((overloadable)) min(float v1, float v2) {
    682     return fmin(v1, v2);
    683 }
    684 
    685 extern float2 __attribute__((overloadable)) min(float2 v1, float2 v2) {
    686     return fmin(v1, v2);
    687 }
    688 
    689 extern float2 __attribute__((overloadable)) min(float2 v1, float v2) {
    690     return fmin(v1, v2);
    691 }
    692 
    693 extern float3 __attribute__((overloadable)) min(float3 v1, float3 v2) {
    694     return fmin(v1, v2);
    695 }
    696 
    697 extern float3 __attribute__((overloadable)) min(float3 v1, float v2) {
    698     return fmin(v1, v2);
    699 }
    700 
    701 extern float4 __attribute__((overloadable)) min(float4 v1, float4 v2) {
    702     return fmin(v1, v2);
    703 }
    704 
    705 extern float4 __attribute__((overloadable)) min(float4 v1, float v2) {
    706     return fmin(v1, v2);
    707 }
    708 
    709 /*
    710  * YUV
    711  */
    712 
    713 extern uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
    714     short Y = ((short)y) - 16;
    715     short U = ((short)u) - 128;
    716     short V = ((short)v) - 128;
    717 
    718     short4 p;
    719     p.r = (Y * 298 + V * 409 + 128) >> 8;
    720     p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
    721     p.b = (Y * 298 + U * 516 + 128) >> 8;
    722     p.a = 255;
    723     p.r = rsClamp(p.r, (short)0, (short)255);
    724     p.g = rsClamp(p.g, (short)0, (short)255);
    725     p.b = rsClamp(p.b, (short)0, (short)255);
    726 
    727     return convert_uchar4(p);
    728 }
    729 
    730 static float4 yuv_U_values = {0.f, -0.392f * 0.003921569f, +2.02 * 0.003921569f, 0.f};
    731 static float4 yuv_V_values = {1.603f * 0.003921569f, -0.815f * 0.003921569f, 0.f, 0.f};
    732 
    733 extern float4 __attribute__((overloadable)) rsYuvToRGBA_float4(uchar y, uchar u, uchar v) {
    734     float4 color = (float)y * 0.003921569f;
    735     float4 fU = ((float)u) - 128.f;
    736     float4 fV = ((float)v) - 128.f;
    737 
    738     color += fU * yuv_U_values;
    739     color += fV * yuv_V_values;
    740     color = clamp(color, 0.f, 1.f);
    741     return color;
    742 }
    743 
    744 
/*
 * HALF_RECIP
 */
    748 
    749 extern float __attribute__((overloadable)) half_recip(float v) {
    750     // FIXME:  actual algorithm for generic approximate reciprocal
    751     return 1.f / v;
    752 }
    753 
    754 extern float2 __attribute__((overloadable)) half_recip(float2 v) {
    755     float2 r;
    756     r.x = half_recip(r.x);
    757     r.y = half_recip(r.y);
    758     return r;
    759 }
    760 
    761 extern float3 __attribute__((overloadable)) half_recip(float3 v) {
    762     float3 r;
    763     r.x = half_recip(r.x);
    764     r.y = half_recip(r.y);
    765     r.z = half_recip(r.z);
    766     return r;
    767 }
    768 
    769 extern float4 __attribute__((overloadable)) half_recip(float4 v) {
    770     float4 r;
    771     r.x = half_recip(r.x);
    772     r.y = half_recip(r.y);
    773     r.z = half_recip(r.z);
    774     r.w = half_recip(r.w);
    775     return r;
    776 }
    777 
    778 
/*
 * HALF_SQRT
 */
    782 
    783 extern float __attribute__((overloadable)) half_sqrt(float v) {
    784     return sqrt(v);
    785 }
    786 
    787 extern float2 __attribute__((overloadable)) half_sqrt(float2 v) {
    788     float2 r;
    789     r.x = half_sqrt(v.x);
    790     r.y = half_sqrt(v.y);
    791     return r;
    792 }
    793 
    794 extern float3 __attribute__((overloadable)) half_sqrt(float3 v) {
    795     float3 r;
    796     r.x = half_sqrt(v.x);
    797     r.y = half_sqrt(v.y);
    798     r.z = half_sqrt(v.z);
    799     return r;
    800 }
    801 
    802 extern float4 __attribute__((overloadable)) half_sqrt(float4 v) {
    803     float4 r;
    804     r.x = half_sqrt(v.x);
    805     r.y = half_sqrt(v.y);
    806     r.z = half_sqrt(v.z);
    807     r.w = half_sqrt(v.w);
    808     return r;
    809 }
    810 
    811 
/*
 * HALF_RSQRT
 */
    815 
    816 extern float __attribute__((overloadable)) half_rsqrt(float v) {
    817     return 1.f / sqrt(v);
    818 }
    819 
    820 extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
    821     float2 r;
    822     r.x = half_rsqrt(v.x);
    823     r.y = half_rsqrt(v.y);
    824     return r;
    825 }
    826 
    827 extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
    828     float3 r;
    829     r.x = half_rsqrt(v.x);
    830     r.y = half_rsqrt(v.y);
    831     r.z = half_rsqrt(v.z);
    832     return r;
    833 }
    834 
    835 extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
    836     float4 r;
    837     r.x = half_rsqrt(v.x);
    838     r.y = half_rsqrt(v.y);
    839     r.z = half_rsqrt(v.z);
    840     r.w = half_rsqrt(v.w);
    841     return r;
    842 }
    843 
    844 /**
    845  * matrix ops
    846  */
    847 
    848 extern float4 __attribute__((overloadable))
    849 rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
    850     float4 ret;
    851     ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w);
    852     ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w);
    853     ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w);
    854     ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w);
    855     return ret;
    856 }
    857 
    858 extern float4 __attribute__((overloadable))
    859 rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) {
    860     float4 ret;
    861     ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12];
    862     ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13];
    863     ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14];
    864     ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15];
    865     return ret;
    866 }
    867 
    868 extern float4 __attribute__((overloadable))
    869 rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) {
    870     float4 ret;
    871     ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12];
    872     ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13];
    873     ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14];
    874     ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15];
    875     return ret;
    876 }
    877 
    878 extern float3 __attribute__((overloadable))
    879 rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) {
    880     float3 ret;
    881     ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z);
    882     ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z);
    883     ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z);
    884     return ret;
    885 }
    886 
    887 extern float3 __attribute__((overloadable))
    888 rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) {
    889     float3 ret;
    890     ret.x = (m->m[0] * in.x) + (m->m[3] * in.y);
    891     ret.y = (m->m[1] * in.x) + (m->m[4] * in.y);
    892     ret.z = (m->m[2] * in.x) + (m->m[5] * in.y);
    893     return ret;
    894 }
    895 
    896 /**
    897  * Pixel Ops
    898  */
    899 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
    900 {
    901     uchar4 c;
    902     c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
    903     c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
    904     c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
    905     c.w = 255;
    906     return c;
    907 }
    908 
    909 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
    910 {
    911     uchar4 c;
    912     c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
    913     c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
    914     c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
    915     c.w = (uchar)clamp((a * 255.f + 0.5f), 0.f, 255.f);
    916     return c;
    917 }
    918 
    919 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
    920 {
    921     color *= 255.f;
    922     color += 0.5f;
    923     color = clamp(color, 0.f, 255.f);
    924     uchar4 c = {color.x, color.y, color.z, 255};
    925     return c;
    926 }
    927 
    928 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
    929 {
    930     color *= 255.f;
    931     color += 0.5f;
    932     color = clamp(color, 0.f, 255.f);
    933     uchar4 c = {color.x, color.y, color.z, color.w};
    934     return c;
    935 }
    936 
    937