1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18 #include "rs_types.rsh" 19 20 extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high); 21 extern uchar4 __attribute__((overloadable)) convert_uchar4(short4); 22 extern uchar4 __attribute__((overloadable)) convert_uchar4(float4); 23 extern float4 __attribute__((overloadable)) convert_float4(uchar4); 24 extern float __attribute__((overloadable)) sqrt(float); 25 26 /* 27 * CLAMP 28 */ 29 #define _CLAMP(T) \ 30 extern T __attribute__((overloadable)) clamp(T amount, T low, T high) { \ 31 return amount < low ? low : (amount > high ? high : amount); \ 32 } \ 33 \ 34 extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T##2 low, T##2 high) { \ 35 T##2 r; \ 36 r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x); \ 37 r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y); \ 38 return r; \ 39 } \ 40 \ 41 extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T##3 low, T##3 high) { \ 42 T##3 r; \ 43 r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x); \ 44 r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y); \ 45 r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z); \ 46 return r; \ 47 } \ 48 \ 49 extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T##4 low, T##4 high) { \ 50 T##4 r; \ 51 r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x); \ 52 r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y); \ 53 r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z); \ 54 r.w = amount.w < low.w ? low.w : (amount.w > high.w ? high.w : amount.w); \ 55 return r; \ 56 } \ 57 \ 58 extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T low, T high) { \ 59 T##2 r; \ 60 r.x = amount.x < low ? low : (amount.x > high ? high : amount.x); \ 61 r.y = amount.y < low ? low : (amount.y > high ? high : amount.y); \ 62 return r; \ 63 } \ 64 \ 65 extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T low, T high) { \ 66 T##3 r; \ 67 r.x = amount.x < low ? low : (amount.x > high ? high : amount.x); \ 68 r.y = amount.y < low ? low : (amount.y > high ? high : amount.y); \ 69 r.z = amount.z < low ? low : (amount.z > high ? high : amount.z); \ 70 return r; \ 71 } \ 72 \ 73 extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T low, T high) { \ 74 T##4 r; \ 75 r.x = amount.x < low ? low : (amount.x > high ? high : amount.x); \ 76 r.y = amount.y < low ? low : (amount.y > high ? high : amount.y); \ 77 r.z = amount.z < low ? low : (amount.z > high ? high : amount.z); \ 78 r.w = amount.w < low ? low : (amount.w > high ? high : amount.w); \ 79 return r; \ 80 } 81 82 #if !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3) 83 84 _CLAMP(float); 85 86 #else 87 88 extern float __attribute__((overloadable)) clamp(float amount, float low, float high); 89 extern float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high); 90 extern float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high); 91 extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high); 92 extern float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high); 93 extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high); 94 extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high); 95 96 #endif // !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3) 97 98 _CLAMP(double); 99 _CLAMP(char); 100 _CLAMP(uchar); 101 _CLAMP(short); 102 _CLAMP(ushort); 103 _CLAMP(int); 104 _CLAMP(uint); 105 _CLAMP(long); 106 _CLAMP(ulong); 107 108 #undef _CLAMP 109 110 /* 111 * FMAX 112 */ 113 114 extern float __attribute__((overloadable)) fmax(float v1, float v2) { 115 return v1 > v2 ? v1 : v2; 116 } 117 118 extern float2 __attribute__((overloadable)) fmax(float2 v1, float2 v2) { 119 float2 r; 120 r.x = v1.x > v2.x ? v1.x : v2.x; 121 r.y = v1.y > v2.y ? v1.y : v2.y; 122 return r; 123 } 124 125 extern float3 __attribute__((overloadable)) fmax(float3 v1, float3 v2) { 126 float3 r; 127 r.x = v1.x > v2.x ? v1.x : v2.x; 128 r.y = v1.y > v2.y ? v1.y : v2.y; 129 r.z = v1.z > v2.z ? v1.z : v2.z; 130 return r; 131 } 132 133 extern float4 __attribute__((overloadable)) fmax(float4 v1, float4 v2) { 134 float4 r; 135 r.x = v1.x > v2.x ? v1.x : v2.x; 136 r.y = v1.y > v2.y ? v1.y : v2.y; 137 r.z = v1.z > v2.z ? v1.z : v2.z; 138 r.w = v1.w > v2.w ? v1.w : v2.w; 139 return r; 140 } 141 142 extern float2 __attribute__((overloadable)) fmax(float2 v1, float v2) { 143 float2 r; 144 r.x = v1.x > v2 ? v1.x : v2; 145 r.y = v1.y > v2 ? v1.y : v2; 146 return r; 147 } 148 149 extern float3 __attribute__((overloadable)) fmax(float3 v1, float v2) { 150 float3 r; 151 r.x = v1.x > v2 ? v1.x : v2; 152 r.y = v1.y > v2 ? v1.y : v2; 153 r.z = v1.z > v2 ? v1.z : v2; 154 return r; 155 } 156 157 extern float4 __attribute__((overloadable)) fmax(float4 v1, float v2) { 158 float4 r; 159 r.x = v1.x > v2 ? v1.x : v2; 160 r.y = v1.y > v2 ? v1.y : v2; 161 r.z = v1.z > v2 ? v1.z : v2; 162 r.w = v1.w > v2 ? v1.w : v2; 163 return r; 164 } 165 166 extern float __attribute__((overloadable)) fmin(float v1, float v2) { 167 return v1 < v2 ? v1 : v2; 168 } 169 170 171 /* 172 * FMIN 173 */ 174 extern float2 __attribute__((overloadable)) fmin(float2 v1, float2 v2) { 175 float2 r; 176 r.x = v1.x < v2.x ? v1.x : v2.x; 177 r.y = v1.y < v2.y ? v1.y : v2.y; 178 return r; 179 } 180 181 extern float3 __attribute__((overloadable)) fmin(float3 v1, float3 v2) { 182 float3 r; 183 r.x = v1.x < v2.x ? v1.x : v2.x; 184 r.y = v1.y < v2.y ? v1.y : v2.y; 185 r.z = v1.z < v2.z ? v1.z : v2.z; 186 return r; 187 } 188 189 extern float4 __attribute__((overloadable)) fmin(float4 v1, float4 v2) { 190 float4 r; 191 r.x = v1.x < v2.x ? v1.x : v2.x; 192 r.y = v1.y < v2.y ? v1.y : v2.y; 193 r.z = v1.z < v2.z ? v1.z : v2.z; 194 r.w = v1.w < v2.w ? v1.w : v2.w; 195 return r; 196 } 197 198 extern float2 __attribute__((overloadable)) fmin(float2 v1, float v2) { 199 float2 r; 200 r.x = v1.x < v2 ? v1.x : v2; 201 r.y = v1.y < v2 ? v1.y : v2; 202 return r; 203 } 204 205 extern float3 __attribute__((overloadable)) fmin(float3 v1, float v2) { 206 float3 r; 207 r.x = v1.x < v2 ? v1.x : v2; 208 r.y = v1.y < v2 ? v1.y : v2; 209 r.z = v1.z < v2 ? v1.z : v2; 210 return r; 211 } 212 213 extern float4 __attribute__((overloadable)) fmin(float4 v1, float v2) { 214 float4 r; 215 r.x = v1.x < v2 ? v1.x : v2; 216 r.y = v1.y < v2 ? v1.y : v2; 217 r.z = v1.z < v2 ? v1.z : v2; 218 r.w = v1.w < v2 ? v1.w : v2; 219 return r; 220 } 221 222 223 /* 224 * MAX 225 */ 226 227 extern char __attribute__((overloadable)) max(char v1, char v2) { 228 return v1 > v2 ? v1 : v2; 229 } 230 231 extern char2 __attribute__((overloadable)) max(char2 v1, char2 v2) { 232 char2 r; 233 r.x = v1.x > v2.x ? v1.x : v2.x; 234 r.y = v1.y > v2.y ? v1.y : v2.y; 235 return r; 236 } 237 238 extern char3 __attribute__((overloadable)) max(char3 v1, char3 v2) { 239 char3 r; 240 r.x = v1.x > v2.x ? v1.x : v2.x; 241 r.y = v1.y > v2.y ? v1.y : v2.y; 242 r.z = v1.z > v2.z ? v1.z : v2.z; 243 return r; 244 } 245 246 extern char4 __attribute__((overloadable)) max(char4 v1, char4 v2) { 247 char4 r; 248 r.x = v1.x > v2.x ? v1.x : v2.x; 249 r.y = v1.y > v2.y ? v1.y : v2.y; 250 r.z = v1.z > v2.z ? v1.z : v2.z; 251 r.w = v1.w > v2.w ? v1.w : v2.w; 252 return r; 253 } 254 255 extern short __attribute__((overloadable)) max(short v1, short v2) { 256 return v1 > v2 ? v1 : v2; 257 } 258 259 extern short2 __attribute__((overloadable)) max(short2 v1, short2 v2) { 260 short2 r; 261 r.x = v1.x > v2.x ? v1.x : v2.x; 262 r.y = v1.y > v2.y ? v1.y : v2.y; 263 return r; 264 } 265 266 extern short3 __attribute__((overloadable)) max(short3 v1, short3 v2) { 267 short3 r; 268 r.x = v1.x > v2.x ? v1.x : v2.x; 269 r.y = v1.y > v2.y ? v1.y : v2.y; 270 r.z = v1.z > v2.z ? v1.z : v2.z; 271 return r; 272 } 273 274 extern short4 __attribute__((overloadable)) max(short4 v1, short4 v2) { 275 short4 r; 276 r.x = v1.x > v2.x ? v1.x : v2.x; 277 r.y = v1.y > v2.y ? v1.y : v2.y; 278 r.z = v1.z > v2.z ? v1.z : v2.z; 279 r.w = v1.w > v2.w ? v1.w : v2.w; 280 return r; 281 } 282 283 extern int __attribute__((overloadable)) max(int v1, int v2) { 284 return v1 > v2 ? v1 : v2; 285 } 286 287 extern int2 __attribute__((overloadable)) max(int2 v1, int2 v2) { 288 int2 r; 289 r.x = v1.x > v2.x ? v1.x : v2.x; 290 r.y = v1.y > v2.y ? v1.y : v2.y; 291 return r; 292 } 293 294 extern int3 __attribute__((overloadable)) max(int3 v1, int3 v2) { 295 int3 r; 296 r.x = v1.x > v2.x ? v1.x : v2.x; 297 r.y = v1.y > v2.y ? v1.y : v2.y; 298 r.z = v1.z > v2.z ? v1.z : v2.z; 299 return r; 300 } 301 302 extern int4 __attribute__((overloadable)) max(int4 v1, int4 v2) { 303 int4 r; 304 r.x = v1.x > v2.x ? v1.x : v2.x; 305 r.y = v1.y > v2.y ? v1.y : v2.y; 306 r.z = v1.z > v2.z ? v1.z : v2.z; 307 r.w = v1.w > v2.w ? v1.w : v2.w; 308 return r; 309 } 310 311 extern int64_t __attribute__((overloadable)) max(int64_t v1, int64_t v2) { 312 return v1 > v2 ? v1 : v2; 313 } 314 315 extern long2 __attribute__((overloadable)) max(long2 v1, long2 v2) { 316 long2 r; 317 r.x = v1.x > v2.x ? v1.x : v2.x; 318 r.y = v1.y > v2.y ? v1.y : v2.y; 319 return r; 320 } 321 322 extern long3 __attribute__((overloadable)) max(long3 v1, long3 v2) { 323 long3 r; 324 r.x = v1.x > v2.x ? v1.x : v2.x; 325 r.y = v1.y > v2.y ? v1.y : v2.y; 326 r.z = v1.z > v2.z ? v1.z : v2.z; 327 return r; 328 } 329 330 extern long4 __attribute__((overloadable)) max(long4 v1, long4 v2) { 331 long4 r; 332 r.x = v1.x > v2.x ? v1.x : v2.x; 333 r.y = v1.y > v2.y ? v1.y : v2.y; 334 r.z = v1.z > v2.z ? v1.z : v2.z; 335 r.w = v1.w > v2.w ? v1.w : v2.w; 336 return r; 337 } 338 339 extern uchar __attribute__((overloadable)) max(uchar v1, uchar v2) { 340 return v1 > v2 ? v1 : v2; 341 } 342 343 extern uchar2 __attribute__((overloadable)) max(uchar2 v1, uchar2 v2) { 344 uchar2 r; 345 r.x = v1.x > v2.x ? v1.x : v2.x; 346 r.y = v1.y > v2.y ? v1.y : v2.y; 347 return r; 348 } 349 350 extern uchar3 __attribute__((overloadable)) max(uchar3 v1, uchar3 v2) { 351 uchar3 r; 352 r.x = v1.x > v2.x ? v1.x : v2.x; 353 r.y = v1.y > v2.y ? v1.y : v2.y; 354 r.z = v1.z > v2.z ? v1.z : v2.z; 355 return r; 356 } 357 358 extern uchar4 __attribute__((overloadable)) max(uchar4 v1, uchar4 v2) { 359 uchar4 r; 360 r.x = v1.x > v2.x ? v1.x : v2.x; 361 r.y = v1.y > v2.y ? v1.y : v2.y; 362 r.z = v1.z > v2.z ? v1.z : v2.z; 363 r.w = v1.w > v2.w ? v1.w : v2.w; 364 return r; 365 } 366 367 extern ushort __attribute__((overloadable)) max(ushort v1, ushort v2) { 368 return v1 > v2 ? v1 : v2; 369 } 370 371 extern ushort2 __attribute__((overloadable)) max(ushort2 v1, ushort2 v2) { 372 ushort2 r; 373 r.x = v1.x > v2.x ? v1.x : v2.x; 374 r.y = v1.y > v2.y ? v1.y : v2.y; 375 return r; 376 } 377 378 extern ushort3 __attribute__((overloadable)) max(ushort3 v1, ushort3 v2) { 379 ushort3 r; 380 r.x = v1.x > v2.x ? v1.x : v2.x; 381 r.y = v1.y > v2.y ? v1.y : v2.y; 382 r.z = v1.z > v2.z ? v1.z : v2.z; 383 return r; 384 } 385 386 extern ushort4 __attribute__((overloadable)) max(ushort4 v1, ushort4 v2) { 387 ushort4 r; 388 r.x = v1.x > v2.x ? v1.x : v2.x; 389 r.y = v1.y > v2.y ? v1.y : v2.y; 390 r.z = v1.z > v2.z ? v1.z : v2.z; 391 r.w = v1.w > v2.w ? v1.w : v2.w; 392 return r; 393 } 394 395 extern uint __attribute__((overloadable)) max(uint v1, uint v2) { 396 return v1 > v2 ? v1 : v2; 397 } 398 399 extern uint2 __attribute__((overloadable)) max(uint2 v1, uint2 v2) { 400 uint2 r; 401 r.x = v1.x > v2.x ? v1.x : v2.x; 402 r.y = v1.y > v2.y ? v1.y : v2.y; 403 return r; 404 } 405 406 extern uint3 __attribute__((overloadable)) max(uint3 v1, uint3 v2) { 407 uint3 r; 408 r.x = v1.x > v2.x ? v1.x : v2.x; 409 r.y = v1.y > v2.y ? v1.y : v2.y; 410 r.z = v1.z > v2.z ? v1.z : v2.z; 411 return r; 412 } 413 414 extern uint4 __attribute__((overloadable)) max(uint4 v1, uint4 v2) { 415 uint4 r; 416 r.x = v1.x > v2.x ? v1.x : v2.x; 417 r.y = v1.y > v2.y ? v1.y : v2.y; 418 r.z = v1.z > v2.z ? v1.z : v2.z; 419 r.w = v1.w > v2.w ? v1.w : v2.w; 420 return r; 421 } 422 423 extern ulong __attribute__((overloadable)) max(ulong v1, ulong v2) { 424 return v1 > v2 ? v1 : v2; 425 } 426 427 extern ulong2 __attribute__((overloadable)) max(ulong2 v1, ulong2 v2) { 428 ulong2 r; 429 r.x = v1.x > v2.x ? v1.x : v2.x; 430 r.y = v1.y > v2.y ? v1.y : v2.y; 431 return r; 432 } 433 434 extern ulong3 __attribute__((overloadable)) max(ulong3 v1, ulong3 v2) { 435 ulong3 r; 436 r.x = v1.x > v2.x ? v1.x : v2.x; 437 r.y = v1.y > v2.y ? v1.y : v2.y; 438 r.z = v1.z > v2.z ? v1.z : v2.z; 439 return r; 440 } 441 442 extern ulong4 __attribute__((overloadable)) max(ulong4 v1, ulong4 v2) { 443 ulong4 r; 444 r.x = v1.x > v2.x ? v1.x : v2.x; 445 r.y = v1.y > v2.y ? v1.y : v2.y; 446 r.z = v1.z > v2.z ? v1.z : v2.z; 447 r.w = v1.w > v2.w ? v1.w : v2.w; 448 return r; 449 } 450 451 extern float __attribute__((overloadable)) max(float v1, float v2) { 452 return fmax(v1, v2); 453 } 454 455 extern float2 __attribute__((overloadable)) max(float2 v1, float2 v2) { 456 return fmax(v1, v2); 457 } 458 459 extern float2 __attribute__((overloadable)) max(float2 v1, float v2) { 460 return fmax(v1, v2); 461 } 462 463 extern float3 __attribute__((overloadable)) max(float3 v1, float3 v2) { 464 return fmax(v1, v2); 465 } 466 467 extern float3 __attribute__((overloadable)) max(float3 v1, float v2) { 468 return fmax(v1, v2); 469 } 470 471 extern float4 __attribute__((overloadable)) max(float4 v1, float4 v2) { 472 return fmax(v1, v2); 473 } 474 475 extern float4 __attribute__((overloadable)) max(float4 v1, float v2) { 476 return fmax(v1, v2); 477 } 478 479 480 /* 481 * MIN 482 */ 483 484 extern int8_t __attribute__((overloadable)) min(int8_t v1, int8_t v2) { 485 return v1 < v2 ? v1 : v2; 486 } 487 488 extern char2 __attribute__((overloadable)) min(char2 v1, char2 v2) { 489 char2 r; 490 r.x = v1.x < v2.x ? v1.x : v2.x; 491 r.y = v1.y < v2.y ? v1.y : v2.y; 492 return r; 493 } 494 495 extern char3 __attribute__((overloadable)) min(char3 v1, char3 v2) { 496 char3 r; 497 r.x = v1.x < v2.x ? v1.x : v2.x; 498 r.y = v1.y < v2.y ? v1.y : v2.y; 499 r.z = v1.z < v2.z ? v1.z : v2.z; 500 return r; 501 } 502 503 extern char4 __attribute__((overloadable)) min(char4 v1, char4 v2) { 504 char4 r; 505 r.x = v1.x < v2.x ? v1.x : v2.x; 506 r.y = v1.y < v2.y ? v1.y : v2.y; 507 r.z = v1.z < v2.z ? v1.z : v2.z; 508 r.w = v1.w < v2.w ? v1.w : v2.w; 509 return r; 510 } 511 512 extern int16_t __attribute__((overloadable)) min(int16_t v1, int16_t v2) { 513 return v1 < v2 ? v1 : v2; 514 } 515 516 extern short2 __attribute__((overloadable)) min(short2 v1, short2 v2) { 517 short2 r; 518 r.x = v1.x < v2.x ? v1.x : v2.x; 519 r.y = v1.y < v2.y ? v1.y : v2.y; 520 return r; 521 } 522 523 extern short3 __attribute__((overloadable)) min(short3 v1, short3 v2) { 524 short3 r; 525 r.x = v1.x < v2.x ? v1.x : v2.x; 526 r.y = v1.y < v2.y ? v1.y : v2.y; 527 r.z = v1.z < v2.z ? v1.z : v2.z; 528 return r; 529 } 530 531 extern short4 __attribute__((overloadable)) min(short4 v1, short4 v2) { 532 short4 r; 533 r.x = v1.x < v2.x ? v1.x : v2.x; 534 r.y = v1.y < v2.y ? v1.y : v2.y; 535 r.z = v1.z < v2.z ? v1.z : v2.z; 536 r.w = v1.w < v2.w ? v1.w : v2.w; 537 return r; 538 } 539 540 extern int32_t __attribute__((overloadable)) min(int32_t v1, int32_t v2) { 541 return v1 < v2 ? v1 : v2; 542 } 543 544 extern int2 __attribute__((overloadable)) min(int2 v1, int2 v2) { 545 int2 r; 546 r.x = v1.x < v2.x ? v1.x : v2.x; 547 r.y = v1.y < v2.y ? v1.y : v2.y; 548 return r; 549 } 550 551 extern int3 __attribute__((overloadable)) min(int3 v1, int3 v2) { 552 int3 r; 553 r.x = v1.x < v2.x ? v1.x : v2.x; 554 r.y = v1.y < v2.y ? v1.y : v2.y; 555 r.z = v1.z < v2.z ? v1.z : v2.z; 556 return r; 557 } 558 559 extern int4 __attribute__((overloadable)) min(int4 v1, int4 v2) { 560 int4 r; 561 r.x = v1.x < v2.x ? v1.x : v2.x; 562 r.y = v1.y < v2.y ? v1.y : v2.y; 563 r.z = v1.z < v2.z ? v1.z : v2.z; 564 r.w = v1.w < v2.w ? v1.w : v2.w; 565 return r; 566 } 567 568 extern int64_t __attribute__((overloadable)) min(int64_t v1, int64_t v2) { 569 return v1 < v2 ? v1 : v2; 570 } 571 572 extern long2 __attribute__((overloadable)) min(long2 v1, long2 v2) { 573 long2 r; 574 r.x = v1.x < v2.x ? v1.x : v2.x; 575 r.y = v1.y < v2.y ? v1.y : v2.y; 576 return r; 577 } 578 579 extern long3 __attribute__((overloadable)) min(long3 v1, long3 v2) { 580 long3 r; 581 r.x = v1.x < v2.x ? v1.x : v2.x; 582 r.y = v1.y < v2.y ? v1.y : v2.y; 583 r.z = v1.z < v2.z ? v1.z : v2.z; 584 return r; 585 } 586 587 extern long4 __attribute__((overloadable)) min(long4 v1, long4 v2) { 588 long4 r; 589 r.x = v1.x < v2.x ? v1.x : v2.x; 590 r.y = v1.y < v2.y ? v1.y : v2.y; 591 r.z = v1.z < v2.z ? v1.z : v2.z; 592 r.w = v1.w < v2.w ? v1.w : v2.w; 593 return r; 594 } 595 596 extern uchar __attribute__((overloadable)) min(uchar v1, uchar v2) { 597 return v1 < v2 ? v1 : v2; 598 } 599 600 extern uchar2 __attribute__((overloadable)) min(uchar2 v1, uchar2 v2) { 601 uchar2 r; 602 r.x = v1.x < v2.x ? v1.x : v2.x; 603 r.y = v1.y < v2.y ? v1.y : v2.y; 604 return r; 605 } 606 607 extern uchar3 __attribute__((overloadable)) min(uchar3 v1, uchar3 v2) { 608 uchar3 r; 609 r.x = v1.x < v2.x ? v1.x : v2.x; 610 r.y = v1.y < v2.y ? v1.y : v2.y; 611 r.z = v1.z < v2.z ? v1.z : v2.z; 612 return r; 613 } 614 615 extern uchar4 __attribute__((overloadable)) min(uchar4 v1, uchar4 v2) { 616 uchar4 r; 617 r.x = v1.x < v2.x ? v1.x : v2.x; 618 r.y = v1.y < v2.y ? v1.y : v2.y; 619 r.z = v1.z < v2.z ? v1.z : v2.z; 620 r.w = v1.w < v2.w ? v1.w : v2.w; 621 return r; 622 } 623 624 extern ushort __attribute__((overloadable)) min(ushort v1, ushort v2) { 625 return v1 < v2 ? v1 : v2; 626 } 627 628 extern ushort2 __attribute__((overloadable)) min(ushort2 v1, ushort2 v2) { 629 ushort2 r; 630 r.x = v1.x < v2.x ? v1.x : v2.x; 631 r.y = v1.y < v2.y ? v1.y : v2.y; 632 return r; 633 } 634 635 extern ushort3 __attribute__((overloadable)) min(ushort3 v1, ushort3 v2) { 636 ushort3 r; 637 r.x = v1.x < v2.x ? v1.x : v2.x; 638 r.y = v1.y < v2.y ? v1.y : v2.y; 639 r.z = v1.z < v2.z ? v1.z : v2.z; 640 return r; 641 } 642 643 extern ushort4 __attribute__((overloadable)) min(ushort4 v1, ushort4 v2) { 644 ushort4 r; 645 r.x = v1.x < v2.x ? v1.x : v2.x; 646 r.y = v1.y < v2.y ? v1.y : v2.y; 647 r.z = v1.z < v2.z ? v1.z : v2.z; 648 r.w = v1.w < v2.w ? v1.w : v2.w; 649 return r; 650 } 651 652 extern uint __attribute__((overloadable)) min(uint v1, uint v2) { 653 return v1 < v2 ? v1 : v2; 654 } 655 656 extern uint2 __attribute__((overloadable)) min(uint2 v1, uint2 v2) { 657 uint2 r; 658 r.x = v1.x < v2.x ? v1.x : v2.x; 659 r.y = v1.y < v2.y ? v1.y : v2.y; 660 return r; 661 } 662 663 extern uint3 __attribute__((overloadable)) min(uint3 v1, uint3 v2) { 664 uint3 r; 665 r.x = v1.x < v2.x ? v1.x : v2.x; 666 r.y = v1.y < v2.y ? v1.y : v2.y; 667 r.z = v1.z < v2.z ? v1.z : v2.z; 668 return r; 669 } 670 671 extern uint4 __attribute__((overloadable)) min(uint4 v1, uint4 v2) { 672 uint4 r; 673 r.x = v1.x < v2.x ? v1.x : v2.x; 674 r.y = v1.y < v2.y ? v1.y : v2.y; 675 r.z = v1.z < v2.z ? v1.z : v2.z; 676 r.w = v1.w < v2.w ? v1.w : v2.w; 677 return r; 678 } 679 680 extern ulong __attribute__((overloadable)) min(ulong v1, ulong v2) { 681 return v1 < v2 ? v1 : v2; 682 } 683 684 extern ulong2 __attribute__((overloadable)) min(ulong2 v1, ulong2 v2) { 685 ulong2 r; 686 r.x = v1.x < v2.x ? v1.x : v2.x; 687 r.y = v1.y < v2.y ? v1.y : v2.y; 688 return r; 689 } 690 691 extern ulong3 __attribute__((overloadable)) min(ulong3 v1, ulong3 v2) { 692 ulong3 r; 693 r.x = v1.x < v2.x ? v1.x : v2.x; 694 r.y = v1.y < v2.y ? v1.y : v2.y; 695 r.z = v1.z < v2.z ? v1.z : v2.z; 696 return r; 697 } 698 699 extern ulong4 __attribute__((overloadable)) min(ulong4 v1, ulong4 v2) { 700 ulong4 r; 701 r.x = v1.x < v2.x ? v1.x : v2.x; 702 r.y = v1.y < v2.y ? v1.y : v2.y; 703 r.z = v1.z < v2.z ? v1.z : v2.z; 704 r.w = v1.w < v2.w ? v1.w : v2.w; 705 return r; 706 } 707 708 extern float __attribute__((overloadable)) min(float v1, float v2) { 709 return fmin(v1, v2); 710 } 711 712 extern float2 __attribute__((overloadable)) min(float2 v1, float2 v2) { 713 return fmin(v1, v2); 714 } 715 716 extern float2 __attribute__((overloadable)) min(float2 v1, float v2) { 717 return fmin(v1, v2); 718 } 719 720 extern float3 __attribute__((overloadable)) min(float3 v1, float3 v2) { 721 return fmin(v1, v2); 722 } 723 724 extern float3 __attribute__((overloadable)) min(float3 v1, float v2) { 725 return fmin(v1, v2); 726 } 727 728 extern float4 __attribute__((overloadable)) min(float4 v1, float4 v2) { 729 return fmin(v1, v2); 730 } 731 732 extern float4 __attribute__((overloadable)) min(float4 v1, float v2) { 733 return fmin(v1, v2); 734 } 735 736 /* 737 * YUV 738 */ 739 740 extern uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) { 741 short Y = ((short)y) - 16; 742 short U = ((short)u) - 128; 743 short V = ((short)v) - 128; 744 745 short4 p; 746 p.r = (Y * 298 + V * 409 + 128) >> 8; 747 p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8; 748 p.b = (Y * 298 + U * 516 + 128) >> 8; 749 p.a = 255; 750 p.r = rsClamp(p.r, (short)0, (short)255); 751 p.g = rsClamp(p.g, (short)0, (short)255); 752 p.b = rsClamp(p.b, (short)0, (short)255); 753 754 return convert_uchar4(p); 755 } 756 757 static float4 yuv_U_values = {0.f, -0.392f * 0.003921569f, +2.02 * 0.003921569f, 0.f}; 758 static float4 yuv_V_values = {1.603f * 0.003921569f, -0.815f * 0.003921569f, 0.f, 0.f}; 759 760 extern float4 __attribute__((overloadable)) rsYuvToRGBA_float4(uchar y, uchar u, uchar v) { 761 float4 color = (float)y * 0.003921569f; 762 float4 fU = ((float)u) - 128.f; 763 float4 fV = ((float)v) - 128.f; 764 765 color += fU * yuv_U_values; 766 color += fV * yuv_V_values; 767 color = clamp(color, 0.f, 1.f); 768 return color; 769 } 770 771 772 /* 773 * half_RECIP 774 */ 775 776 extern float __attribute__((overloadable)) half_recip(float v) { 777 // FIXME: actual algorithm for generic approximate reciprocal 778 return 1.f / v; 779 } 780 781 extern float2 __attribute__((overloadable)) half_recip(float2 v) { 782 float2 r; 783 r.x = half_recip(r.x); 784 r.y = half_recip(r.y); 785 return r; 786 } 787 788 extern float3 __attribute__((overloadable)) half_recip(float3 v) { 789 float3 r; 790 r.x = half_recip(r.x); 791 r.y = half_recip(r.y); 792 r.z = half_recip(r.z); 793 return r; 794 } 795 796 extern float4 __attribute__((overloadable)) half_recip(float4 v) { 797 float4 r; 798 r.x = half_recip(r.x); 799 r.y = half_recip(r.y); 800 r.z = half_recip(r.z); 801 r.w = half_recip(r.w); 802 return r; 803 } 804 805 806 /* 807 * half_SQRT 808 */ 809 810 extern float __attribute__((overloadable)) half_sqrt(float v) { 811 return sqrt(v); 812 } 813 814 extern float2 __attribute__((overloadable)) half_sqrt(float2 v) { 815 float2 r; 816 r.x = half_sqrt(v.x); 817 r.y = half_sqrt(v.y); 818 return r; 819 } 820 821 extern float3 __attribute__((overloadable)) half_sqrt(float3 v) { 822 float3 r; 823 r.x = half_sqrt(v.x); 824 r.y = half_sqrt(v.y); 825 r.z = half_sqrt(v.z); 826 return r; 827 } 828 829 extern float4 __attribute__((overloadable)) half_sqrt(float4 v) { 830 float4 r; 831 r.x = half_sqrt(v.x); 832 r.y = half_sqrt(v.y); 833 r.z = half_sqrt(v.z); 834 r.w = half_sqrt(v.w); 835 return r; 836 } 837 838 839 /* 840 * half_rsqrt 841 */ 842 843 extern float __attribute__((overloadable)) half_rsqrt(float v) { 844 return 1.f / sqrt(v); 845 } 846 847 extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) { 848 float2 r; 849 r.x = half_rsqrt(v.x); 850 r.y = half_rsqrt(v.y); 851 return r; 852 } 853 854 extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) { 855 float3 r; 856 r.x = half_rsqrt(v.x); 857 r.y = half_rsqrt(v.y); 858 r.z = half_rsqrt(v.z); 859 return r; 860 } 861 862 extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) { 863 float4 r; 864 r.x = half_rsqrt(v.x); 865 r.y = half_rsqrt(v.y); 866 r.z = half_rsqrt(v.z); 867 r.w = half_rsqrt(v.w); 868 return r; 869 } 870 871 /** 872 * matrix ops 873 */ 874 875 extern float4 __attribute__((overloadable)) 876 rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) { 877 float4 ret; 878 ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w); 879 ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w); 880 ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w); 881 ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w); 882 return ret; 883 } 884 885 extern float4 __attribute__((overloadable)) 886 rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) { 887 float4 ret; 888 ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12]; 889 ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13]; 890 ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14]; 891 ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15]; 892 return ret; 893 } 894 895 extern float4 __attribute__((overloadable)) 896 rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) { 897 float4 ret; 898 ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12]; 899 ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13]; 900 ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14]; 901 ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15]; 902 return ret; 903 } 904 905 extern float3 __attribute__((overloadable)) 906 rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) { 907 float3 ret; 908 ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z); 909 ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z); 910 ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z); 911 return ret; 912 } 913 914 extern float3 __attribute__((overloadable)) 915 rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) { 916 float3 ret; 917 ret.x = (m->m[0] * in.x) + (m->m[3] * in.y); 918 ret.y = (m->m[1] * in.x) + (m->m[4] * in.y); 919 ret.z = (m->m[2] * in.x) + (m->m[5] * in.y); 920 return ret; 921 } 922 923 /** 924 * Pixel Ops 925 */ 926 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b) 927 { 928 uchar4 c; 929 c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f); 930 c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f); 931 c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f); 932 c.w = 255; 933 return c; 934 } 935 936 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a) 937 { 938 uchar4 c; 939 c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f); 940 c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f); 941 c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f); 942 c.w = (uchar)clamp((a * 255.f + 0.5f), 0.f, 255.f); 943 return c; 944 } 945 946 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color) 947 { 948 color *= 255.f; 949 color += 0.5f; 950 color = clamp(color, 0.f, 255.f); 951 uchar4 c = {color.x, color.y, color.z, 255}; 952 return c; 953 } 954 955 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color) 956 { 957 color *= 255.f; 958 color += 0.5f; 959 color = clamp(color, 0.f, 255.f); 960 uchar4 c = {color.x, color.y, color.z, color.w}; 961 return c; 962 } 963